加入美图网提取功能
This commit is contained in:
132
base/func_deepseek.py
Normal file
132
base/func_deepseek.py
Normal file
@@ -0,0 +1,132 @@
|
||||
import requests
|
||||
import json
|
||||
import logging
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class DeepSeek():
    """Small chat client for a DeepSeek-style ``chat/completions`` HTTP API.

    The instance is configured from a dict with the keys ``key``, ``api``,
    ``prompt`` and ``model`` and keeps a rolling message history per ``wxid``
    in ``conversation_list``.
    """

    # Prefix of the system message that carries the current time.
    _TIME_MARK = "当需要回答时间时请直接参考回复:"
    # Maximum number of messages kept per conversation (system messages included).
    _MAX_HISTORY = 10
    # (connect, read) timeout in seconds so a stalled API call cannot hang forever.
    _REQUEST_TIMEOUT = (10, 300)

    def __init__(self, conf: dict) -> None:
        """Read the API settings from ``conf`` and start with an empty history."""
        self.key = conf.get("key")
        self.api = conf.get("api")
        prompt = conf.get("prompt")
        self.model = conf.get("model")
        self.LOG = logging.getLogger("deepseek")
        self.conversation_list = {}
        self.system_content_msg = {"role": "system", "content": prompt}

    def __repr__(self):
        return 'DeepSeek'

    def get_answer(self, question: str, wxid: str) -> str:
        """Send ``question`` to the API and return the reply text.

        Args:
            question: The user's message.
            wxid: Conversation identifier used as the history key.

        Returns:
            The reply content, or an empty string when the request fails or
            the response cannot be parsed.
        """
        self.updateMessage(wxid, question, "user")
        rsp = ""
        try:
            # Request headers.
            headers = {
                "Content-Type": "application/json; charset=utf-8",
                "Authorization": f"Bearer {self.key}"
            }
            # Request payload.
            # NOTE(review): only the system prompt and the current question are
            # sent; the history kept in conversation_list is not forwarded.
            # Confirm whether that is intentional.
            data = {
                "model": self.model,
                "messages": [
                    self.system_content_msg,
                    {
                        "role": "user",
                        "content": f"{question}"
                    }
                ]
            }
            # Send the POST request.
            response = requests.post(
                self.api,
                headers=headers,
                data=json.dumps(data),
                timeout=self._REQUEST_TIMEOUT,
            )
            response.encoding = 'utf-8'

            self.LOG.info("DeepSeek API status: %s", response.status_code)
            # extract_content returns None for invalid JSON; normalise to ""
            # so the declared ``str`` return type holds.
            rsp = extract_content(response.text) or ""
            if rsp:
                # Only record real replies; an empty/None entry would pollute
                # the history.
                self.updateMessage(wxid, rsp, "assistant")
        except Exception as e0:
            self.LOG.error(f"发生未知错误:{str(e0)}")
        return rsp

    def updateMessage(self, wxid: str, question: str, role: str) -> None:
        """Append a message to the history of ``wxid`` and keep it bounded.

        A new conversation starts with the system prompt plus a system message
        holding the current time; that time message is refreshed on every
        call. When the history exceeds ``_MAX_HISTORY`` entries the oldest
        non-system messages are dropped.

        Args:
            wxid: Conversation identifier.
            question: Message text to store.
            role: Chat role of the message (e.g. ``"user"``, ``"assistant"``).
        """
        now_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        time_mk = self._TIME_MARK

        # Initialise the conversation with the system messages.
        if wxid not in self.conversation_list:
            self.conversation_list[wxid] = [
                self.system_content_msg,
                {"role": "system", "content": time_mk + now_time}
            ]
        history = self.conversation_list[wxid]

        # Current message.
        history.append({"role": role, "content": question})

        # Refresh the timestamp carried by the time system message.
        for cont in history:
            if cont["role"] != "system":
                continue
            content = cont["content"]
            if isinstance(content, str) and content.startswith(time_mk):
                cont["content"] = time_mk + now_time

        # Keep at most _MAX_HISTORY entries. Deleting a fixed index 1 would
        # remove the time system message, so drop the oldest non-system
        # message instead.
        if len(history) > self._MAX_HISTORY:
            self.LOG.info("滚动清除微信记录:" + wxid)
        while len(history) > self._MAX_HISTORY:
            oldest = next(
                (idx for idx, cont in enumerate(history) if cont["role"] != "system"),
                None,
            )
            if oldest is None:
                break
            del history[oldest]

    @staticmethod
    def value_check(conf: dict) -> bool:
        """Return True when ``conf`` has non-empty ``key``, ``api`` and ``prompt``."""
        if not conf:
            return False
        return bool(conf.get("key") and conf.get("api") and conf.get("prompt"))
|
||||
|
||||
|
||||
# 解析JSON
|
||||
def extract_content(data_string):
    """Return the reply text from a chat-completions JSON response.

    Reads ``choices[0].message.content`` from the decoded document; a missing
    ``content`` key yields ``""``. If ``data_string`` is not valid JSON,
    "Invalid JSON" is printed and ``None`` is returned. Lookup errors on a
    valid document with a different shape propagate to the caller.
    """
    try:
        payload = json.loads(data_string)
    except json.JSONDecodeError:
        print("Invalid JSON")
        return None
    # Pull the content field out of the first choice.
    first_message = payload["choices"][0]["message"]
    return first_message.get("content", "")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual REPL for trying the client against the configured endpoint.
    from configuration import Config

    config = Config().DEEPSEEK
    if not config:
        exit(0)

    chat = DeepSeek(config)

    while True:
        q = input(">>> ")
        try:
            started_at = datetime.now()  # start of the request
            answer = chat.get_answer(q, "Jyunere")
            print(answer)
            finished_at = datetime.now()  # end of the request

            # Elapsed wall-clock time of the call, in seconds.
            elapsed = (finished_at - started_at).total_seconds()
            print(f"{round(elapsed, 2)}s")
        except Exception as e:
            print(e)
|
||||
|
||||
#
|
||||
# [
|
||||
# "windsurf/claude-3-5-sonnet",
|
||||
# "windsurf/gpt4o",
|
||||
# "windsurf/deepseek-chat",
|
||||
# "windsurf/deepseek-reasoner",
|
||||
# "windsurf/gpt4-o3-mini",
|
||||
# "windsurf/gemini-2.0-flash",
|
||||
# ]
|
||||
12
config.yaml
12
config.yaml
@@ -92,6 +92,18 @@ claude:
|
||||
key: 46a5674a-e978-491b-a810-5d54605f2c36
|
||||
api: http://127.0.0.1:8080/v1/chat/completions # 如果你不知道这是干嘛的,就不要改
|
||||
model: windsurf/gpt4o #
|
||||
prompt: '你是一个信息归纳分析工程师,你根据提问会搜索相关资料。经过信息精炼之后返回内容。
|
||||
请回复时以以下格式进行返回:
|
||||
- 问题描述:
|
||||
- 问题评价:分析问题的提出角度,如(财经、彩票、房产、股票、家居、教育、科技、社会、时尚、时政、体育、星座、游戏、娱乐)等
|
||||
- 总结:经过300个字以内的优化返回,返回内容请进行一定程度的结构化,方便快速阅读' # 根据需要对角色进行设定
|
||||
|
||||
# DEEPSEEK
|
||||
#
|
||||
deepseek:
|
||||
key: sk-14bf1893e78040989a43b6f05c07974a
|
||||
api: https://api.deepseek.com/chat/completions # 如果你不知道这是干嘛的,就不要改
|
||||
model: deepseek-chat #
|
||||
prompt: '你是一个信息归纳分析工程师,你根据提问会搜索相关资料。经过信息精炼之后返回内容。
|
||||
请回复时以以下格式进行返回:
|
||||
- 问题描述:
|
||||
|
||||
@@ -38,3 +38,4 @@ class Config(object):
|
||||
self.BardAssistant = yconfig.get("bard", {})
|
||||
self.ZhiPu = yconfig.get("zhipu", {})
|
||||
self.CLAUDE = yconfig.get("claude", {})
|
||||
self.DEEPSEEK =yconfig.get("deepseek",{})
|
||||
|
||||
4
robot.py
4
robot.py
@@ -40,8 +40,8 @@ from message_report.write_db import write_to_db, generate_and_send_ranking
|
||||
from message_storage.message_to_db import archive_message, get_messages
|
||||
from message_summary.message_summary_4o import message_summary
|
||||
from sehuatang.shehuatang import pdf_file_path
|
||||
from xiuren.meitu_dl import meitu_dowload_pic
|
||||
from xiuren.random_pic import get_xiuren_pic
|
||||
from xiuren.xiuren_dl import xiuren_dowload_pic
|
||||
|
||||
|
||||
class Robot(Job):
|
||||
@@ -538,7 +538,7 @@ class Robot(Job):
|
||||
|
||||
def xiu_ren_download_task(self):
|
||||
try:
|
||||
path = xiuren_dowload_pic()
|
||||
path = meitu_dowload_pic()
|
||||
self.wcf.send_file(path, "45317011307@chatroom")
|
||||
except Exception as e:
|
||||
self.LOG.error(f"xiuren_dowload_pic error:{e}")
|
||||
|
||||
85
templates/group_auto_invite_ui.html
Normal file
85
templates/group_auto_invite_ui.html
Normal file
@@ -0,0 +1,85 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>群组管理</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f4f4f4;
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
}
|
||||
h1 {
|
||||
text-align: center;
|
||||
color: #333;
|
||||
}
|
||||
.container {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
background-color: white;
|
||||
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
.form-group {
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
input[type="text"], input[type="submit"], select {
|
||||
padding: 10px;
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
input[type="submit"] {
|
||||
background-color: #007bff;
|
||||
color: white;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
font-size: 16px;
|
||||
}
|
||||
input[type="submit"]:hover {
|
||||
background-color: #0056b3;
|
||||
}
|
||||
.result {
|
||||
margin-top: 20px;
|
||||
padding: 10px;
|
||||
background-color: #e7f4e7;
|
||||
border: 1px solid #d3f8d3;
|
||||
border-radius: 5px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>群组管理</h1>
|
||||
|
||||
<div class="container">
|
||||
<form method="POST">
|
||||
<div class="form-group">
|
||||
<label for="key">Key:</label>
|
||||
<input type="text" id="key" name="key" >
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="group_id">Group ID:</label>
|
||||
<input type="text" id="group_id" name="group_id" >
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="action">Action:</label>
|
||||
<select id="action" name="action">
|
||||
<option value="add">添加群组ID</option>
|
||||
<option value="del">删除群组ID</option>
|
||||
<option value="get">获取所有群组ID</option>
|
||||
<option value="get_first">获取第一个群组ID</option>
|
||||
<option value="get_instructions">获取群组指令</option>
|
||||
</select>
|
||||
</div>
|
||||
<input type="submit" value="提交">
|
||||
</form>
|
||||
|
||||
{% if result %}
|
||||
<div class="result">{{ result }}</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
50
templates/index.html
Normal file
50
templates/index.html
Normal file
@@ -0,0 +1,50 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>系统菜单</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f4f4f4;
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
}
|
||||
h1 {
|
||||
text-align: center;
|
||||
color: #333;
|
||||
}
|
||||
.container {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
background-color: white;
|
||||
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
button {
|
||||
padding: 10px 20px;
|
||||
margin: 10px;
|
||||
background-color: #007bff;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 5px;
|
||||
cursor: pointer;
|
||||
font-size: 16px;
|
||||
}
|
||||
button:hover {
|
||||
background-color: #0056b3;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>欢迎进入系统</h1>
|
||||
|
||||
<div class="container">
|
||||
<button onclick="window.location.href='/redis_operations'">群组管理</button>
|
||||
<button onclick="window.location.href='/messages'">查看消息列表</button>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
110
templates/message_list.html
Normal file
110
templates/message_list.html
Normal file
@@ -0,0 +1,110 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>消息列表</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f4f4f4;
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
}
|
||||
h1 {
|
||||
text-align: center;
|
||||
color: #333;
|
||||
}
|
||||
.container {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
background-color: white;
|
||||
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
table th, table td {
|
||||
padding: 10px;
|
||||
text-align: left;
|
||||
border: 1px solid #ddd;
|
||||
}
|
||||
table th {
|
||||
background-color: #f4f4f4;
|
||||
}
|
||||
.table-container {
|
||||
max-height: 400px; /* 设置表格的最大高度 */
|
||||
overflow-y: auto; /* 启用垂直滚动条 */
|
||||
}
|
||||
.pagination {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
margin-top: 20px;
|
||||
}
|
||||
.pagination a {
|
||||
padding: 8px 16px;
|
||||
margin: 0 5px;
|
||||
text-decoration: none;
|
||||
color: #007bff;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 5px;
|
||||
}
|
||||
.pagination a:hover {
|
||||
background-color: #f1f1f1;
|
||||
}
|
||||
.pagination span {
|
||||
padding: 8px 16px;
|
||||
margin: 0 5px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>消息列表</h1>
|
||||
|
||||
<div class="container">
|
||||
<div class="table-container">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>群ID</th>
|
||||
<th>时间戳</th>
|
||||
<th>发送者</th>
|
||||
<th>内容</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for message in messages %}
|
||||
<tr>
|
||||
<td>{{ message[0] }}</td>
|
||||
<td>{{ message[1] }}</td>
|
||||
<td>{{ message[2] }}</td>
|
||||
<td>{{ message[3] }}</td>
|
||||
<td>{{ message[4] }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="pagination">
|
||||
{% if page > 1 %}
|
||||
<a href="/messages?page=1">首页</a>
|
||||
<a href="/messages?page={{ page - 1 }}">上一页</a>
|
||||
{% endif %}
|
||||
|
||||
<span>第 {{ page }} 页 / {{ total_pages }} 页</span>
|
||||
|
||||
{% if page < total_pages %}
|
||||
<a href="/messages?page={{ page + 1 }}">下一页</a>
|
||||
<a href="/messages?page={{ total_pages }}">末页</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
1
ui/README.md
Normal file
1
ui/README.md
Normal file
@@ -0,0 +1 @@
|
||||
# 制作UI进行群管理,群功能管理,不使用指令完成。
|
||||
60
ui/message_ui.py
Normal file
60
ui/message_ui.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from flask import Flask, render_template, request, jsonify
|
||||
|
||||
import os
|
||||
|
||||
from group_auto.group_auto_invite import add_mapping, del_mapping, get_first_group_id, get_group_ids
|
||||
from ui.messages_list import get_total_messages, get_messages
|
||||
|
||||
# 设置 Flask 实例化时指定模板文件夹路径
|
||||
app = Flask(__name__, template_folder=os.path.join(os.path.dirname(__file__), '..', 'templates'))
|
||||
|
||||
|
||||
# 主菜单页面
|
||||
@app.route('/')
def index():
    """Render the main menu page from the ``index.html`` template."""
    menu_page = render_template('index.html')
    return menu_page
|
||||
|
||||
|
||||
# Redis 操作页面
|
||||
@app.route('/redis_operations', methods=['GET', 'POST'])
def redis_operations():
    """Group-mapping management page.

    GET renders the empty form. POST reads ``key``, ``group_id`` and
    ``action`` from the submitted form, dispatches to the matching helper
    imported from ``group_auto.group_auto_invite`` and renders the form again
    with the helper's return value as ``result``.
    """
    if request.method != 'POST':
        return render_template('group_auto_invite_ui.html', result='')

    key = request.form.get('key')
    group_id = request.form.get('group_id')
    action = request.form.get('action')

    if action == 'add':
        result = add_mapping(key, group_id)
    elif action == 'del':
        result = del_mapping(key, group_id)
    elif action == 'get':
        result = get_group_ids(key)
    elif action == 'get_first':
        result = get_first_group_id(key)
    else:
        # The form offers actions (e.g. "get_instructions") that have no
        # handler here; say so instead of silently rendering nothing.
        result = f"不支持的操作: {action}"

    return render_template('group_auto_invite_ui.html', result=result)
|
||||
|
||||
|
||||
# 显示消息列表(分页)
|
||||
@app.route('/messages', methods=['GET'])
def messages():
    """Render one page of the message list.

    The ``page`` query parameter selects the page (10 rows per page). A
    missing or non-integer value falls back to page 1, and the page is
    clamped to ``1..total_pages`` before the rows are fetched, so the rows
    shown always match the page number displayed.
    """
    per_page = 10  # rows per page

    # type=int makes a non-numeric value fall back to the default instead of
    # raising ValueError (which would surface as a 500).
    page = request.args.get('page', default=1, type=int)

    total = get_total_messages()  # total number of messages
    # Ceiling division; report at least one page even when there is no data.
    total_pages = max(1, -(-total // per_page))

    # Clamp first, then query, so an out-of-range page never hits the
    # database with a negative or past-the-end offset.
    page = min(max(page, 1), total_pages)
    rows = get_messages(page, per_page)

    return render_template('message_list.html', messages=rows, page=page, total_pages=total_pages)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Flask's debug mode enables the Werkzeug interactive debugger, which lets
    # anyone who can reach the server run arbitrary code. Make it opt-in via
    # the FLASK_DEBUG environment variable instead of always on.
    app.run(debug=os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true'))
|
||||
42
ui/messages_list.py
Normal file
42
ui/messages_list.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import pymysql
|
||||
|
||||
# MySQL 配置
|
||||
db_config = {
|
||||
'host': '192.168.2.32',
|
||||
'user': 'root',
|
||||
'password': 'lw123456',
|
||||
'database': 'message_archive'
|
||||
}
|
||||
|
||||
|
||||
# 获取消息列表,按时间倒序
|
||||
def get_messages(page=1, per_page=10):
    """Return one page of messages, newest first.

    Args:
        page: 1-based page number; values below 1 are treated as page 1.
        per_page: Number of rows per page.

    Returns:
        The rows ``(id, group_id, timestamp, sender, content)`` as returned by
        ``cursor.fetchall()``, or ``[]`` when the query fails.
    """
    # Bound before the try so the finally clause can tell whether connect()
    # succeeded; otherwise a failed connect raised UnboundLocalError there.
    connection = None
    try:
        connection = pymysql.connect(**db_config)
        with connection.cursor() as cursor:
            # A negative OFFSET is a SQL error, so never go below zero.
            offset = max(page - 1, 0) * per_page
            cursor.execute(
                "SELECT id, group_id, timestamp, sender, content FROM messages ORDER BY timestamp DESC LIMIT %s OFFSET %s",
                (per_page, offset))
            return cursor.fetchall()
    except pymysql.MySQLError as e:
        print(f"数据库查询失败: {e}")
        return []
    finally:
        if connection is not None:
            connection.close()
|
||||
|
||||
|
||||
# 获取消息总数
|
||||
def get_total_messages():
    """Return the number of rows in the ``messages`` table, or 0 on failure."""
    # Bound before the try so the finally clause can tell whether connect()
    # succeeded; otherwise a failed connect raised UnboundLocalError there.
    connection = None
    try:
        connection = pymysql.connect(**db_config)
        with connection.cursor() as cursor:
            cursor.execute("SELECT COUNT(*) FROM messages")
            return cursor.fetchone()[0]
    except pymysql.MySQLError as e:
        print(f"数据库查询失败: {e}")
        return 0
    finally:
        if connection is not None:
            connection.close()
|
||||
161
xiuren/meitu_dl.py
Normal file
161
xiuren/meitu_dl.py
Normal file
@@ -0,0 +1,161 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import time
|
||||
import os
|
||||
import re
|
||||
from urllib.parse import urljoin
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
from xiuren.xiuren_pdf import generate_pdf_from_images
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Referer': 'https://www.mntuce.com/'
|
||||
}
|
||||
|
||||
seen_posts = set()
|
||||
download_root = "xiuren" # 全局定义下载根目录
|
||||
|
||||
|
||||
def fetch_posts(base_url, posts_per_batch=10):
    """Collect up to ``posts_per_batch`` posts that have not been downloaded.

    Walks the listing pages starting at ``base_url`` and skips posts already
    recorded in ``seen_posts`` or whose target folder under ``download_root``
    already exists. Stops when enough posts are collected, a page has no
    posts, or a request fails.

    Args:
        base_url: URL of the first listing page.
        posts_per_batch: Maximum number of posts to return.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts.
    """
    posts = []
    page = 1
    # Strip the trailing slash so later pages are ".../page/N" rather than
    # "...//page/N" when base_url ends with "/".
    listing_root = base_url.rstrip('/')

    while len(posts) < posts_per_batch:
        url = f"{listing_root}/page/{page}" if page > 1 else base_url
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            post_elements = soup.select('posts.posts-item.card h2.item-heading a')
            if not post_elements:
                print(f"页面 {page} 未找到帖子,停止爬取")
                break

            for post in post_elements:
                post_url = urljoin(base_url, post.get('href'))
                post_title = post.get_text().strip()

                if post_url in seen_posts:
                    continue
                seen_posts.add(post_url)

                # The folder name comes from the "No.<digits>" part of the
                # title; an existing folder means the post was downloaded.
                # NOTE(review): titles without a number map to
                # "unknown_<n>", which can collide with folders left by an
                # earlier run — confirm this is acceptable.
                match = re.search(r'No\.(\d+)', post_title)
                folder_name = match.group(1) if match else f"unknown_{len(posts) + 1}"
                folder_path = os.path.join(download_root, folder_name)
                if os.path.exists(folder_path):
                    continue  # already downloaded

                posts.append({'title': post_title, 'url': post_url})
                if len(posts) == posts_per_batch:
                    break  # enough posts collected

            page += 1
            time.sleep(1)
        except requests.RequestException as e:
            print(f"请求 {url} 失败: {e}")
            break

    return posts
|
||||
|
||||
|
||||
def get_total_pages(post_url):
    """Return the highest page number linked from the post's pagination.

    Fetches ``post_url`` and reads the numeric ``a.post-page-numbers`` links
    inside ``p.post-nav-links``. Returns 1 when there are no such links or
    when the request fails.
    """
    try:
        resp = requests.get(post_url, headers=headers, timeout=10)
        resp.raise_for_status()
        document = BeautifulSoup(resp.text, 'html.parser')

        nav_links = document.select('p.post-nav-links a.post-page-numbers')
        numbers = (int(anchor.text) for anchor in nav_links if anchor.text.isdigit())
        return max(numbers, default=1)
    except requests.RequestException as e:
        print(f"请求 {post_url} 失败,默认1页: {e}")
        return 1
|
||||
|
||||
|
||||
def fetch_images(post_url):
    """Collect the image URLs from every page of a post.

    Uses headless Chrome to load each page and reads the ``src`` of the
    gallery images.

    Args:
        post_url: URL of the first page of the post.

    Returns:
        A list of absolute (``http...``) image URLs in page order.
    """
    images = []
    total_pages = get_total_pages(post_url)
    print(f"帖子 {post_url} 共有 {total_pages} 页")

    options = Options()
    # The ``Options.headless`` attribute was removed in recent Selenium 4
    # releases; passing the browser flag works across versions.
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)

    # Avoid "…//2" when post_url ends with a slash.
    page_root = post_url.rstrip('/')
    try:
        for page in range(1, total_pages + 1):
            url = f"{page_root}/{page}" if page > 1 else post_url
            driver.get(url)
            time.sleep(2)  # give the page time to load its images

            img_elements = driver.find_elements(By.CSS_SELECTOR, 'figure.wp-block-gallery figure.wp-block-image img')
            for img in img_elements:
                img_url = img.get_attribute('src')
                if img_url and img_url.startswith('http'):
                    images.append(img_url)

            print(f"已爬取 {url},找到 {len(img_elements)} 张图片")
    finally:
        # Always release the browser, even if a page load raises.
        driver.quit()
    return images
|
||||
|
||||
|
||||
def download_image(img_url, folder_path, img_index):
    """Download one image and store it as ``<index:03d>.jpg`` in ``folder_path``.

    The image is converted to RGB and saved as JPEG (quality 95). Any failure
    is printed and otherwise ignored so that one bad image does not abort the
    whole download.
    """
    try:
        resp = requests.get(img_url, headers=headers, timeout=10)
        resp.raise_for_status()

        picture = Image.open(BytesIO(resp.content)).convert('RGB')
        target = os.path.join(folder_path, f"{img_index:03d}.jpg")

        picture.save(target, 'JPEG', quality=95)
        print(f"已下载并转换为JPG: {target}")
    except Exception as e:
        print(f"处理图片 {img_url} 失败: {e}")
|
||||
|
||||
|
||||
def meitu_dowload_pic():
    """Download a batch of not-yet-downloaded posts and build a PDF from them.

    Fetches up to 10 posts from the site, saves each post's images into its
    own folder under ``download_root`` and finally returns the result of
    ``generate_pdf_from_images(download_root)``. Returns ``None`` when no
    posts were found.
    """
    base_url = "https://www.mntuce.com/"

    if not os.path.exists(download_root):
        os.makedirs(download_root)

    print(f"开始爬取 {base_url} 的帖子...")
    posts = fetch_posts(base_url, 10)

    if not posts:
        print("未获取到符合条件的帖子,请检查选择器或网络连接。")
        return None

    print(f"成功选择 {len(posts)} 个未下载的帖子,开始下载图片...")
    for number, entry in enumerate(posts, 1):
        title = entry['title']
        link = entry['url']
        print(f"\n{number}. 标题: {title}")
        print(f"   链接: {link}")

        # Folder is named after the "No.<digits>" part of the title.
        numbered = re.search(r'No\.(\d+)', title)
        if numbered:
            folder_name = numbered.group(1)
        else:
            folder_name = f"unknown_{number}"
        folder_path = os.path.join(download_root, folder_name)

        # exist_ok=True: an existing folder is not an error.
        os.makedirs(folder_path, exist_ok=True)
        images = fetch_images(link)
        if not images:
            print("未找到图片,可能需要调整策略。")
        else:
            print(f"共找到 {len(images)} 张图片,开始下载...")
            for position, img_url in enumerate(images, 1):
                download_image(img_url, folder_path, position)

        time.sleep(1)

    # Build the PDF from the downloaded posts.
    return generate_pdf_from_images(download_root)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
meitu_dowload_pic()
|
||||
Reference in New Issue
Block a user