加入美图网提取功能

This commit is contained in:
liuwei
2025-02-27 16:15:28 +08:00
parent 0a86f0c6ad
commit 00b05565bb
11 changed files with 656 additions and 2 deletions

132
base/func_deepseek.py Normal file
View File

@@ -0,0 +1,132 @@
import requests
import json
import logging
from datetime import datetime
class DeepSeek():
    """Small client for a chat-completions style HTTP API.

    A rolling message history is kept per WeChat id in ``conversation_list``.
    Each history starts with two system messages: the configured prompt and a
    "current time" hint that is refreshed on every update.
    """

    # Prefix that identifies the system message carrying the current time.
    _TIME_PREFIX = "当需要回答时间时请直接参考回复:"
    # Maximum number of messages kept per wxid (system messages included).
    _MAX_HISTORY = 10
    # Index of the first non-system message in a history list.
    _FIRST_CHAT_INDEX = 2

    def __init__(self, conf: dict) -> None:
        """Read ``key``, ``api``, ``prompt`` and ``model`` from *conf*.

        An optional ``timeout`` entry (seconds, default 120) bounds the HTTP call.
        """
        self.key = conf.get("key")
        self.api = conf.get("api")
        prompt = conf.get("prompt")
        self.model = conf.get("model")
        self.timeout = conf.get("timeout", 120)
        self.LOG = logging.getLogger("deepseek")
        self.conversation_list = {}
        self.system_content_msg = {"role": "system", "content": prompt}

    def __repr__(self):
        return 'DeepSeek'

    def get_answer(self, question: str, wxid: str) -> str:
        """Send *question* to the API and return the reply text.

        Returns an empty string when the request fails or the response
        carries no usable content; errors are logged, not raised.
        """
        self.updateMessage(wxid, question, "user")
        rsp = ""
        try:
            # Request headers
            headers = {
                "Content-Type": "application/json; charset=utf-8",
                "Authorization": f"Bearer {self.key}"
            }
            # Request payload.
            # NOTE(review): only the system prompt and the current question are
            # sent; the history kept in conversation_list is not part of the
            # request. Confirm whether that is intended.
            data = {
                "model": self.model,
                "messages": [
                    self.system_content_msg,
                    {
                        "role": "user",
                        "content": f"{question}"
                    }
                ]
            }
            # Without a timeout a stalled server would block the caller forever.
            response = requests.post(
                self.api,
                headers=headers,
                data=json.dumps(data),
                timeout=self.timeout,
            )
            response.encoding = 'utf-8'
            self.LOG.debug("DeepSeek HTTP status: %s", response.status_code)
            # extract_content may return None; keep the declared str return type.
            rsp = extract_content(response.text) or ""
            if rsp:
                self.updateMessage(wxid, rsp, "assistant")
        except Exception as e0:
            self.LOG.error(f"发生未知错误:{str(e0)}")
        return rsp

    def updateMessage(self, wxid: str, question: str, role: str) -> None:
        """Append a message to the history of *wxid* and trim it.

        Args:
            wxid: Conversation identifier.
            question: Message text to store.
            role: Chat role of the message ("user" or "assistant").
        """
        now_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        time_mk = self._TIME_PREFIX
        # Initialise the history with the two system messages.
        if wxid not in self.conversation_list:
            self.conversation_list[wxid] = [
                self.system_content_msg,
                {"role": "system", "content": time_mk + now_time},
            ]
        history = self.conversation_list[wxid]
        # Current message
        history.append({"role": role, "content": question})
        # Refresh the timestamp carried by the time system message.
        for cont in history:
            if cont["role"] != "system":
                continue
            content = cont["content"]
            if isinstance(content, str) and content.startswith(time_mk):
                cont["content"] = time_mk + now_time
        # Keep at most _MAX_HISTORY entries. Drop the oldest chat message and
        # never the two leading system messages (index 0 and 1).
        while len(history) > self._MAX_HISTORY:
            self.LOG.debug("滚动清除微信记录:%s", wxid)
            del history[self._FIRST_CHAT_INDEX]

    @staticmethod
    def value_check(conf: dict) -> bool:
        """Return True when *conf* provides non-empty ``key``, ``api`` and ``prompt``."""
        return bool(
            conf
            and conf.get("key")
            and conf.get("api")
            and conf.get("prompt")
        )
# Parse the JSON response body
def extract_content(data_string):
    """Return the first choice's message content from a chat-completions body.

    Args:
        data_string: Raw response text expected to hold a JSON object.

    Returns:
        The ``content`` string ("" when the message has no ``content`` key),
        or ``None`` when *data_string* is not valid JSON or does not contain
        the ``choices[0].message`` structure (for example an error payload).
    """
    log = logging.getLogger("deepseek")
    try:
        data = json.loads(data_string)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a None / non-text body.
        log.warning("Invalid JSON")
        return None
    try:
        # Extract the content field
        return data["choices"][0]["message"].get("content", "")
    except (KeyError, IndexError, TypeError, AttributeError):
        log.warning("Unexpected response structure: %s", data)
        return None
if __name__ == '__main__':
    # Interactive smoke test: read questions from stdin and print each answer
    # together with the time the round trip took.
    from configuration import Config

    config = Config().DEEPSEEK
    if not config:
        # `exit` is only injected by the site module; SystemExit always exists.
        raise SystemExit(0)

    chat = DeepSeek(config)
    while True:
        try:
            q = input(">>> ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session without a traceback.
            break
        try:
            time_start = datetime.now()  # start of the request
            print(chat.get_answer(q, "Jyunere"))
            time_end = datetime.now()  # end of the request
            # Elapsed wall-clock time in seconds
            print(f"{round((time_end - time_start).total_seconds(), 2)}s")
        except Exception as e:
            print(e)
#
# [
# "windsurf/claude-3-5-sonnet",
# "windsurf/gpt4o",
# "windsurf/deepseek-chat",
# "windsurf/deepseek-reasoner",
# "windsurf/gpt4-o3-mini",
# "windsurf/gemini-2.0-flash",
# ]

View File

@@ -92,6 +92,18 @@ claude:
key: 46a5674a-e978-491b-a810-5d54605f2c36
api: http://127.0.0.1:8080/v1/chat/completions # 如果你不知道这是干嘛的,就不要改
model: windsurf/gpt4o #
prompt: '你是一个信息归纳分析工程师,你根据提问会搜索相关资料。经过信息精炼之后返回内容。
请回复时以以下格式进行返回:
- 问题描述:
- 问题评价:分析问题的提出角度,如(财经、彩票、房产、股票、家居、教育、科技、社会、时尚、时政、体育、星座、游戏、娱乐)等
- 总结经过300个字以内的优化返回返回内容请进行一定程度的结构化方便快速阅读' # 根据需要对角色进行设定
# DEEPSEEK
#
deepseek:
key: sk-14bf1893e78040989a43b6f05c07974a
api: https://api.deepseek.com/chat/completions # 如果你不知道这是干嘛的,就不要改
model: deepseek-chat #
prompt: '你是一个信息归纳分析工程师,你根据提问会搜索相关资料。经过信息精炼之后返回内容。
请回复时以以下格式进行返回:
- 问题描述:

View File

@@ -38,3 +38,4 @@ class Config(object):
self.BardAssistant = yconfig.get("bard", {})
self.ZhiPu = yconfig.get("zhipu", {})
self.CLAUDE = yconfig.get("claude", {})
self.DEEPSEEK =yconfig.get("deepseek",{})

View File

@@ -40,8 +40,8 @@ from message_report.write_db import write_to_db, generate_and_send_ranking
from message_storage.message_to_db import archive_message, get_messages
from message_summary.message_summary_4o import message_summary
from sehuatang.shehuatang import pdf_file_path
from xiuren.meitu_dl import meitu_dowload_pic
from xiuren.random_pic import get_xiuren_pic
from xiuren.xiuren_dl import xiuren_dowload_pic
class Robot(Job):
@@ -538,7 +538,7 @@ class Robot(Job):
def xiu_ren_download_task(self):
try:
path = xiuren_dowload_pic()
path = meitu_dowload_pic()
self.wcf.send_file(path, "45317011307@chatroom")
except Exception as e:
self.LOG.error(f"xiuren_dowload_pic error{e}")

View File

@@ -0,0 +1,85 @@
<!DOCTYPE html>
<!-- Group management form: posts key / group_id / action back to the same URL
     and shows the returned result below the form. -->
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>群组管理</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background-color: #f4f4f4;
            margin: 0;
            padding: 20px;
        }
        h1 {
            text-align: center;
            color: #333;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: white;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        .form-group {
            margin-bottom: 15px;
        }
        input[type="text"], input[type="submit"], select {
            padding: 10px;
            width: 100%;
            box-sizing: border-box;
        }
        input[type="submit"] {
            background-color: #007bff;
            color: white;
            border: none;
            cursor: pointer;
            font-size: 16px;
        }
        input[type="submit"]:hover {
            background-color: #0056b3;
        }
        .result {
            margin-top: 20px;
            padding: 10px;
            background-color: #e7f4e7;
            border: 1px solid #d3f8d3;
            border-radius: 5px;
        }
    </style>
</head>
<body>
    <h1>群组管理</h1>
    <div class="container">
        <form method="POST">
            <div class="form-group">
                <label for="key">Key:</label>
                <!-- Every action handled by the backend uses the key. -->
                <input type="text" id="key" name="key" required>
            </div>
            <div class="form-group">
                <label for="group_id">Group ID:</label>
                <!-- Only needed for add / del, so it is not marked required. -->
                <input type="text" id="group_id" name="group_id">
            </div>
            <div class="form-group">
                <label for="action">Action:</label>
                <select id="action" name="action">
                    <option value="add">添加群组ID</option>
                    <option value="del">删除群组ID</option>
                    <option value="get">获取所有群组ID</option>
                    <option value="get_first">获取第一个群组ID</option>
                    <option value="get_instructions">获取群组指令</option>
                </select>
            </div>
            <input type="submit" value="提交">
        </form>
        {% if result %}
        <div class="result">{{ result }}</div>
        {% endif %}
    </div>
</body>
</html>

50
templates/index.html Normal file
View File

@@ -0,0 +1,50 @@
<!DOCTYPE html>
<!-- System menu: entry page linking to group management and the message list. -->
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>系统菜单</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background-color: #f4f4f4;
            margin: 0;
            padding: 20px;
        }
        h1 {
            text-align: center;
            color: #333;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: white;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        button {
            padding: 10px 20px;
            margin: 10px;
            background-color: #007bff;
            color: white;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-size: 16px;
        }
        button:hover {
            background-color: #0056b3;
        }
    </style>
</head>
<body>
    <h1>欢迎进入系统</h1>
    <div class="container">
        <button onclick="window.location.href='/redis_operations'">群组管理</button>
        <button onclick="window.location.href='/messages'">查看消息列表</button>
    </div>
</body>
</html>

110
templates/message_list.html Normal file
View File

@@ -0,0 +1,110 @@
<!DOCTYPE html>
<!-- Paginated message list. Expects the template variables `messages`
     (rows indexed 0..4), `page` and `total_pages`. -->
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>消息列表</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background-color: #f4f4f4;
            margin: 0;
            padding: 20px;
        }
        h1 {
            text-align: center;
            color: #333;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: white;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        table {
            width: 100%;
            border-collapse: collapse;
            margin-bottom: 20px;
        }
        table th, table td {
            padding: 10px;
            text-align: left;
            border: 1px solid #ddd;
        }
        table th {
            background-color: #f4f4f4;
        }
        .table-container {
            max-height: 400px; /* cap the table height */
            overflow-y: auto;  /* enable vertical scrolling */
        }
        .pagination {
            display: flex;
            justify-content: center;
            margin-top: 20px;
        }
        .pagination a {
            padding: 8px 16px;
            margin: 0 5px;
            text-decoration: none;
            color: #007bff;
            border: 1px solid #ddd;
            border-radius: 5px;
        }
        .pagination a:hover {
            background-color: #f1f1f1;
        }
        .pagination span {
            padding: 8px 16px;
            margin: 0 5px;
        }
    </style>
</head>
<body>
    <h1>消息列表</h1>
    <div class="container">
        <div class="table-container">
            <table>
                <thead>
                    <tr>
                        <th>ID</th>
                        <th>群ID</th>
                        <th>时间戳</th>
                        <th>发送者</th>
                        <th>内容</th>
                    </tr>
                </thead>
                <tbody>
                    {% for message in messages %}
                    <tr>
                        <td>{{ message[0] }}</td>
                        <td>{{ message[1] }}</td>
                        <td>{{ message[2] }}</td>
                        <td>{{ message[3] }}</td>
                        <td>{{ message[4] }}</td>
                    </tr>
                    {% else %}
                    {# Shown when there are no rows, so the table is never silently empty. #}
                    <tr>
                        <td colspan="5">暂无消息</td>
                    </tr>
                    {% endfor %}
                </tbody>
            </table>
        </div>
        <div class="pagination">
            {% if page > 1 %}
            <a href="/messages?page=1">首页</a>
            <a href="/messages?page={{ page - 1 }}">上一页</a>
            {% endif %}
            <span>第 {{ page }} 页 / {{ total_pages }} 页</span>
            {% if page < total_pages %}
            <a href="/messages?page={{ page + 1 }}">下一页</a>
            <a href="/messages?page={{ total_pages }}">末页</a>
            {% endif %}
        </div>
    </div>
</body>
</html>

1
ui/README.md Normal file
View File

@@ -0,0 +1 @@
# 制作 UI 进行群管理和群功能管理,无需使用指令即可完成。

60
ui/message_ui.py Normal file
View File

@@ -0,0 +1,60 @@
from flask import Flask, render_template, request, jsonify
import os
from group_auto.group_auto_invite import add_mapping, del_mapping, get_first_group_id, get_group_ids
from ui.messages_list import get_total_messages, get_messages
# Point Flask at the templates folder located one level above this file
_TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), '..', 'templates')
app = Flask(__name__, template_folder=_TEMPLATE_DIR)
# Main menu page
@app.route('/')
def index():
    """Render the main menu template."""
    menu_page = render_template('index.html')
    return menu_page
# Redis operations page
@app.route('/redis_operations', methods=['GET', 'POST'])
def redis_operations():
    """Show the group management form and, on POST, run the selected action.

    Form fields read: ``key``, ``group_id`` and ``action``. The outcome of the
    action (or a validation message) is passed to the template as ``result``.
    """
    result = ''
    if request.method == 'POST':
        key = request.form.get('key')
        group_id = request.form.get('group_id')
        action = request.form.get('action')

        # Each supported action maps to (callable, needs_group_id).
        handlers = {
            'add': (lambda: add_mapping(key, group_id), True),
            'del': (lambda: del_mapping(key, group_id), True),
            'get': (lambda: get_group_ids(key), False),
            'get_first': (lambda: get_first_group_id(key), False),
        }

        if action not in handlers:
            # The form can submit actions this view has no handler for;
            # say so instead of returning an empty page.
            result = f"不支持的操作: {action}"
        elif not key:
            result = "Key 不能为空"
        else:
            run, needs_group_id = handlers[action]
            if needs_group_id and not group_id:
                result = "Group ID 不能为空"
            else:
                result = run()

    return render_template('group_auto_invite_ui.html', result=result)
# Message list (paginated)
@app.route('/messages', methods=['GET'])
def messages():
    """Render one page of archived messages.

    The ``page`` query parameter selects the page (default 1). Values that
    are not integers fall back to 1, and out-of-range values are clamped
    before the rows are fetched so the rows always match the page shown.
    """
    per_page = 10  # rows per page

    # type=int makes a non-numeric ?page= yield the default instead of raising.
    page = request.args.get('page', 1, type=int)

    total = get_total_messages()  # total number of messages
    # Ceiling division; report at least one page even when there are no rows.
    total_pages = max(1, -(-total // per_page))

    # Clamp first, then query. A page below 1 would otherwise produce a
    # negative OFFSET.
    page = min(max(page, 1), total_pages)
    rows = get_messages(page, per_page)

    return render_template(
        'message_list.html',
        messages=rows,
        page=page,
        total_pages=total_pages,
    )
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who reaches it execute code, so it is
    # opt-in: set FLASK_DEBUG=1 to enable it for local development.
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')

42
ui/messages_list.py Normal file
View File

@@ -0,0 +1,42 @@
import pymysql
# MySQL connection settings, unpacked as keyword arguments into pymysql.connect
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from configuration or the environment instead.
db_config = dict(
    host='192.168.2.32',
    user='root',
    password='lw123456',
    database='message_archive',
)
# Fetch the message list, newest first
def get_messages(page=1, per_page=10):
    """Return one page of rows from the ``messages`` table, newest first.

    Args:
        page: 1-based page number; values below 1 are treated as 1.
        per_page: Number of rows per page.

    Returns:
        The rows from ``cursor.fetchall()`` with the columns
        (id, group_id, timestamp, sender, content), or ``[]`` when the
        connection or the query fails.
    """
    # Bind the name before the try so `finally` is safe when connect() fails.
    connection = None
    try:
        connection = pymysql.connect(**db_config)
        with connection.cursor() as cursor:
            # Never send a negative OFFSET to MySQL.
            offset = max(page - 1, 0) * per_page
            cursor.execute(
                "SELECT id, group_id, timestamp, sender, content FROM messages ORDER BY timestamp DESC LIMIT %s OFFSET %s",
                (per_page, offset))
            return cursor.fetchall()
    except pymysql.MySQLError as e:
        print(f"数据库查询失败: {e}")
        return []
    finally:
        if connection is not None:
            connection.close()
# Count all messages
def get_total_messages():
    """Return the number of rows in the ``messages`` table.

    Returns:
        The ``COUNT(*)`` value, or ``0`` when the connection or the query fails.
    """
    # Bind the name before the try so `finally` is safe when connect() fails.
    connection = None
    try:
        connection = pymysql.connect(**db_config)
        with connection.cursor() as cursor:
            cursor.execute("SELECT COUNT(*) FROM messages")
            return cursor.fetchone()[0]
    except pymysql.MySQLError as e:
        print(f"数据库查询失败: {e}")
        return 0
    finally:
        if connection is not None:
            connection.close()

161
xiuren/meitu_dl.py Normal file
View File

@@ -0,0 +1,161 @@
import requests
from bs4 import BeautifulSoup
import time
import os
import re
from urllib.parse import urljoin
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from PIL import Image
from io import BytesIO
from xiuren.xiuren_pdf import generate_pdf_from_images
# HTTP headers passed to the requests.get calls in this module
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124 Safari/537.36",
    "Referer": "https://www.mntuce.com/",
}

# Post URLs already examined during this process
seen_posts = set()

# Root directory for downloads, shared module-wide
download_root = "xiuren"
def fetch_posts(base_url, posts_per_batch=10):
    """Collect up to *posts_per_batch* posts that have not been downloaded yet.

    Listing pages are walked starting at *base_url*, then ``/page/2``,
    ``/page/3`` and so on. A post is skipped when its URL is already in
    ``seen_posts`` or when its target folder under ``download_root`` exists.

    Args:
        base_url: URL of the first listing page.
        posts_per_batch: Maximum number of posts to return.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts. It may be shorter than
        requested when a page has no posts or a request fails.
    """
    posts = []
    page = 1
    # Avoid "//page/N" when base_url already ends with a slash.
    listing_root = base_url.rstrip('/')

    while len(posts) < posts_per_batch:
        url = f"{listing_root}/page/{page}" if page > 1 else base_url
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"请求 {url} 失败: {e}")
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        post_elements = soup.select('posts.posts-item.card h2.item-heading a')
        if not post_elements:
            print(f"页面 {page} 未找到帖子,停止爬取")
            break

        for post in post_elements:
            href = post.get('href')
            if not href:
                # An anchor without href would resolve to base_url itself.
                continue
            post_url = urljoin(base_url, href)
            if post_url in seen_posts:
                continue
            # Mark as seen either way so it is not checked again.
            seen_posts.add(post_url)

            post_title = post.get_text().strip()
            # The folder is named after the "No.<digits>" part of the title.
            match = re.search(r'No\.(\d+)', post_title)
            folder_name = match.group(1) if match else f"unknown_{len(posts) + 1}"
            if os.path.exists(os.path.join(download_root, folder_name)):
                continue  # already downloaded

            posts.append({'title': post_title, 'url': post_url})
            if len(posts) == posts_per_batch:
                # Batch complete: return without the extra page delay.
                return posts

        page += 1
        time.sleep(1)  # pause between listing pages

    return posts
def get_total_pages(post_url):
    """Return the highest page number linked from a post, or 1.

    The numbers come from the ``p.post-nav-links a.post-page-numbers``
    anchors. 1 is returned when there are none or when the request fails.
    """
    try:
        resp = requests.get(post_url, headers=headers, timeout=10)
        resp.raise_for_status()
        document = BeautifulSoup(resp.text, 'html.parser')
        numbers = []
        for anchor in document.select('p.post-nav-links a.post-page-numbers'):
            if anchor.text.isdigit():
                numbers.append(int(anchor.text))
        if not numbers:
            return 1
        return max(numbers)
    except requests.RequestException as e:
        print(f"请求 {post_url} 失败默认1页: {e}")
        return 1
def fetch_images(post_url):
    """Collect the image URLs from every page of a post using headless Chrome.

    Args:
        post_url: URL of the first page of the post.

    Returns:
        A list of the absolute ``src`` URLs (those starting with "http")
        found in the gallery images, in page order.
    """
    images = []
    total_pages = get_total_pages(post_url)
    print(f"帖子 {post_url} 共有 {total_pages}")

    options = Options()
    # `options.headless = True` was removed in Selenium 4.13; the command-line
    # switch works across Selenium versions.
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    try:
        # Avoid "//N" when the post URL already ends with a slash.
        paged_root = post_url.rstrip('/')
        for page in range(1, total_pages + 1):
            url = f"{paged_root}/{page}" if page > 1 else post_url
            driver.get(url)
            time.sleep(2)  # give the page time to load its images
            img_elements = driver.find_elements(
                By.CSS_SELECTOR,
                'figure.wp-block-gallery figure.wp-block-image img')
            for img in img_elements:
                img_url = img.get_attribute('src')
                if img_url and img_url.startswith('http'):
                    images.append(img_url)
            print(f"已爬取 {url},找到 {len(img_elements)} 张图片")
    finally:
        # Always shut the browser down, even when a page load raises.
        driver.quit()
    return images
def download_image(img_url, folder_path, img_index):
    """Download one image and store it as ``<img_index, 3 digits>.jpg``.

    The image is converted to RGB and written as JPEG with quality 95 into
    *folder_path*. Any failure is printed and swallowed so that one bad image
    does not abort the caller.
    """
    try:
        resp = requests.get(img_url, headers=headers, timeout=10)
        resp.raise_for_status()
        picture = Image.open(BytesIO(resp.content))
        rgb_picture = picture.convert('RGB')
        target = os.path.join(folder_path, f"{img_index:03d}.jpg")
        rgb_picture.save(target, 'JPEG', quality=95)
        print(f"已下载并转换为JPG: {target}")
    except Exception as e:
        print(f"处理图片 {img_url} 失败: {e}")
def meitu_dowload_pic():
    """Download a batch of new posts and build a PDF from the download folder.

    Up to 10 posts are fetched from the site. Each post's images are saved
    into a folder under ``download_root`` named after the "No.<digits>" part
    of its title, or ``unknown_<n>`` when the title has no such part.

    Returns:
        Whatever ``generate_pdf_from_images(download_root)`` returns, or
        ``None`` when no post was selected.
    """
    base_url = "https://www.mntuce.com/"

    if not os.path.exists(download_root):
        os.makedirs(download_root)

    print(f"开始爬取 {base_url} 的帖子...")
    posts = fetch_posts(base_url, 10)
    if not posts:
        print("未获取到符合条件的帖子,请检查选择器或网络连接。")
        return

    print(f"成功选择 {len(posts)} 个未下载的帖子,开始下载图片...")
    for number, post in enumerate(posts, 1):
        title = post['title']
        link = post['url']
        print(f"\n{number}. 标题: {title}")
        print(f" 链接: {link}")

        found = re.search(r'No\.(\d+)', title)
        if found:
            folder_name = found.group(1)
        else:
            folder_name = f"unknown_{number}"
        folder_path = os.path.join(download_root, folder_name)
        # exist_ok=True keeps an already existing folder from raising
        os.makedirs(folder_path, exist_ok=True)

        images = fetch_images(link)
        if not images:
            print("未找到图片,可能需要调整策略。")
        else:
            print(f"共找到 {len(images)} 张图片,开始下载...")
            for position, img_url in enumerate(images, 1):
                download_image(img_url, folder_path, position)
        time.sleep(1)

    # Turn the downloaded posts into a PDF
    return generate_pdf_from_images(download_root)


if __name__ == "__main__":
    meitu_dowload_pic()