加入美图网提取功能

2025-02-27 16:15:28 +08:00
parent 0a86f0c6ad
commit 00b05565bb
11 changed files with 656 additions and 2 deletions
--- a/base/func_deepseek.py
+++ b/base/func_deepseek.py
@@ -0,0 +1,132 @@
+import requests
+import json
+import logging
+
+from datetime import datetime
+
+
+class DeepSeek():
+    """Thin client for an OpenAI-style chat-completions endpoint (DeepSeek).
+
+    Configuration keys read from ``conf``: ``key`` (bearer token), ``api``
+    (endpoint URL), ``prompt`` (system prompt) and ``model``.
+
+    A per-user history is kept in ``conversation_list`` (keyed by wxid).
+    NOTE(review): the history is recorded but not sent to the API; each
+    request carries only the system prompt and the current question.
+    Confirm whether single-turn requests are intended.
+    """
+
+    # Upper bound on stored entries per wxid, the two system messages included.
+    MAX_HISTORY = 10
+    # Seconds to wait for the API before giving up; without a timeout
+    # requests.post can block forever on a stalled connection.
+    REQUEST_TIMEOUT = 120
+
+    def __init__(self, conf: dict) -> None:
+        """Store the endpoint settings and prepare an empty history."""
+        self.key = conf.get("key")
+        self.api = conf.get("api")
+        prompt = conf.get("prompt")
+        self.model = conf.get("model")
+        self.LOG = logging.getLogger("deepseek")
+        self.conversation_list = {}
+        self.system_content_msg = {"role": "system", "content": prompt}
+
+    def __repr__(self):
+        return 'DeepSeek'
+
+    def get_answer(self, question: str, wxid: str) -> str:
+        """Send ``question`` to the model on behalf of ``wxid``.
+
+        Returns the reply text, or an empty string when the request fails or
+        the response cannot be parsed (the error is logged, not raised).
+        """
+        self.updateMessage(wxid, question, "user")
+        rsp = ""
+        try:
+            # Request headers
+            headers = {
+                "Content-Type": "application/json; charset=utf-8",
+                "Authorization": f"Bearer {self.key}"
+            }
+            # Request payload
+            data = {
+                "model": self.model,
+                "messages": [
+                    self.system_content_msg,
+                    {
+                        "role": "user",
+                        "content": f"{question}"
+                    }
+
+                ]
+            }
+            # Send the POST request
+            response = requests.post(
+                self.api,
+                headers=headers,
+                data=json.dumps(data),
+                timeout=self.REQUEST_TIMEOUT,
+            )
+            response.encoding = 'utf-8'
+
+            # Print the HTTP status code
+            print(response.status_code)
+            # extract_content returns None for an unparsable body; normalise
+            # to "" so the declared str return type holds.
+            rsp = extract_content(response.text) or ""
+            if rsp:
+                # Only record real replies; never store None/"" in the history.
+                self.updateMessage(wxid, rsp, "assistant")
+        except Exception as e0:
+            self.LOG.error(f"发生未知错误：{str(e0)}")
+        return rsp
+
+    def updateMessage(self, wxid: str, question: str, role: str) -> None:
+        """Append a message to the history of ``wxid`` and trim the history.
+
+        :param wxid: conversation key
+        :param question: message text to store
+        :param role: chat role of the message ("user" or "assistant")
+        """
+        now_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        time_mk = "当需要回答时间时请直接参考回复:"
+        # First message for this wxid: seed the history with the system
+        # prompt (index 0) and the time hint (index 1).
+        if wxid not in self.conversation_list:
+            self.conversation_list[wxid] = [
+                self.system_content_msg,
+                {"role": "system", "content": "" + time_mk + now_time}
+            ]
+
+        # Current message
+        self.conversation_list[wxid].append({"role": role, "content": question})
+
+        # Refresh the time hint so it always carries the current time.
+        for cont in self.conversation_list[wxid]:
+            if cont["role"] != "system":
+                continue
+            if cont["content"].startswith(time_mk):
+                cont["content"] = time_mk + now_time
+
+        # Keep at most MAX_HISTORY entries. Indexes 0 and 1 hold the system
+        # prompt and the time hint, so drop the oldest non-system entry
+        # (index 2) rather than the time hint.
+        if len(self.conversation_list[wxid]) > self.MAX_HISTORY:
+            print("滚动清除微信记录：" + wxid)
+            del self.conversation_list[wxid][2]
+
+    @staticmethod
+    def value_check(conf: dict) -> bool:
+        """Return True when ``conf`` provides non-empty key, api and prompt."""
+        return bool(
+            conf
+            and conf.get("key")
+            and conf.get("api")
+            and conf.get("prompt")
+        )
+
+
+# Parse the chat-completions JSON response
+def extract_content(data_string):
+    """Return the first choice's message content from a JSON response body.
+
+    :param data_string: raw JSON text of the API response
+    :return: the ``content`` value ("" when the key is absent), or None when
+        the text is not valid JSON or lacks the ``choices[0].message`` path
+        (for example an error payload)
+    """
+    try:
+        data = json.loads(data_string)
+    except json.JSONDecodeError:
+        print("Invalid JSON")
+        return None
+
+    try:
+        # Extract the content field
+        return data["choices"][0]["message"].get("content", "")
+    except (KeyError, IndexError, TypeError, AttributeError):
+        # Valid JSON that does not have the expected response shape.
+        print("Unexpected response structure")
+        return None
+
+
+if __name__ == '__main__':
+    # Interactive smoke test: read questions from stdin and print the answers.
+    from configuration import Config
+
+    deepseek_conf = Config().DEEPSEEK
+    if not deepseek_conf:
+        exit(0)
+
+    bot = DeepSeek(deepseek_conf)
+
+    while True:
+        user_input = input(">>> ")
+        try:
+            started_at = datetime.now()  # start of the round trip
+            answer = bot.get_answer(user_input, "Jyunere")
+            print(answer)
+            finished_at = datetime.now()  # end of the round trip
+
+            # Elapsed wall-clock time of the call, in seconds
+            elapsed = (finished_at - started_at).total_seconds()
+            print(f"{round(elapsed, 2)}s")
+        except Exception as err:
+            print(err)
+
+#
+# [
+#     "windsurf/claude-3-5-sonnet",
+#     "windsurf/gpt4o",
+#     "windsurf/deepseek-chat",
+#     "windsurf/deepseek-reasoner",
+#     "windsurf/gpt4-o3-mini",
+#     "windsurf/gemini-2.0-flash",
+# ]
--- a/config.yaml
+++ b/config.yaml
@@ -92,6 +92,18 @@ claude:
  key: 46a5674a-e978-491b-a810-5d54605f2c36
  api: http://127.0.0.1:8080/v1/chat/completions  # 如果你不知道这是干嘛的，就不要改
  model: windsurf/gpt4o #
+  prompt: '你是一个信息归纳分析工程师，你根据提问会搜索相关资料。经过信息精炼之后返回内容。
+              请回复时以以下格式进行返回：
+              - 问题描述：
+              - 问题评价：分析问题的提出角度，如（财经、彩票、房产、股票、家居、教育、科技、社会、时尚、时政、体育、星座、游戏、娱乐）等
+              - 总结：经过300个字以内的优化返回，返回内容请进行一定程度的结构化，方便快速阅读'  # 根据需要对角色进行设定
+
+#  DEEPSEEK
+#
+deepseek:
+  key: sk-14bf1893e78040989a43b6f05c07974a
+  api: https://api.deepseek.com/chat/completions  # 如果你不知道这是干嘛的，就不要改
+  model: deepseek-chat #
  prompt: '你是一个信息归纳分析工程师，你根据提问会搜索相关资料。经过信息精炼之后返回内容。
              请回复时以以下格式进行返回：
              - 问题描述：
--- a/configuration.py
+++ b/configuration.py
@@ -38,3 +38,4 @@ class Config(object):
        self.BardAssistant = yconfig.get("bard", {})
        self.ZhiPu = yconfig.get("zhipu", {})
        self.CLAUDE = yconfig.get("claude", {})
+        self.DEEPSEEK =yconfig.get("deepseek",{})
--- a/robot.py
+++ b/robot.py
@@ -40,8 +40,8 @@ from message_report.write_db import write_to_db, generate_and_send_ranking
 from message_storage.message_to_db import archive_message, get_messages
 from message_summary.message_summary_4o import message_summary
 from sehuatang.shehuatang import pdf_file_path
+from xiuren.meitu_dl import meitu_dowload_pic
 from xiuren.random_pic import get_xiuren_pic
-from xiuren.xiuren_dl import xiuren_dowload_pic


 class Robot(Job):
@@ -538,7 +538,7 @@ class Robot(Job):

    def xiu_ren_download_task(self):
        try:
-            path = xiuren_dowload_pic()
+            path = meitu_dowload_pic()
            self.wcf.send_file(path, "45317011307@chatroom")
        except Exception as e:
            self.LOG.error(f"xiuren_dowload_pic error：{e}")
--- a/templates/group_auto_invite_ui.html
+++ b/templates/group_auto_invite_ui.html
@@ -0,0 +1,85 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>群组管理</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            background-color: #f4f4f4;
+            margin: 0;
+            padding: 20px;
+        }
+        h1 {
+            text-align: center;
+            color: #333;
+        }
+        .container {
+            max-width: 800px;
+            margin: 0 auto;
+            padding: 20px;
+            background-color: white;
+            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+        }
+        .form-group {
+            margin-bottom: 15px;
+        }
+        input[type="text"], input[type="submit"], select {
+            padding: 10px;
+            width: 100%;
+            box-sizing: border-box;
+        }
+        input[type="submit"] {
+            background-color: #007bff;
+            color: white;
+            border: none;
+            cursor: pointer;
+            font-size: 16px;
+        }
+        input[type="submit"]:hover {
+            background-color: #0056b3;
+        }
+        .result {
+            margin-top: 20px;
+            padding: 10px;
+            background-color: #e7f4e7;
+            border: 1px solid #d3f8d3;
+            border-radius: 5px;
+        }
+    </style>
+</head>
+<body>
+
+    <h1>群组管理</h1>
+
+    <div class="container">
+        <form method="POST">
+            <div class="form-group">
+                <label for="key">Key:</label>
+                <input type="text" id="key" name="key" >
+            </div>
+            <div class="form-group">
+                <label for="group_id">Group ID:</label>
+                <input type="text" id="group_id" name="group_id" >
+            </div>
+            <div class="form-group">
+                <label for="action">Action:</label>
+                <select id="action" name="action">
+                    <option value="add">添加群组ID</option>
+                    <option value="del">删除群组ID</option>
+                    <option value="get">获取所有群组ID</option>
+                    <option value="get_first">获取第一个群组ID</option>
+                    <option value="get_instructions">获取群组指令</option>
+                </select>
+            </div>
+            <input type="submit" value="提交">
+        </form>
+
+        {% if result %}
+            <div class="result">{{ result }}</div>
+        {% endif %}
+    </div>
+
+</body>
+</html>
--- a/templates/index.html
+++ b/templates/index.html
@@ -0,0 +1,50 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>系统菜单</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            background-color: #f4f4f4;
+            margin: 0;
+            padding: 20px;
+        }
+        h1 {
+            text-align: center;
+            color: #333;
+        }
+        .container {
+            max-width: 800px;
+            margin: 0 auto;
+            padding: 20px;
+            background-color: white;
+            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+        }
+        button {
+            padding: 10px 20px;
+            margin: 10px;
+            background-color: #007bff;
+            color: white;
+            border: none;
+            border-radius: 5px;
+            cursor: pointer;
+            font-size: 16px;
+        }
+        button:hover {
+            background-color: #0056b3;
+        }
+    </style>
+</head>
+<body>
+
+    <h1>欢迎进入系统</h1>
+
+    <div class="container">
+        <button onclick="window.location.href='/redis_operations'">群组管理</button>
+        <button onclick="window.location.href='/messages'">查看消息列表</button>
+    </div>
+
+</body>
+</html>
--- a/templates/message_list.html
+++ b/templates/message_list.html
@@ -0,0 +1,110 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>消息列表</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            background-color: #f4f4f4;
+            margin: 0;
+            padding: 20px;
+        }
+        h1 {
+            text-align: center;
+            color: #333;
+        }
+        .container {
+            max-width: 800px;
+            margin: 0 auto;
+            padding: 20px;
+            background-color: white;
+            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+        }
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            margin-bottom: 20px;
+        }
+        table th, table td {
+            padding: 10px;
+            text-align: left;
+            border: 1px solid #ddd;
+        }
+        table th {
+            background-color: #f4f4f4;
+        }
+        .table-container {
+            max-height: 400px;  /* 设置表格的最大高度 */
+            overflow-y: auto;  /* 启用垂直滚动条 */
+        }
+        .pagination {
+            display: flex;
+            justify-content: center;
+            margin-top: 20px;
+        }
+        .pagination a {
+            padding: 8px 16px;
+            margin: 0 5px;
+            text-decoration: none;
+            color: #007bff;
+            border: 1px solid #ddd;
+            border-radius: 5px;
+        }
+        .pagination a:hover {
+            background-color: #f1f1f1;
+        }
+        .pagination span {
+            padding: 8px 16px;
+            margin: 0 5px;
+        }
+    </style>
+</head>
+<body>
+
+    <h1>消息列表</h1>
+
+    <div class="container">
+        <div class="table-container">
+            <table>
+                <thead>
+                    <tr>
+                        <th>ID</th>
+                        <th>群ID</th>
+                        <th>时间戳</th>
+                        <th>发送者</th>
+                        <th>内容</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {% for message in messages %}
+                        <tr>
+                            <td>{{ message[0] }}</td>
+                            <td>{{ message[1] }}</td>
+                            <td>{{ message[2] }}</td>
+                            <td>{{ message[3] }}</td>
+                            <td>{{ message[4] }}</td>
+                        </tr>
+                    {% endfor %}
+                </tbody>
+            </table>
+        </div>
+
+        <div class="pagination">
+            {% if page > 1 %}
+                <a href="/messages?page=1">首页</a>
+                <a href="/messages?page={{ page - 1 }}">上一页</a>
+            {% endif %}
+
+            <span>第 {{ page }} 页 / {{ total_pages }} 页</span>
+
+            {% if page < total_pages %}
+                <a href="/messages?page={{ page + 1 }}">下一页</a>
+                <a href="/messages?page={{ total_pages }}">末页</a>
+            {% endif %}
+        </div>
+    </div>
+
+</body>
+</html>
--- a/ui/README.md
+++ b/ui/README.md
@@ -0,0 +1 @@
+# 制作UI进行群管理，群功能管理，不使用指令完成。
--- a/ui/message_ui.py
+++ b/ui/message_ui.py
@@ -0,0 +1,60 @@
+from flask import Flask, render_template, request, jsonify
+
+import os
+
+from group_auto.group_auto_invite import add_mapping, del_mapping, get_first_group_id, get_group_ids
+from ui.messages_list import get_total_messages, get_messages
+
+# Create the Flask app and point it at the "templates" directory one level above this file.
+app = Flask(__name__, template_folder=os.path.join(os.path.dirname(__file__), '..', 'templates'))
+
+
+# Main menu page
+@app.route('/')
+def index():
+    """Render the landing page template."""
+    landing_template = 'index.html'
+    return render_template(landing_template)
+
+
+# Redis operations page
+@app.route('/redis_operations', methods=['GET', 'POST'])
+def redis_operations():
+    """Show the group-mapping form and, on POST, run the selected action.
+
+    The form fields ``key``, ``group_id`` and ``action`` are read from the
+    request; the outcome of the action is passed to the template as
+    ``result``. GET requests and unrecognised actions render an empty result.
+    """
+    outcome = ''
+    if request.method == 'POST':
+        form = request.form
+        key = form.get('key')
+        group_id = form.get('group_id')
+        action = form.get('action')
+
+        # Map each supported action name to the call that performs it.
+        handlers = {
+            'add': lambda: add_mapping(key, group_id),
+            'del': lambda: del_mapping(key, group_id),
+            'get': lambda: get_group_ids(key),
+            'get_first': lambda: get_first_group_id(key),
+        }
+        handler = handlers.get(action)
+        if handler is not None:
+            outcome = handler()
+
+    return render_template('group_auto_invite_ui.html', result=outcome)
+
+
+# Message list page (paginated)
+@app.route('/messages', methods=['GET'])
+def messages():
+    """Render one page of archived messages.
+
+    The ``page`` query parameter selects the page (default 1). Non-numeric
+    values fall back to 1 and out-of-range values are clamped to the valid
+    range *before* the rows are queried, so the rows shown always match the
+    page number displayed.
+    """
+    per_page = 10  # rows shown per page
+    # type=int makes the lookup return the default instead of raising when
+    # the query string holds a non-numeric value.
+    page = request.args.get('page', 1, type=int)
+
+    total = get_total_messages()  # total number of stored messages
+    total_pages = (total + per_page - 1) // per_page  # ceiling division
+
+    # Clamp to [1, total_pages]; an empty table still yields page 1.
+    page = max(1, min(page, total_pages))
+
+    rows = get_messages(page, per_page)  # rows for the clamped page
+
+    return render_template('message_list.html', messages=rows, page=page, total_pages=total_pages)
+
+
+if __name__ == '__main__':
+    # The Werkzeug debugger allows arbitrary code execution from the browser,
+    # so debug mode is opt-in: set FLASK_DEBUG=1 for local development.
+    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')
--- a/ui/messages_list.py
+++ b/ui/messages_list.py
@@ -0,0 +1,42 @@
+import pymysql
+
+# MySQL connection settings, unpacked as keyword arguments into pymysql.connect().
+# NOTE(review): the host and root credentials are hard-coded and committed;
+# consider loading them from configuration or the environment instead.
+db_config = {
+    'host': '192.168.2.32',
+    'user': 'root',
+    'password': 'lw123456',
+    'database': 'message_archive'
+}
+
+
+# Fetch one page of messages, newest first
+def get_messages(page=1, per_page=10):
+    """Return one page of rows from the ``messages`` table, newest first.
+
+    :param page: 1-based page number; values below 1 are treated as 1
+    :param per_page: number of rows per page
+    :return: rows of (id, group_id, timestamp, sender, content), or an empty
+        list when the connection or the query fails
+    """
+    # Bound before the try block so the finally clause can always test it;
+    # otherwise a failed connect() would raise NameError from finally and
+    # mask the handled database error.
+    connection = None
+    try:
+        connection = pymysql.connect(**db_config)
+        with connection.cursor() as cursor:
+            # Never send a negative OFFSET to MySQL.
+            offset = max(page - 1, 0) * per_page
+            cursor.execute(
+                "SELECT id, group_id, timestamp, sender, content FROM messages ORDER BY timestamp DESC LIMIT %s OFFSET %s",
+                (per_page, offset))
+            return cursor.fetchall()
+    except pymysql.MySQLError as e:
+        print(f"数据库查询失败: {e}")
+        return []
+    finally:
+        if connection is not None:
+            connection.close()
+
+
+# Count all stored messages
+def get_total_messages():
+    """Return the number of rows in the ``messages`` table.
+
+    :return: the row count, or 0 when the connection or the query fails
+    """
+    # Bound before the try block so the finally clause can always test it;
+    # otherwise a failed connect() would raise NameError from finally and
+    # mask the handled database error.
+    connection = None
+    try:
+        connection = pymysql.connect(**db_config)
+        with connection.cursor() as cursor:
+            cursor.execute("SELECT COUNT(*) FROM messages")
+            return cursor.fetchone()[0]
+    except pymysql.MySQLError as e:
+        print(f"数据库查询失败: {e}")
+        return 0
+    finally:
+        if connection is not None:
+            connection.close()
--- a/xiuren/meitu_dl.py
+++ b/xiuren/meitu_dl.py
@@ -0,0 +1,161 @@
+import requests
+from bs4 import BeautifulSoup
+import time
+import os
+import re
+from urllib.parse import urljoin
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from PIL import Image
+from io import BytesIO
+
+from xiuren.xiuren_pdf import generate_pdf_from_images
+
+# HTTP headers sent with every requests call made by this module.
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124 Safari/537.36',
+    'Referer': 'https://www.mntuce.com/'
+}
+
+# Post URLs already handled in this process; fetch_posts adds to it and skips members.
+seen_posts = set()
+download_root = "xiuren"  # Root download directory, defined at module level
+
+
+def fetch_posts(base_url, posts_per_batch=10):
+    """Collect up to ``posts_per_batch`` posts that have not been handled yet.
+
+    Listing pages are walked starting at ``base_url``. A post is skipped when
+    its URL is already in ``seen_posts`` or when its download folder already
+    exists under ``download_root``. Every post examined is added to
+    ``seen_posts``.
+
+    :param base_url: URL of the first listing page
+    :param posts_per_batch: maximum number of posts to return
+    :return: list of ``{'title': ..., 'url': ...}`` dicts
+    """
+    posts = []
+    page = 1
+    # Strip a trailing slash so later pages do not become ".../" + "/page/N".
+    listing_root = base_url.rstrip('/')
+
+    while len(posts) < posts_per_batch:
+        url = f"{listing_root}/page/{page}" if page > 1 else base_url
+        try:
+            response = requests.get(url, headers=headers, timeout=10)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, 'html.parser')
+
+            post_elements = soup.select('posts.posts-item.card h2.item-heading a')
+            if not post_elements:
+                print(f"页面 {page} 未找到帖子，停止爬取")
+                break
+
+            for post in post_elements:
+                href = post.get('href')
+                if not href:
+                    # An anchor without href would resolve to base_url itself
+                    # and be queued as if it were a post.
+                    continue
+                post_url = urljoin(base_url, href)
+                post_title = post.get_text().strip()
+
+                # Work out the download folder to detect finished posts.
+                match = re.search(r'No\.(\d+)', post_title)
+                folder_name = match.group(1) if match else f"unknown_{len(posts) + 1}"
+                folder_path = os.path.join(download_root, folder_name)
+
+                if post_url not in seen_posts:
+                    # Mark as seen either way so it is not re-checked later.
+                    seen_posts.add(post_url)
+                    if os.path.exists(folder_path):
+                        continue  # already downloaded
+
+                    posts.append({'title': post_title, 'url': post_url})
+                    if len(posts) == posts_per_batch:
+                        break  # batch is full
+
+            page += 1
+            time.sleep(1)
+        except requests.RequestException as e:
+            print(f"请求 {url} 失败: {e}")
+            break
+
+    return posts
+
+
+def get_total_pages(post_url):
+    """Return the highest page number linked from a post's pagination.
+
+    Falls back to 1 when the post has no numeric page links or when the
+    request fails.
+    """
+    try:
+        reply = requests.get(post_url, headers=headers, timeout=10)
+        reply.raise_for_status()
+        document = BeautifulSoup(reply.text, 'html.parser')
+
+        page_numbers = []
+        for anchor in document.select('p.post-nav-links a.post-page-numbers'):
+            if anchor.text.isdigit():
+                page_numbers.append(int(anchor.text))
+
+        if page_numbers:
+            return max(page_numbers)
+        return 1
+    except requests.RequestException as err:
+        print(f"请求 {post_url} 失败，默认1页: {err}")
+        return 1
+
+
+def fetch_images(post_url):
+    """Collect the image URLs from every page of a post.
+
+    Pages are rendered in headless Chrome through Selenium; only ``src``
+    values starting with ``http`` are kept.
+
+    :param post_url: URL of the post's first page
+    :return: list of image URLs in page order
+    """
+    images = []
+    total_pages = get_total_pages(post_url)
+    print(f"帖子 {post_url} 共有 {total_pages} 页")
+
+    options = Options()
+    # The Options.headless property was removed from recent Selenium
+    # releases, where assigning it silently does nothing; the command-line
+    # switch works across versions.
+    options.add_argument("--headless")
+    driver = webdriver.Chrome(options=options)
+
+    # Avoid ".../" + "/2" when the post URL ends with a slash.
+    page_root = post_url.rstrip('/')
+
+    try:
+        for page in range(1, total_pages + 1):
+            url = f"{page_root}/{page}" if page > 1 else post_url
+            driver.get(url)
+            time.sleep(2)  # give the page time to load its images
+
+            img_elements = driver.find_elements(By.CSS_SELECTOR, 'figure.wp-block-gallery figure.wp-block-image img')
+            for img in img_elements:
+                img_url = img.get_attribute('src')
+                if img_url and img_url.startswith('http'):
+                    images.append(img_url)
+
+            print(f"已爬取 {url}，找到 {len(img_elements)} 张图片")
+    finally:
+        # Always shut the browser down, even when a page load raises.
+        driver.quit()
+
+    return images
+
+
+def download_image(img_url, folder_path, img_index):
+    """Download one image and store it as a zero-padded JPEG file.
+
+    The image is converted to RGB and written to
+    ``<folder_path>/<img_index as 3 digits>.jpg``. Any failure is printed and
+    swallowed so the remaining images can still be processed.
+    """
+    try:
+        reply = requests.get(img_url, headers=headers, timeout=10)
+        reply.raise_for_status()
+
+        picture = Image.open(BytesIO(reply.content))
+        rgb_picture = picture.convert('RGB')
+        destination = os.path.join(folder_path, f"{img_index:03d}.jpg")
+
+        rgb_picture.save(destination, 'JPEG', quality=95)
+        print(f"已下载并转换为JPG: {destination}")
+    except Exception as err:
+        print(f"处理图片 {img_url} 失败: {err}")
+
+
+def meitu_dowload_pic():
+    """Download the images of new posts and build a PDF from them.
+
+    Up to 10 not-yet-downloaded posts are fetched from the site; each post's
+    images are saved under ``download_root/<post number>``.
+
+    :return: whatever ``generate_pdf_from_images(download_root)`` returns, or
+        None when no new post was found
+    """
+    base_url = "https://www.mntuce.com/"
+
+    os.makedirs(download_root, exist_ok=True)
+
+    print(f"开始爬取 {base_url} 的帖子...")
+    posts = fetch_posts(base_url, 10)
+
+    if not posts:
+        print("未获取到符合条件的帖子，请检查选择器或网络连接。")
+        return
+
+    print(f"成功选择 {len(posts)} 个未下载的帖子，开始下载图片...")
+    for i, post in enumerate(posts, 1):
+        print(f"\n{i}. 标题: {post['title']}")
+        print(f"   链接: {post['url']}")
+
+        match = re.search(r'No\.(\d+)', post['title'])
+        folder_name = match.group(1) if match else f"unknown_{i}"
+        folder_path = os.path.join(download_root, folder_name)
+
+        os.makedirs(folder_path, exist_ok=True)  # exist_ok avoids an error if the folder exists
+        images = fetch_images(post['url'])
+        if images:
+            print(f"共找到 {len(images)} 张图片，开始下载...")
+            for idx, img_url in enumerate(images, 1):
+                download_image(img_url, folder_path, idx)
+        else:
+            print("未找到图片，可能需要调整策略。")
+
+        time.sleep(1)
+
+    # Build the PDF once every selected post has been downloaded. Returning
+    # from inside the loop handled only the first post, while the others had
+    # already been marked as seen by fetch_posts and were never retried.
+    # NOTE(review): assumes generate_pdf_from_images handles every folder
+    # under download_root; verify against its implementation.
+    return generate_pdf_from_images(download_root)
+
+
+# Script entry point: run a single crawl-and-download pass.
+if __name__ == "__main__":
+    meitu_dowload_pic()
				`@@ -0,0 +1 @@`
				`# 制作UI进行群管理，群功能管理，不使用指令完成。`