尝试dify读取图片！

2026-02-26 17:25:15 +08:00
parent 0315b064c5
commit 9582d5ed80
1 changed files with 195 additions and 7 deletions
--- a/plugins/dify/main.py
+++ b/plugins/dify/main.py
@@ -5,6 +5,9 @@ import json
 import time
 import re  # 添加re模块导入
 import asyncio
 import base64
 import html
 import xml.etree.ElementTree as ET
 from typing import Dict, Any, List, Optional, Tuple
 from pathlib import Path
@@ -20,6 +23,7 @@ from utils.decorator.points_decorator import plugin_points_cost
 from utils.media_downloader import MediaDownloader
 from utils.string_utils import remove_trailing_content, remove_grok_render_tags
 from wechat_ipad import WechatAPIClient
 from wechat_ipad.models.message import MessageType
 import aiohttp
 # 常见的图片和视频文件扩展名
@@ -148,6 +152,8 @@ class DifyPlugin(MessagePluginInterface):
        # 获取目标接收者（群聊为群ID，私聊为个人wxid）
        target = roomid if roomid else sender
        self.LOG.debug(
            f"消息上下文: sender={sender}, roomid={roomid}, target={target}, is_at={message.get('is_at', False)}")
        # 检查权限
        if roomid and gbm.get_group_permission(target, self.feature) == PermissionStatus.DISABLED:
@@ -158,7 +164,7 @@ class DifyPlugin(MessagePluginInterface):
            # 去除@的人和空格等字符
            query = re.sub(r"@.*?[\u2005|\s]", "", content).strip()
            if not query:
-                await bot.send_at_message(target, "请在@我的同时提供问题内容", [sender])
+                # await bot.send_at_message(target, "请在@我的同时提供问题内容", [sender])
                return False, "没有提供问题内容"
        else:
            # 处理命令消息
@@ -168,16 +174,54 @@ class DifyPlugin(MessagePluginInterface):
                return False, "命令格式错误"
            query = parts[1].strip()
        self.LOG.debug(f"解析请求: query_len={len(query)} query_preview={query[:120]}")
        try:
            # 发送等待消息
            # client_msg_id, create_time, new_msg_id = await bot.send_text_message(
            #     target, "⏳AI 正在加油，请稍候… 😊", sender if roomid else "")
            # revoke.add_message_to_revoke(target, client_msg_id, create_time, new_msg_id, 3)
-            # TODO，支持@ 消息（比如：图片引用，文本聊天引用，视频引用）
+            dify_files = []
-            # 需要根据引用的信息，进行相关资源下载。如果是表情，则不理解。
+            quote_payload = self._parse_quote_payload(message.get("full_wx_msg"))
-            # 调用Dify API获取回复
+            if quote_payload:
-            success, response = await self._chat_with_dify(target, sender, query)
+                ref_type = quote_payload.get("ref_type", 0)
                ref_content = quote_payload.get("ref_content", "").strip()
                title = quote_payload.get("title", "").strip()
                self.LOG.debug(
                    f"检测到引用消息: type={ref_type}, title_preview={title[:80]}, ref_len={len(ref_content)}")
                if ref_type == MessageType.TEXT.value:
                    if ref_content:
                        self.LOG.debug("使用引用文本作为问题")
                        query = ref_content
                elif ref_type == MessageType.IMAGE.value:
                    if title:
                        title_query = re.sub(r"@.*?[\u2005|\s]", "", title).strip()
                        if title_query:
                            query = title_query
                    self.LOG.debug("检测到引用图片，开始下载与上传")
                    image_path = await self._download_quote_image(bot, ref_content)
                    if not image_path:
                        return False, "图片引用解析失败"
                    upload_id = await self._upload_file_to_dify(image_path, sender)
                    if not upload_id:
                        return False, "图片上传失败"
                    self.LOG.debug(f"图片上传完成: upload_id={upload_id}")
                    dify_files.append({
                        "type": "image",
                        "transfer_method": "local_file",
                        "upload_file_id": upload_id
                    })
                elif ref_type == MessageType.VIDEO.value:
                    return False, "暂不支持视频引用"
                elif ref_type in (MessageType.EMOTICON.value, MessageType.EMOJI.value):
                    return False, "暂不支持表情引用"
            success, response = await self._chat_with_dify(target, sender, query, dify_files)
            if not success:
                return False, response
@@ -232,7 +276,147 @@ class DifyPlugin(MessagePluginInterface):
            self.LOG.error(f"发送响应消息时出错: {e}")
            return False, f"发送响应失败: {e}"
-    async def _chat_with_dify(self, session_id: str, user_id: str, query: str) -> Tuple[bool, Optional[str]]:
+    def _parse_quote_payload(self, full_msg: Any) -> Optional[Dict[str, Any]]:
        if not full_msg or not getattr(full_msg, "content", None):
            return None
        xml_content = getattr(full_msg.content, "xml_content", "")
        if not xml_content:
            return None
        try:
            root = ET.fromstring(xml_content)
        except ET.ParseError:
            return None
        appmsg = root.find(".//appmsg")
        if appmsg is None:
            return None
        if appmsg.findtext("type", "").strip() != "57":
            return None
        title = appmsg.findtext("title", "") or ""
        refer = appmsg.find("refermsg")
        if refer is None:
            return None
        ref_type = int(refer.findtext("type", "0") or 0)
        ref_content = refer.findtext("content", "") or ""
        self.LOG.debug(
            f"引用解析成功: type={ref_type}, title_len={len(title)}, content_len={len(ref_content)}")
        return {
            "title": html.unescape(title),
            "ref_type": ref_type,
            "ref_content": html.unescape(ref_content)
        }
    def _extract_quote_image_info(self, ref_content: str) -> Optional[Dict[str, str]]:
        if not ref_content:
            return None
        aeskey_match = re.search(r'aeskey="([^"]+)"', ref_content)
        if not aeskey_match:
            return None
        url_match = re.search(r'cdnmidimgurl="([^"]+)"', ref_content)
        if not url_match:
            url_match = re.search(r'cdnbigimgurl="([^"]+)"', ref_content)
        if not url_match:
            url_match = re.search(r'cdnthumburl="([^"]+)"', ref_content)
        if not url_match:
            return None
        md5_match = re.search(r'md5="([^"]+)"', ref_content)
        return {
            "aeskey": aeskey_match.group(1),
            "url": url_match.group(1),
            "md5": md5_match.group(1) if md5_match else ""
        }
    async def _download_quote_image(self, bot: WechatAPIClient, ref_content: str) -> Optional[str]:
        image_info = self._extract_quote_image_info(ref_content)
        if not image_info:
            return None
        self.LOG.debug(
            f"准备下载引用图片: url_len={len(image_info['url'])}, aeskey_prefix={image_info['aeskey'][:6]}")
        try:
            base64_str = await bot.download_image(
                aeskey=image_info["aeskey"],
                cdnmidimgurl=image_info["url"]
            )
        except Exception as e:
            self.LOG.error(f"下载引用图片失败: {e}")
            return None
        if not base64_str:
            return None
        try:
            image_data = base64.b64decode(base64_str)
        except Exception as e:
            self.LOG.error(f"解码引用图片失败: {e}")
            return None
        temp_dir = Path(__file__).resolve().parents[2] / "temp"
        os.makedirs(temp_dir, exist_ok=True)
        suffix = image_info["md5"] if image_info.get("md5") else str(int(time.time()))
        file_path = temp_dir / f"dify_quote_{suffix}.jpg"
        try:
            with open(file_path, "wb") as f:
                f.write(image_data)
        except Exception as e:
            self.LOG.error(f"保存引用图片失败: {e}")
            return None
        self.LOG.debug(f"引用图片已保存: {file_path}")
        return str(file_path)
    async def _upload_file_to_dify(self, file_path: str, user_id: str) -> Optional[str]:
        if not file_path or not os.path.isfile(file_path):
            return None
        self.LOG.debug(f"开始上传文件到Dify: {file_path}")
        url = f"{self.base_url}/files/upload"
        headers = {"Authorization": f"Bearer {self.api_key}"}
        proxy = self.http_proxy if self.http_proxy else None
        try:
            async with aiohttp.ClientSession() as session:
                with open(file_path, "rb") as f:
                    form = aiohttp.FormData()
                    form.add_field("file", f, filename=os.path.basename(file_path))
                    form.add_field("user", user_id)
                    response = await session.post(url, headers=headers, data=form, proxy=proxy, timeout=40)
                    if response.status != 200:
                        error_text = await response.text()
                        self.LOG.error(f"Dify上传失败: {response.status} {error_text}")
                        return None
                    resp_data = await response.json()
            if isinstance(resp_data, dict):
                if resp_data.get("id"):
                    return resp_data.get("id")
                data = resp_data.get("data", {})
                if isinstance(data, dict):
                    return data.get("id") or data.get("file_id")
            return None
        except Exception as e:
            self.LOG.error(f"上传文件到Dify失败: {e}")
            return None
    async def _chat_with_dify(self, session_id: str, user_id: str, query: str,
                              files: Optional[List[Dict[str, Any]]] = None) -> Tuple[bool, Optional[str]]:
        """
        与Dify API交互获取回复
@@ -281,9 +465,13 @@ class DifyPlugin(MessagePluginInterface):
        if history_text:
            inputs_params["history"] = history_text
        if files is None:
            files = []
        self.LOG.debug(f"Dify请求准备: files={len(files)}")
        # 准备请求数据
        data = {
-            "sys.files": [],
+            "sys.files": files,
            "user": user_id,
            "inputs": inputs_params,
            "response_mode": "blocking"  # 使用阻塞响应模式