尝试dify读取图片!

This commit is contained in:
liuwei
2026-02-26 17:25:15 +08:00
parent 0315b064c5
commit 9582d5ed80

View File

@@ -5,6 +5,9 @@ import json
import time
import re # 添加re模块导入
import asyncio
import base64
import html
import xml.etree.ElementTree as ET
from typing import Dict, Any, List, Optional, Tuple
from pathlib import Path
@@ -20,6 +23,7 @@ from utils.decorator.points_decorator import plugin_points_cost
from utils.media_downloader import MediaDownloader
from utils.string_utils import remove_trailing_content, remove_grok_render_tags
from wechat_ipad import WechatAPIClient
from wechat_ipad.models.message import MessageType
import aiohttp
# 常见的图片和视频文件扩展名
@@ -148,6 +152,8 @@ class DifyPlugin(MessagePluginInterface):
# 获取目标接收者群聊为群ID私聊为个人wxid
target = roomid if roomid else sender
self.LOG.debug(
f"消息上下文: sender={sender}, roomid={roomid}, target={target}, is_at={message.get('is_at', False)}")
# 检查权限
if roomid and gbm.get_group_permission(target, self.feature) == PermissionStatus.DISABLED:
@@ -158,7 +164,7 @@ class DifyPlugin(MessagePluginInterface):
# 去除@的人和空格等字符
query = re.sub(r"@.*?[\u2005|\s]", "", content).strip()
if not query:
await bot.send_at_message(target, "请在@我的同时提供问题内容", [sender])
# await bot.send_at_message(target, "请在@我的同时提供问题内容", [sender])
return False, "没有提供问题内容"
else:
# 处理命令消息
@@ -168,16 +174,54 @@ class DifyPlugin(MessagePluginInterface):
return False, "命令格式错误"
query = parts[1].strip()
self.LOG.debug(f"解析请求: query_len={len(query)} query_preview={query[:120]}")
try:
# 发送等待消息
# client_msg_id, create_time, new_msg_id = await bot.send_text_message(
# target, "⏳AI 正在加油,请稍候… 😊", sender if roomid else "")
# revoke.add_message_to_revoke(target, client_msg_id, create_time, new_msg_id, 3)
# TODO支持@ 消息(比如:图片引用,文本聊天引用,视频引用)
# 需要根据引用的信息,进行相关资源下载。如果是表情,则不理解。
# 调用Dify API获取回复
success, response = await self._chat_with_dify(target, sender, query)
dify_files = []
quote_payload = self._parse_quote_payload(message.get("full_wx_msg"))
if quote_payload:
ref_type = quote_payload.get("ref_type", 0)
ref_content = quote_payload.get("ref_content", "").strip()
title = quote_payload.get("title", "").strip()
self.LOG.debug(
f"检测到引用消息: type={ref_type}, title_preview={title[:80]}, ref_len={len(ref_content)}")
if ref_type == MessageType.TEXT.value:
if ref_content:
self.LOG.debug("使用引用文本作为问题")
query = ref_content
elif ref_type == MessageType.IMAGE.value:
if title:
title_query = re.sub(r"@.*?[\u2005|\s]", "", title).strip()
if title_query:
query = title_query
self.LOG.debug("检测到引用图片,开始下载与上传")
image_path = await self._download_quote_image(bot, ref_content)
if not image_path:
return False, "图片引用解析失败"
upload_id = await self._upload_file_to_dify(image_path, sender)
if not upload_id:
return False, "图片上传失败"
self.LOG.debug(f"图片上传完成: upload_id={upload_id}")
dify_files.append({
"type": "image",
"transfer_method": "local_file",
"upload_file_id": upload_id
})
elif ref_type == MessageType.VIDEO.value:
return False, "暂不支持视频引用"
elif ref_type in (MessageType.EMOTICON.value, MessageType.EMOJI.value):
return False, "暂不支持表情引用"
success, response = await self._chat_with_dify(target, sender, query, dify_files)
if not success:
return False, response
@@ -232,7 +276,147 @@ class DifyPlugin(MessagePluginInterface):
self.LOG.error(f"发送响应消息时出错: {e}")
return False, f"发送响应失败: {e}"
async def _chat_with_dify(self, session_id: str, user_id: str, query: str) -> Tuple[bool, Optional[str]]:
def _parse_quote_payload(self, full_msg: Any) -> Optional[Dict[str, Any]]:
if not full_msg or not getattr(full_msg, "content", None):
return None
xml_content = getattr(full_msg.content, "xml_content", "")
if not xml_content:
return None
try:
root = ET.fromstring(xml_content)
except ET.ParseError:
return None
appmsg = root.find(".//appmsg")
if appmsg is None:
return None
if appmsg.findtext("type", "").strip() != "57":
return None
title = appmsg.findtext("title", "") or ""
refer = appmsg.find("refermsg")
if refer is None:
return None
ref_type = int(refer.findtext("type", "0") or 0)
ref_content = refer.findtext("content", "") or ""
self.LOG.debug(
f"引用解析成功: type={ref_type}, title_len={len(title)}, content_len={len(ref_content)}")
return {
"title": html.unescape(title),
"ref_type": ref_type,
"ref_content": html.unescape(ref_content)
}
def _extract_quote_image_info(self, ref_content: str) -> Optional[Dict[str, str]]:
if not ref_content:
return None
aeskey_match = re.search(r'aeskey="([^"]+)"', ref_content)
if not aeskey_match:
return None
url_match = re.search(r'cdnmidimgurl="([^"]+)"', ref_content)
if not url_match:
url_match = re.search(r'cdnbigimgurl="([^"]+)"', ref_content)
if not url_match:
url_match = re.search(r'cdnthumburl="([^"]+)"', ref_content)
if not url_match:
return None
md5_match = re.search(r'md5="([^"]+)"', ref_content)
return {
"aeskey": aeskey_match.group(1),
"url": url_match.group(1),
"md5": md5_match.group(1) if md5_match else ""
}
async def _download_quote_image(self, bot: WechatAPIClient, ref_content: str) -> Optional[str]:
image_info = self._extract_quote_image_info(ref_content)
if not image_info:
return None
self.LOG.debug(
f"准备下载引用图片: url_len={len(image_info['url'])}, aeskey_prefix={image_info['aeskey'][:6]}")
try:
base64_str = await bot.download_image(
aeskey=image_info["aeskey"],
cdnmidimgurl=image_info["url"]
)
except Exception as e:
self.LOG.error(f"下载引用图片失败: {e}")
return None
if not base64_str:
return None
try:
image_data = base64.b64decode(base64_str)
except Exception as e:
self.LOG.error(f"解码引用图片失败: {e}")
return None
temp_dir = Path(__file__).resolve().parents[2] / "temp"
os.makedirs(temp_dir, exist_ok=True)
suffix = image_info["md5"] if image_info.get("md5") else str(int(time.time()))
file_path = temp_dir / f"dify_quote_{suffix}.jpg"
try:
with open(file_path, "wb") as f:
f.write(image_data)
except Exception as e:
self.LOG.error(f"保存引用图片失败: {e}")
return None
self.LOG.debug(f"引用图片已保存: {file_path}")
return str(file_path)
async def _upload_file_to_dify(self, file_path: str, user_id: str) -> Optional[str]:
if not file_path or not os.path.isfile(file_path):
return None
self.LOG.debug(f"开始上传文件到Dify: {file_path}")
url = f"{self.base_url}/files/upload"
headers = {"Authorization": f"Bearer {self.api_key}"}
proxy = self.http_proxy if self.http_proxy else None
try:
async with aiohttp.ClientSession() as session:
with open(file_path, "rb") as f:
form = aiohttp.FormData()
form.add_field("file", f, filename=os.path.basename(file_path))
form.add_field("user", user_id)
response = await session.post(url, headers=headers, data=form, proxy=proxy, timeout=40)
if response.status != 200:
error_text = await response.text()
self.LOG.error(f"Dify上传失败: {response.status} {error_text}")
return None
resp_data = await response.json()
if isinstance(resp_data, dict):
if resp_data.get("id"):
return resp_data.get("id")
data = resp_data.get("data", {})
if isinstance(data, dict):
return data.get("id") or data.get("file_id")
return None
except Exception as e:
self.LOG.error(f"上传文件到Dify失败: {e}")
return None
async def _chat_with_dify(self, session_id: str, user_id: str, query: str,
files: Optional[List[Dict[str, Any]]] = None) -> Tuple[bool, Optional[str]]:
"""
与Dify API交互获取回复
@@ -281,9 +465,13 @@ class DifyPlugin(MessagePluginInterface):
if history_text:
inputs_params["history"] = history_text
if files is None:
files = []
self.LOG.debug(f"Dify请求准备: files={len(files)}")
# 准备请求数据
data = {
"sys.files": [],
"sys.files": files,
"user": user_id,
"inputs": inputs_params,
"response_mode": "blocking" # 使用阻塞响应模式