尝试dify读取图片!

This commit is contained in:
liuwei
2026-02-26 17:25:15 +08:00
parent 0315b064c5
commit 9582d5ed80

View File

@@ -5,6 +5,9 @@ import json
import time import time
import re # 添加re模块导入 import re # 添加re模块导入
import asyncio import asyncio
import base64
import html
import xml.etree.ElementTree as ET
from typing import Dict, Any, List, Optional, Tuple from typing import Dict, Any, List, Optional, Tuple
from pathlib import Path from pathlib import Path
@@ -20,6 +23,7 @@ from utils.decorator.points_decorator import plugin_points_cost
from utils.media_downloader import MediaDownloader from utils.media_downloader import MediaDownloader
from utils.string_utils import remove_trailing_content, remove_grok_render_tags from utils.string_utils import remove_trailing_content, remove_grok_render_tags
from wechat_ipad import WechatAPIClient from wechat_ipad import WechatAPIClient
from wechat_ipad.models.message import MessageType
import aiohttp import aiohttp
# 常见的图片和视频文件扩展名 # 常见的图片和视频文件扩展名
@@ -148,6 +152,8 @@ class DifyPlugin(MessagePluginInterface):
# 获取目标接收者群聊为群ID私聊为个人wxid # 获取目标接收者群聊为群ID私聊为个人wxid
target = roomid if roomid else sender target = roomid if roomid else sender
self.LOG.debug(
f"消息上下文: sender={sender}, roomid={roomid}, target={target}, is_at={message.get('is_at', False)}")
# 检查权限 # 检查权限
if roomid and gbm.get_group_permission(target, self.feature) == PermissionStatus.DISABLED: if roomid and gbm.get_group_permission(target, self.feature) == PermissionStatus.DISABLED:
@@ -158,7 +164,7 @@ class DifyPlugin(MessagePluginInterface):
# 去除@的人和空格等字符 # 去除@的人和空格等字符
query = re.sub(r"@.*?[\u2005|\s]", "", content).strip() query = re.sub(r"@.*?[\u2005|\s]", "", content).strip()
if not query: if not query:
await bot.send_at_message(target, "请在@我的同时提供问题内容", [sender]) # await bot.send_at_message(target, "请在@我的同时提供问题内容", [sender])
return False, "没有提供问题内容" return False, "没有提供问题内容"
else: else:
# 处理命令消息 # 处理命令消息
@@ -168,16 +174,54 @@ class DifyPlugin(MessagePluginInterface):
return False, "命令格式错误" return False, "命令格式错误"
query = parts[1].strip() query = parts[1].strip()
self.LOG.debug(f"解析请求: query_len={len(query)} query_preview={query[:120]}")
try: try:
# 发送等待消息 # 发送等待消息
# client_msg_id, create_time, new_msg_id = await bot.send_text_message( # client_msg_id, create_time, new_msg_id = await bot.send_text_message(
# target, "⏳AI 正在加油,请稍候… 😊", sender if roomid else "") # target, "⏳AI 正在加油,请稍候… 😊", sender if roomid else "")
# revoke.add_message_to_revoke(target, client_msg_id, create_time, new_msg_id, 3) # revoke.add_message_to_revoke(target, client_msg_id, create_time, new_msg_id, 3)
# TODO支持@ 消息(比如:图片引用,文本聊天引用,视频引用) dify_files = []
# 需要根据引用的信息,进行相关资源下载。如果是表情,则不理解。 quote_payload = self._parse_quote_payload(message.get("full_wx_msg"))
# 调用Dify API获取回复 if quote_payload:
success, response = await self._chat_with_dify(target, sender, query) ref_type = quote_payload.get("ref_type", 0)
ref_content = quote_payload.get("ref_content", "").strip()
title = quote_payload.get("title", "").strip()
self.LOG.debug(
f"检测到引用消息: type={ref_type}, title_preview={title[:80]}, ref_len={len(ref_content)}")
if ref_type == MessageType.TEXT.value:
if ref_content:
self.LOG.debug("使用引用文本作为问题")
query = ref_content
elif ref_type == MessageType.IMAGE.value:
if title:
title_query = re.sub(r"@.*?[\u2005|\s]", "", title).strip()
if title_query:
query = title_query
self.LOG.debug("检测到引用图片,开始下载与上传")
image_path = await self._download_quote_image(bot, ref_content)
if not image_path:
return False, "图片引用解析失败"
upload_id = await self._upload_file_to_dify(image_path, sender)
if not upload_id:
return False, "图片上传失败"
self.LOG.debug(f"图片上传完成: upload_id={upload_id}")
dify_files.append({
"type": "image",
"transfer_method": "local_file",
"upload_file_id": upload_id
})
elif ref_type == MessageType.VIDEO.value:
return False, "暂不支持视频引用"
elif ref_type in (MessageType.EMOTICON.value, MessageType.EMOJI.value):
return False, "暂不支持表情引用"
success, response = await self._chat_with_dify(target, sender, query, dify_files)
if not success: if not success:
return False, response return False, response
@@ -232,7 +276,147 @@ class DifyPlugin(MessagePluginInterface):
self.LOG.error(f"发送响应消息时出错: {e}") self.LOG.error(f"发送响应消息时出错: {e}")
return False, f"发送响应失败: {e}" return False, f"发送响应失败: {e}"
async def _chat_with_dify(self, session_id: str, user_id: str, query: str) -> Tuple[bool, Optional[str]]: def _parse_quote_payload(self, full_msg: Any) -> Optional[Dict[str, Any]]:
if not full_msg or not getattr(full_msg, "content", None):
return None
xml_content = getattr(full_msg.content, "xml_content", "")
if not xml_content:
return None
try:
root = ET.fromstring(xml_content)
except ET.ParseError:
return None
appmsg = root.find(".//appmsg")
if appmsg is None:
return None
if appmsg.findtext("type", "").strip() != "57":
return None
title = appmsg.findtext("title", "") or ""
refer = appmsg.find("refermsg")
if refer is None:
return None
ref_type = int(refer.findtext("type", "0") or 0)
ref_content = refer.findtext("content", "") or ""
self.LOG.debug(
f"引用解析成功: type={ref_type}, title_len={len(title)}, content_len={len(ref_content)}")
return {
"title": html.unescape(title),
"ref_type": ref_type,
"ref_content": html.unescape(ref_content)
}
def _extract_quote_image_info(self, ref_content: str) -> Optional[Dict[str, str]]:
if not ref_content:
return None
aeskey_match = re.search(r'aeskey="([^"]+)"', ref_content)
if not aeskey_match:
return None
url_match = re.search(r'cdnmidimgurl="([^"]+)"', ref_content)
if not url_match:
url_match = re.search(r'cdnbigimgurl="([^"]+)"', ref_content)
if not url_match:
url_match = re.search(r'cdnthumburl="([^"]+)"', ref_content)
if not url_match:
return None
md5_match = re.search(r'md5="([^"]+)"', ref_content)
return {
"aeskey": aeskey_match.group(1),
"url": url_match.group(1),
"md5": md5_match.group(1) if md5_match else ""
}
async def _download_quote_image(self, bot: WechatAPIClient, ref_content: str) -> Optional[str]:
image_info = self._extract_quote_image_info(ref_content)
if not image_info:
return None
self.LOG.debug(
f"准备下载引用图片: url_len={len(image_info['url'])}, aeskey_prefix={image_info['aeskey'][:6]}")
try:
base64_str = await bot.download_image(
aeskey=image_info["aeskey"],
cdnmidimgurl=image_info["url"]
)
except Exception as e:
self.LOG.error(f"下载引用图片失败: {e}")
return None
if not base64_str:
return None
try:
image_data = base64.b64decode(base64_str)
except Exception as e:
self.LOG.error(f"解码引用图片失败: {e}")
return None
temp_dir = Path(__file__).resolve().parents[2] / "temp"
os.makedirs(temp_dir, exist_ok=True)
suffix = image_info["md5"] if image_info.get("md5") else str(int(time.time()))
file_path = temp_dir / f"dify_quote_{suffix}.jpg"
try:
with open(file_path, "wb") as f:
f.write(image_data)
except Exception as e:
self.LOG.error(f"保存引用图片失败: {e}")
return None
self.LOG.debug(f"引用图片已保存: {file_path}")
return str(file_path)
async def _upload_file_to_dify(self, file_path: str, user_id: str) -> Optional[str]:
if not file_path or not os.path.isfile(file_path):
return None
self.LOG.debug(f"开始上传文件到Dify: {file_path}")
url = f"{self.base_url}/files/upload"
headers = {"Authorization": f"Bearer {self.api_key}"}
proxy = self.http_proxy if self.http_proxy else None
try:
async with aiohttp.ClientSession() as session:
with open(file_path, "rb") as f:
form = aiohttp.FormData()
form.add_field("file", f, filename=os.path.basename(file_path))
form.add_field("user", user_id)
response = await session.post(url, headers=headers, data=form, proxy=proxy, timeout=40)
if response.status != 200:
error_text = await response.text()
self.LOG.error(f"Dify上传失败: {response.status} {error_text}")
return None
resp_data = await response.json()
if isinstance(resp_data, dict):
if resp_data.get("id"):
return resp_data.get("id")
data = resp_data.get("data", {})
if isinstance(data, dict):
return data.get("id") or data.get("file_id")
return None
except Exception as e:
self.LOG.error(f"上传文件到Dify失败: {e}")
return None
async def _chat_with_dify(self, session_id: str, user_id: str, query: str,
files: Optional[List[Dict[str, Any]]] = None) -> Tuple[bool, Optional[str]]:
""" """
与Dify API交互获取回复 与Dify API交互获取回复
@@ -281,9 +465,13 @@ class DifyPlugin(MessagePluginInterface):
if history_text: if history_text:
inputs_params["history"] = history_text inputs_params["history"] = history_text
if files is None:
files = []
self.LOG.debug(f"Dify请求准备: files={len(files)}")
# 准备请求数据 # 准备请求数据
data = { data = {
"sys.files": [], "sys.files": files,
"user": user_id, "user": user_id,
"inputs": inputs_params, "inputs": inputs_params,
"response_mode": "blocking" # 使用阻塞响应模式 "response_mode": "blocking" # 使用阻塞响应模式