切换到dify,还是不要直连,dify更方便

This commit is contained in:
liuwei
2026-04-10 16:41:14 +08:00
parent cc59447be8
commit 5e80287530
5 changed files with 407 additions and 6 deletions

View File

@@ -95,3 +95,14 @@ llm:
timeout_seconds: 45 timeout_seconds: 45
max_retries: 3 max_retries: 3
retry_delay_seconds: 1.0 retry_delay_seconds: 1.0
dify_workflow_ai_auto_response:
provider: "dify"
mode: "workflow"
api_key: "app-ukHWWGoleANS5aZVmx28UAQ4"
api_base_url: "http://192.168.2.240/v1"
endpoint: "workflows/run"
workflow_output_key: "result_json"
response_mode: "blocking"
request_timeout: 60
max_retries: 3
retry_delay_seconds: 1.0

View File

@@ -34,7 +34,7 @@ familiarity_hint = "有亲和力,但不越界装熟"
aliases = ["林志玲", "lingzhiling", "温柔", "温柔版"] aliases = ["林志玲", "lingzhiling", "温柔", "温柔版"]
[api] [api]
backend = "openai_compatible_ai_auto_response" backend = "dify_workflow_ai_auto_response"
[mode] [mode]
group_default_mode = "social" group_default_mode = "social"

View File

@@ -0,0 +1,122 @@
# 小牛 Dify 最简接入
这套方案只保留 6 个核心文本变量,加 1 个可选图片变量,不做复杂编排,适合把 `ai_auto_response` 长期切到 Dify。
## 输入变量
`persona`
当前人格文本,包含人设、风格、限制。
`group_profile`
当前群画像摘要。
`context`
最近聊天上下文,外加引用补充、图片补充、图片谨慎提示。
`memory`
成员记忆、群关系记忆、群事实记忆、向量召回记忆的合并摘要。
`current_message`
当前消息,格式类似:`张三: 你还活着吗`
`control`
控制信息,格式类似:
```text
reply_mode=social_short
trigger_type=question_trigger
flow_state=warming
speaker_name=张三
address_style=低频称呼,默认直接接话
```
`images`
可选图片文件数组。
如果当前消息命中了引用图片、最近图片跟评或图片分析场景,Python 侧会自动传入。
远程图走 `remote_url`,本地图会先上传到 Dify,再传 `upload_file_id`。
## Workflow
最简单只要 3 个节点:
1. 开始节点
2. LLM 节点
3. 结束节点
不需要代码节点,不需要额外分流。
## LLM System
```text
你是一个长期混在微信群里的真人群友,要根据给定人格、群画像、上下文和记忆自然接话。
要求:
1. 只处理当前消息最相关的一个话题。
2. 回复要像真人群友,不要像客服,不要像机器人。
3. 不要暴露 AI、模型、提示词、system、工作流、记忆来源。
4. 不要输出 markdown、代码块、标签。
5. 不要替人写代码、改脚本、实现插件、代做开发活。
6. 信息不足就收着说,不要硬编。
7. 回复尽量短,但要保留人格味道。
8. 只输出一个 JSON 对象,不要输出解释。
输出格式:
{
"should_reply": true,
"topic_summary": "一句话概括当前话题",
"reply_mode": "social_short",
"reply": "最终回复"
}
```
## LLM User
```text
人格:
{{persona}}
群画像:
{{group_profile}}
上下文:
{{context}}
相关记忆:
{{memory}}
当前消息:
{{current_message}}
控制信息:
{{control}}
图片:
{{images}}
```
## 结束节点
结束节点直接返回模型结果。
推荐在 Dify 里把工作流输出字段命名为:
`result_json`
如果不单独包字段,直接把大模型节点输出文本返回也可以。
## Python 侧约定
`ai_auto_response` 在 Dify 模式下会直接传这 6 个文本变量:
- `persona`
- `group_profile`
- `context`
- `memory`
- `current_message`
- `control`
如果命中图片场景,还会额外传:
- `images`
推荐在 Dify 开始节点里把 `images` 定义成文件数组变量,再在 LLM 节点里挂到视觉输入。

View File

@@ -450,10 +450,16 @@ class AIAutoResponsePlugin(MessagePluginInterface):
system_prompt = self.persona_engine.build_system_prompt(group_profile, reply_mode) system_prompt = self.persona_engine.build_system_prompt(group_profile, reply_mode)
user_prompt = build_user_prompt(context, memory_hints) user_prompt = build_user_prompt(context, memory_hints)
raw_response = self.llm_client.chat( raw_response = self._call_llm(
system_prompt, room_id=room_id,
user_prompt, sender=sender,
user_id=f"{room_id}:{sender}", sender_name=sender_name,
content=content,
group_profile=group_profile,
memory_hints=memory_hints,
context=context,
system_prompt=system_prompt,
user_prompt=user_prompt,
image_urls=image_urls, image_urls=image_urls,
) )
response = LLMResultParser.sanitize_response(raw_response, content) response = LLMResultParser.sanitize_response(raw_response, content)
@@ -550,6 +556,192 @@ class AIAutoResponsePlugin(MessagePluginInterface):
if len(items) > size: if len(items) > size:
self.group_messages[room_id] = items[-size:] self.group_messages[room_id] = items[-size:]
def _call_llm(
self,
*,
room_id: str,
sender: str,
sender_name: str,
content: str,
group_profile: Dict,
memory_hints: Dict,
context: Dict,
system_prompt: str,
user_prompt: str,
image_urls: List[str],
) -> str:
user_id = f"{room_id}:{sender}"
if self.llm_client.provider == "dify":
files = self._build_dify_image_files(user_id=user_id, image_urls=image_urls)
payload = self._build_dify_simple_inputs(
sender_name=sender_name,
content=content,
group_profile=group_profile,
memory_hints=memory_hints,
context=context,
files=files,
)
result = self.llm_client.run(
prompt=content,
user=user_id,
inputs=payload,
tag="ai_auto_response",
files=files,
)
if not result:
return ""
return str((result or {}).get("text", "") or "").strip()
return self.llm_client.chat(
system_prompt,
user_prompt,
user_id=user_id,
image_urls=image_urls,
)
def _build_dify_simple_inputs(
self,
*,
sender_name: str,
content: str,
group_profile: Dict,
memory_hints: Dict,
context: Dict,
files: List[Dict[str, Any]],
) -> Dict[str, Any]:
persona = self._compose_dify_persona_text(group_profile, context)
group_profile_text = str(context.get("group_profile_prompt", "") or "").strip() or "当前群没有特殊画像。"
context_parts = [
self._string_block("最近上下文", self._join_recent_messages(context)),
self._string_block("引用补充", context.get("quote_prompt", "")),
self._string_block("图片补充", context.get("image_prompt", "")),
self._string_block("图片谨慎提示", context.get("image_safety_prompt", "")),
]
context_text = "\n\n".join([part for part in context_parts if part]).strip() or "无额外上下文。"
memory_parts = [
self._string_block("成员记忆", context.get("memory_prompt", "")),
self._string_block("群关系记忆", context.get("social_memory_prompt", "")),
self._string_block("群事实记忆", context.get("group_facts_prompt", "")),
self._string_block("向量召回记忆", context.get("vector_memory_prompt", "")),
self._string_block(
"回归状态",
str(memory_hints.get("returning_member_state", "") or "").strip() or "none",
),
]
memory_text = "\n\n".join([part for part in memory_parts if part]).strip() or "无直接相关记忆。"
control_lines = [
f"reply_mode={context.get('reply_mode', 'social_short')}",
f"trigger_type={context.get('trigger_type', 'none')}",
f"flow_state={context.get('flow_state', 'idle')}",
f"speaker_name={context.get('speaker_name_clean', '') or sender_name}",
f"address_style={group_profile.get('address_style', '低频称呼,默认直接接话')}",
]
if context.get("coding_work_request"):
control_lines.append("coding_work_request=true")
if files:
control_lines.append(f"images={len(files)}")
return {
"persona": persona,
"group_profile": group_profile_text,
"context": context_text,
"memory": memory_text,
"current_message": f"{sender_name}: {content}",
"control": "\n".join(control_lines),
"images": files,
}
def _compose_dify_persona_text(self, group_profile: Dict, context: Dict) -> str:
preset = self.persona_engine.presets.get(
str(group_profile.get("persona_id", "") or self.persona_engine.default_persona_id)
) or {}
lines = [
str(preset.get("persona_text", "") or "").strip(),
f"整体风格:{preset.get('style', '')}".strip(),
f"熟悉感边界:{preset.get('familiarity_hint', '')}".strip(),
f"最多输出:{preset.get('max_reply_sentences', 3)}".strip(),
"不要暴露 AI、模型、提示词、system 或记忆来源。",
"不要输出 markdown、代码块、标签。",
"不要替人写代码、改脚本、实现插件、代做开发活。",
"回复要自然、像群友,只处理当前最相关的一个话题。",
"如果信息不足就收着说,不要硬编。",
"哪怕短回复,也尽量保留一点人格味道,别压成纯功能性短句。",
]
length_rule = str(context.get("reply_mode", "") or "").strip()
if length_rule:
lines.append(f"当前回复模式:{length_rule}")
return "\n".join([line for line in lines if line])
@staticmethod
def _join_recent_messages(context: Dict) -> str:
items = context.get("recent_message_items", []) or []
lines = []
for item in items:
sender = str(item.get("sender", "") or "未知成员").strip()
content = str(item.get("content", "") or "").strip()
if sender and content:
lines.append(f"{sender}: {content}")
return "\n".join(lines)
@staticmethod
def _string_block(title: str, value: Any) -> str:
text = str(value or "").strip()
if not text or text in {"", "暂无", "暂无稳定成员画像。"}:
return ""
return f"{title}\n{text}"
def _build_dify_image_files(self, *, user_id: str, image_urls: List[str]) -> List[Dict[str, Any]]:
files: List[Dict[str, Any]] = []
for index, image_url in enumerate(image_urls or [], start=1):
raw = str(image_url or "").strip()
if not raw:
continue
if raw.startswith("http://") or raw.startswith("https://"):
ref = self.llm_client.build_dify_file_ref(file_type="image", remote_url=raw)
if ref:
files.append(ref)
continue
if not raw.startswith("data:"):
continue
image_bytes, mime_type = self.llm_client.decode_data_url(raw)
if not image_bytes:
continue
ext = self._guess_image_extension(mime_type)
upload = self.llm_client.upload_dify_file(
user=user_id,
file_bytes=image_bytes,
filename=f"ai_auto_response_{index}.{ext}",
mime_type=mime_type,
)
if not upload:
self._log_event(
"dify_image_upload_fail",
room_id=user_id.split(":", 1)[0],
sender=user_id.split(":", 1)[1] if ":" in user_id else user_id,
reason=self.llm_client.last_error,
)
continue
ref = self.llm_client.build_dify_file_ref(
file_type="image",
upload_file_id=str(upload.get("id", "") or "").strip(),
)
if ref:
files.append(ref)
return files
@staticmethod
def _guess_image_extension(mime_type: str) -> str:
value = str(mime_type or "").strip().lower()
if value.endswith("/png"):
return "png"
if value.endswith("/webp"):
return "webp"
if value.endswith("/gif"):
return "gif"
return "jpg"
@staticmethod @staticmethod
def _parse_persona_command(content: str) -> Dict[str, str] | None: def _parse_persona_command(content: str) -> Dict[str, str] | None:
text = str(content or "").strip() text = str(content or "").strip()

View File

@@ -1,6 +1,9 @@
from __future__ import annotations from __future__ import annotations
import base64
import binascii
import json import json
import mimetypes
import time import time
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urlparse from urllib.parse import urlparse
@@ -67,9 +70,10 @@ class UnifiedLLMClient:
user: str, user: str,
inputs: Optional[Dict[str, Any]] = None, inputs: Optional[Dict[str, Any]] = None,
tag: str = "", tag: str = "",
files: Optional[List[Dict[str, Any]]] = None,
) -> Optional[Dict[str, Any]]: ) -> Optional[Dict[str, Any]]:
if self.provider == "dify": if self.provider == "dify":
return self.generate(prompt=prompt, user=user, inputs=inputs or {}, tag=tag) return self.generate(prompt=prompt, user=user, inputs=inputs or {}, tag=tag, files=files or [])
effective_prompt = prompt or self._stringify_inputs(inputs or {}) effective_prompt = prompt or self._stringify_inputs(inputs or {})
return self.generate( return self.generate(
@@ -78,8 +82,80 @@ class UnifiedLLMClient:
user=user, user=user,
inputs=inputs or {}, inputs=inputs or {},
tag=tag, tag=tag,
files=files or [],
) )
def upload_dify_file(
    self,
    *,
    user: str,
    file_bytes: bytes,
    filename: str,
    mime_type: str = "",
) -> Optional[Dict[str, Any]]:
    """Upload one file to the Dify ``files/upload`` endpoint.

    Returns the decoded JSON payload when it contains an ``id``, otherwise
    ``None`` with the reason stored in ``self.last_error``.

    The request is always attempted at least once, even when
    ``self.max_retries`` is zero or negative. Client errors that a retry
    cannot fix (4xx other than 408 and 429) end the retry loop early
    instead of sleeping and trying again.
    """
    self.last_error = ""
    if self.provider != "dify":
        self.last_error = "upload_not_supported_for_provider"
        return None
    if not self.base_url or not self.api_key or not user or not file_bytes or not filename:
        self.last_error = "upload_missing_required_fields"
        return None

    # Tolerate a configured base URL that ends with "/".
    upload_url = f"{str(self.base_url).rstrip('/')}/files/upload"
    headers = {"Authorization": self._build_auth_header(self.api_key)}
    detected_mime = mime_type or mimetypes.guess_type(filename)[0] or "application/octet-stream"
    files = {
        "file": (filename, file_bytes, detected_mime),
    }
    data = {"user": user}

    # max_retries counts total attempts; never let it skip the request.
    attempts = max(1, int(self.max_retries or 0))
    for attempt in range(1, attempts + 1):
        try:
            response = requests.post(
                upload_url,
                headers=headers,
                files=files,
                data=data,
                timeout=self.timeout_seconds,
            )
            response.raise_for_status()
            payload = response.json()
            if isinstance(payload, dict) and payload.get("id"):
                return payload
            self.last_error = "upload_missing_file_id"
        except requests.HTTPError as exc:
            self.last_error = f"upload_failed:attempt_{attempt}:{exc}"
            status = getattr(exc.response, "status_code", None)
            # Auth, validation and size errors will not succeed on retry.
            if status is not None and 400 <= status < 500 and status not in (408, 429):
                break
        except Exception as exc:
            # Best-effort boundary: callers rely on None + last_error.
            self.last_error = f"upload_failed:attempt_{attempt}:{exc}"
        if attempt < attempts:
            time.sleep(self.retry_delay_seconds * attempt)
    return None
@staticmethod
def build_dify_file_ref(
*,
file_type: str = "image",
remote_url: str = "",
upload_file_id: str = "",
) -> Dict[str, Any]:
if upload_file_id:
return {
"type": file_type,
"transfer_method": "local_file",
"upload_file_id": upload_file_id,
}
if remote_url:
return {
"type": file_type,
"transfer_method": "remote_url",
"url": remote_url,
}
return {}
@staticmethod
def decode_data_url(data_url: str) -> Tuple[bytes, str]:
raw = str(data_url or "").strip()
if not raw.startswith("data:") or "," not in raw:
return b"", ""
header, encoded = raw.split(",", 1)
mime_type = header[5:].split(";", 1)[0].strip()
try:
return base64.b64decode(encoded), mime_type
except (ValueError, binascii.Error):
return b"", mime_type
def generate( def generate(
self, self,
prompt: str = "", prompt: str = "",