- 将 member_context 的 Dify workflow 调用响应模式切换为 streaming,提高长耗时工作流的连接稳定性
- 将成员画像工作流请求超时时间从 60 秒提升到 240 秒,适配当前群日批量提取任务的实际耗时
- 扩展 DifyClient,支持 workflow streaming 响应解析,在流式场景下尽量提取最终输出或增量文本
- 调整群日画像提取逻辑,AI 未返回成员有效结构化结果时不再写入 fallback 通用数据,而是直接跳过,等待下次任务重试
- 调整周/月周期摘要生成逻辑,AI 未返回有效结果时不再使用本地兜底拼装摘要,避免写入低质量周期画像
- 删除成员日摘要和周期摘要对应的 fallback 生成逻辑,彻底阻断这类无意义垃圾画像继续入库
- 新增跳过日志,明确标记哪些成员或周期摘要因为未提取到有效 AI 结果而未入库,便于后续诊断稳定性问题
188 lines
7.3 KiB
Python
188 lines
7.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
import json
|
|
from typing import Dict, Optional
|
|
|
|
import requests
|
|
from loguru import logger
|
|
|
|
|
|
class DifyClient:
    """General-purpose client for calling Dify completion/workflow endpoints.

    The client is configured from a plain dict and supports two response
    modes: ``blocking`` (a single JSON body) and ``streaming`` (a
    server-sent-event stream).  Every successful call is normalised to a dict
    with the keys ``text``, ``usage`` and ``raw``.
    """

    # Stream events whose payload is treated as the final result of a run.
    _FINAL_EVENTS = frozenset({"workflow_finished", "message_end"})
    # Output keys probed, in order, after the configured workflow_output_key.
    _FALLBACK_OUTPUT_KEYS = ("text", "answer", "result_json", "result")

    def __init__(self, api_config: Optional[Dict] = None):
        """Read connection settings from ``api_config``.

        Recognised keys: ``enable``/``enabled``, ``base_url``, ``api_key``,
        ``request_timeout`` (default 60), ``mode`` (default ``completion``),
        ``endpoint``, ``workflow_output_key`` (default ``text``) and
        ``response_mode`` (default ``blocking``).
        """
        api_config = api_config or {}
        self.LOG = logger
        # "enable" takes precedence; "enabled" is only consulted as a fallback.
        self.enabled = bool(api_config.get("enable", api_config.get("enabled", False)))
        self.base_url = (api_config.get("base_url") or "").rstrip("/")
        self.api_key = api_config.get("api_key", "")
        self.timeout = int(api_config.get("request_timeout", 60))
        self.mode = str(api_config.get("mode", "completion")).strip().lower()
        # The default endpoint depends on the mode unless explicitly configured.
        default_endpoint = "workflows/run" if self.mode == "workflow" else "completion-messages"
        self.endpoint = str(api_config.get("endpoint", default_endpoint)).lstrip("/")
        self.workflow_output_key = str(api_config.get("workflow_output_key", "text")).strip()
        self.response_mode = str(api_config.get("response_mode", "blocking")).strip().lower()

    def is_available(self) -> bool:
        """Return True when the client is enabled and has a base URL and API key."""
        return self.enabled and bool(self.base_url and self.api_key)

    def run(self, prompt: str, user: str, inputs: Optional[Dict] = None,
            tag: str = "") -> Optional[Dict]:
        """Send one request and return the normalised result.

        Args:
            prompt: Text placed under ``inputs["query"]`` when the caller did
                not already supply a ``query`` input.
            user: Value sent as the ``user`` field of the payload.
            inputs: Extra input variables; copied, never mutated.
            tag: Free-form label that is only used in log messages.

        Returns:
            A dict with ``text``, ``usage`` and ``raw``, or ``None`` when the
            client is unavailable, the request fails, or nothing was parsed.
            Errors are logged and swallowed; this method does not raise.
        """
        if not self.is_available():
            return None

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload_inputs = dict(inputs or {})
        if self.mode == "completion":
            # Completion mode always carries a query, even an empty one.
            payload_inputs.setdefault("query", prompt)
        elif prompt and "query" not in payload_inputs:
            payload_inputs["query"] = prompt

        payload = {
            "inputs": payload_inputs,
            "response_mode": self.response_mode,
            "user": user,
        }
        url = f"{self.base_url}/{self.endpoint}"
        try:
            self.LOG.info(
                f"[成员交互摘要][Dify] 发起请求: mode={self.mode}, response_mode={self.response_mode}, "
                f"endpoint={self.endpoint}, tag={tag}"
            )
            if self.response_mode == "streaming":
                parsed = self._run_streaming(url, headers, payload, tag)
            else:
                response = requests.post(url, headers=headers, json=payload, timeout=self.timeout)
                response.raise_for_status()
                data = response.json()
                parsed = self._parse_response(data)
            if parsed is not None:
                return parsed
            self.LOG.warning(f"[成员交互摘要][Dify] 响应内容为空: mode={self.mode}, tag={tag}")
            return None
        except Exception as e:
            # Deliberate best-effort boundary: callers only ever see None.
            self.LOG.warning(f"[成员交互摘要][Dify] 请求失败: mode={self.mode}, tag={tag}, error={e}")
            return None

    def _run_streaming(self, url: str, headers: Dict, payload: Dict, tag: str) -> Optional[Dict]:
        """POST with ``stream=True`` and parse the server-sent-event body.

        Raises whatever ``requests`` raises (including HTTP errors from
        ``raise_for_status``); ``run`` is responsible for catching them.
        """
        with requests.post(url, headers=headers, json=payload, timeout=self.timeout, stream=True) as response:
            response.raise_for_status()
            # Iterate raw bytes and decode as UTF-8 ourselves.  Letting requests
            # decode would use the header-derived encoding, which falls back to
            # ISO-8859-1 for text/* without a charset and garbles non-ASCII
            # text; str-based line splitting can also cut a JSON line at
            # characters such as U+2028.
            return self._consume_stream(response.iter_lines(), tag)

    def _consume_stream(self, lines, tag: str) -> Optional[Dict]:
        """Fold an iterable of SSE lines (bytes or str) into one result.

        The payload of a ``workflow_finished``/``message_end`` event is
        preferred.  If it yields no text, the text collected from the other
        events is concatenated and returned instead.  Returns ``None`` (after
        logging a warning) when the stream produced no usable text.
        """
        event_name = ""
        text_fragments = []
        final_payload = None

        for raw_line in lines:
            if raw_line is None:
                continue
            if isinstance(raw_line, (bytes, bytearray)):
                line = bytes(raw_line).decode("utf-8", errors="replace").strip()
            else:
                line = str(raw_line).strip()
            if not line:
                # A blank line terminates an SSE event, so the event name
                # must not leak into the next one.
                event_name = ""
                continue
            if line.startswith("event:"):
                event_name = line[6:].strip()
                continue
            if not line.startswith("data:"):
                continue

            data_text = line[5:].strip()
            if not data_text or data_text == "[DONE]":
                continue
            try:
                chunk = json.loads(data_text)
            except ValueError:
                # Malformed JSON in one event must not abort the whole stream.
                continue
            if not isinstance(chunk, dict):
                # Valid JSON that is not an object carries nothing we can read.
                continue

            candidate_text = self._extract_stream_text(chunk)
            if candidate_text:
                text_fragments.append(candidate_text)

            chunk_event = str(chunk.get("event") or event_name or "").strip()
            if chunk_event in self._FINAL_EVENTS:
                final_payload = chunk

        parsed = None
        if final_payload:
            parsed = self._parse_response(final_payload)
            if parsed and parsed.get("text"):
                return parsed

        text = "".join(text_fragments).strip()
        if text:
            return {
                "text": text,
                # Keep usage reported by the final event even when its text was empty.
                "usage": (parsed or {}).get("usage") or {},
                "raw": final_payload or {},
            }

        self.LOG.warning(f"[成员交互摘要][Dify] 流式响应未产出有效内容: tag={tag}")
        return None

    def _parse_response(self, data: Dict) -> Optional[Dict]:
        """Normalise a response body according to the configured mode."""
        if self.mode == "workflow":
            return self._parse_workflow_response(data)
        answer = data.get("answer", "")
        usage = (data.get("metadata") or {}).get("usage", {}) or {}
        return {
            "text": str(answer or "").strip(),
            "usage": usage,
            "raw": data,
        }

    def _candidate_output_keys(self):
        """Return the output keys to probe, configured key first, blanks dropped."""
        return [key for key in (self.workflow_output_key, *self._FALLBACK_OUTPUT_KEYS) if key]

    def _parse_workflow_response(self, data: Dict) -> Optional[Dict]:
        """Extract text and usage from a workflow response body.

        The first candidate key holding a non-None value wins, even if it
        stringifies to an empty string.  Only when no candidate key is
        present are the remaining output values scanned for the first
        non-empty one.
        """
        payload = data.get("data") if isinstance(data, dict) else None
        if not isinstance(payload, dict):
            payload = {}
        outputs = payload.get("outputs")
        if not isinstance(outputs, dict):
            outputs = {}

        text = ""
        for key in self._candidate_output_keys():
            value = outputs.get(key)
            if value is not None:
                text = self._stringify_output(value)
                break
        else:
            text = next(filter(None, map(self._stringify_output, outputs.values())), "")

        usage = {
            "total_tokens": payload.get("total_tokens"),
            "latency": payload.get("elapsed_time"),
        }
        return {
            "text": text.strip(),
            "usage": usage,
            "raw": data,
        }

    def _extract_stream_text(self, chunk: Dict) -> str:
        """Return the text carried by one stream chunk, or ``""``.

        ``data.outputs`` is probed first using the candidate output keys, then
        the chunk's top-level ``text``/``answer``.  Top-level string values
        are returned unstripped: they are incremental deltas, and trimming
        each one would delete the whitespace between concatenated pieces.
        """
        if not isinstance(chunk, dict):
            return ""
        payload = chunk.get("data")
        outputs = payload.get("outputs") if isinstance(payload, dict) else None

        if isinstance(outputs, dict):
            for key in self._candidate_output_keys():
                value = outputs.get(key)
                if value is not None:
                    return self._stringify_output(value)

        for key in ("text", "answer"):
            value = chunk.get(key)
            if value is not None:
                return value if isinstance(value, str) else self._stringify_output(value)

        return ""

    @staticmethod
    def _stringify_output(value) -> str:
        """Convert an output value to text.

        ``None`` becomes ``""``, strings are stripped, dicts and lists are
        JSON-encoded without ASCII escaping, anything else goes through
        ``str`` and is stripped.
        """
        if value is None:
            return ""
        if isinstance(value, str):
            return value.strip()
        if isinstance(value, (dict, list)):
            return json.dumps(value, ensure_ascii=False)
        return str(value).strip()
|