refactor: centralize llm backend configuration

2026-04-08 13:43:41 +08:00
parent df1939d60b
commit aecb62cb4d
19 changed files with 945 additions and 792 deletions
--- a/utils/ai/llm_registry.py
+++ b/utils/ai/llm_registry.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import yaml
+
+
+class LLMRegistry:
+    """从项目根 config.yaml 读取集中式 LLM 后端配置。"""
+
+    _cache: Dict[str, Any] = {"mtime": None, "data": {}}
+
+    @classmethod
+    def get_root_config_path(cls) -> Path:
+        return Path(__file__).resolve().parents[2] / "config.yaml"
+
+    @classmethod
+    def load_root_config(cls) -> Dict[str, Any]:
+        path = cls.get_root_config_path()
+        if not path.exists():
+            return {}
+
+        stat = path.stat()
+        if cls._cache["mtime"] == stat.st_mtime and cls._cache["data"]:
+            return cls._cache["data"]
+
+        with open(path, "r", encoding="utf-8") as fp:
+            data = yaml.safe_load(fp) or {}
+        cls._cache = {"mtime": stat.st_mtime, "data": data}
+        return data
+
+    @classmethod
+    def get_llm_config(cls) -> Dict[str, Any]:
+        config = cls.load_root_config()
+        llm_config = config.get("llm", {}) or {}
+        return llm_config if isinstance(llm_config, dict) else {}
+
+    @classmethod
+    def get_backend(cls, backend_name: str) -> Dict[str, Any]:
+        if not backend_name:
+            return {}
+        llm_config = cls.get_llm_config()
+        backends = llm_config.get("backends", {}) or {}
+        backend = backends.get(backend_name, {}) or {}
+        return dict(backend) if isinstance(backend, dict) else {}
+
+    @classmethod
+    def resolve(cls, local_config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        local = dict(local_config or {})
+        backend_name = (
+            local.get("backend")
+            or local.get("backend_name")
+            or local.get("backend_ref")
+            or ""
+        )
+        if not backend_name:
+            return local
+
+        merged = cls.get_backend(str(backend_name).strip())
+        merged.update(local)
+        merged["backend"] = backend_name
+        return merged
+
--- a/utils/ai/unified_llm.py
+++ b/utils/ai/unified_llm.py
@@ -0,0 +1,540 @@
+from __future__ import annotations
+
+import json
+import time
+from typing import Any, Dict, List, Optional, Tuple
+from urllib.parse import urlparse
+
+import requests
+from loguru import logger
+
+from utils.ai.llm_registry import LLMRegistry
+
+
+class UnifiedLLMClient:
+    """统一的 LLM 调用客户端，兼容 OpenAI-compatible 与 Dify。"""
+
+    def __init__(self, config: Optional[Dict[str, Any]] = None):
+        self.LOG = logger
+        self.raw_config = config or {}
+        self.config = self._normalize_config(self.raw_config)
+        self.enabled = bool(self.config.get("enabled", True))
+        self.provider = str(self.config.get("provider", "openai_compatible")).strip().lower()
+        self.base_url = str(self.config.get("base_url", "")).rstrip("/")
+        self.endpoint = str(self.config.get("endpoint", "")).lstrip("/")
+        self.api_key = str(self.config.get("api_key", "")).strip()
+        self.model = str(self.config.get("model", "")).strip()
+        self.timeout_seconds = int(self.config.get("timeout_seconds", 60))
+        self.timeout = self.timeout_seconds
+        self.temperature = float(self.config.get("temperature", 0.7))
+        self.max_tokens = int(self.config.get("max_tokens", 1024))
+        self.stream = bool(self.config.get("stream", False))
+        self.max_retries = max(int(self.config.get("max_retries", 3) or 3), 1)
+        self.retry_delay_seconds = float(self.config.get("retry_delay_seconds", 1.0) or 1.0)
+        self.mode = str(self.config.get("mode", "chat")).strip().lower()
+        self.response_mode = str(self.config.get("response_mode", "blocking")).strip().lower()
+        self.workflow_output_key = str(self.config.get("workflow_output_key", "text")).strip()
+        self.default_system_prompt = str(self.config.get("system_prompt", "")).strip()
+        self.last_error = ""
+
+    def is_available(self) -> bool:
+        if not self.enabled:
+            return False
+        if self.provider == "openai_compatible":
+            return bool(self.base_url and self.endpoint and self.model)
+        if self.provider == "dify":
+            return bool(self.base_url and self.endpoint and self.api_key)
+        return False
+
+    def chat(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        user_id: str,
+        image_urls: Optional[List[str]] = None,
+    ) -> str:
+        result = self.generate(
+            system_prompt=system_prompt,
+            user_prompt=user_prompt,
+            user=user_id,
+            image_urls=image_urls or [],
+        )
+        return (result or {}).get("text", "") or ""
+
+    def run(
+        self,
+        prompt: str,
+        user: str,
+        inputs: Optional[Dict[str, Any]] = None,
+        tag: str = "",
+    ) -> Optional[Dict[str, Any]]:
+        if self.provider == "dify":
+            return self.generate(prompt=prompt, user=user, inputs=inputs or {}, tag=tag)
+
+        effective_prompt = prompt or self._stringify_inputs(inputs or {})
+        return self.generate(
+            system_prompt=self.default_system_prompt,
+            user_prompt=effective_prompt,
+            user=user,
+            inputs=inputs or {},
+            tag=tag,
+        )
+
+    def generate(
+        self,
+        prompt: str = "",
+        user: str = "",
+        inputs: Optional[Dict[str, Any]] = None,
+        tag: str = "",
+        system_prompt: str = "",
+        user_prompt: str = "",
+        image_urls: Optional[List[str]] = None,
+        files: Optional[List[Dict[str, Any]]] = None,
+    ) -> Optional[Dict[str, Any]]:
+        self.last_error = ""
+        if not self.is_available():
+            self.last_error = "client_unavailable"
+            return None
+
+        if self.provider == "dify":
+            return self._generate_dify(
+                prompt=prompt,
+                user=user,
+                inputs=inputs or {},
+                tag=tag,
+                files=files or [],
+            )
+        if self.provider == "openai_compatible":
+            return self._generate_openai(
+                system_prompt=system_prompt,
+                user_prompt=user_prompt or prompt,
+                user=user,
+                image_urls=image_urls or [],
+            )
+
+        self.last_error = f"unsupported_provider:{self.provider}"
+        return None
+
+    def _generate_openai(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        user: str,
+        image_urls: List[str],
+    ) -> Optional[Dict[str, Any]]:
+        payload = {
+            "model": self.model,
+            "messages": self._build_messages(system_prompt or self.default_system_prompt, user_prompt, image_urls),
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+            "user": user,
+            "stream": self.stream,
+        }
+        headers = {"Content-Type": "application/json"}
+        if self.api_key:
+            headers["Authorization"] = self._build_auth_header(self.api_key)
+
+        url = f"{self.base_url}/{self.endpoint}"
+        for attempt in range(1, self.max_retries + 1):
+            try:
+                if self.stream:
+                    text, raw = self._request_openai_stream(url, payload, headers)
+                else:
+                    text, raw = self._request_openai_json(url, payload, headers)
+                if text:
+                    return {
+                        "text": text,
+                        "usage": self._extract_openai_usage(raw),
+                        "raw": raw,
+                    }
+                self.last_error = f"empty_model_output:{self.model}"
+            except Exception as exc:
+                self.last_error = f"request_failed:attempt_{attempt}:{exc}"
+            if attempt < self.max_retries:
+                time.sleep(self.retry_delay_seconds * attempt)
+        return None
+
+    def _generate_dify(
+        self,
+        prompt: str,
+        user: str,
+        inputs: Dict[str, Any],
+        tag: str,
+        files: List[Dict[str, Any]],
+    ) -> Optional[Dict[str, Any]]:
+        headers = {
+            "Authorization": self._build_auth_header(self.api_key),
+            "Content-Type": "application/json",
+        }
+        payload_inputs = dict(inputs or {})
+        if self.mode == "workflow":
+            if prompt and "query" not in payload_inputs:
+                payload_inputs["query"] = prompt
+            payload = {
+                "inputs": payload_inputs,
+                "response_mode": self.response_mode,
+                "user": user,
+                "files": files,
+            }
+        elif self.mode == "completion":
+            payload = {
+                "inputs": payload_inputs,
+                "query": prompt,
+                "response_mode": self.response_mode,
+                "user": user,
+                "files": files,
+            }
+        else:
+            payload = {
+                "inputs": payload_inputs,
+                "query": prompt,
+                "response_mode": self.response_mode,
+                "conversation_id": "",
+                "user": user,
+                "files": files,
+            }
+
+        url = f"{self.base_url}/{self.endpoint}"
+        for attempt in range(1, self.max_retries + 1):
+            try:
+                if self.response_mode == "streaming":
+                    parsed = self._request_dify_stream(url, payload, headers, tag)
+                else:
+                    response = requests.post(url, headers=headers, json=payload, timeout=self.timeout_seconds)
+                    response.raise_for_status()
+                    parsed = self._parse_dify_response(response.json())
+                if parsed and parsed.get("text"):
+                    return parsed
+                self.last_error = f"empty_model_output:{self.mode}"
+            except Exception as exc:
+                self.last_error = f"request_failed:attempt_{attempt}:{exc}"
+                self.LOG.warning(f"[UnifiedLLMClient] Dify 请求失败: tag={tag}, attempt={attempt}, error={exc}")
+            if attempt < self.max_retries:
+                time.sleep(self.retry_delay_seconds * attempt)
+        return None
+
+    def _request_openai_json(self, url: str, payload: Dict[str, Any], headers: Dict[str, str]) -> Tuple[str, Dict[str, Any]]:
+        response = requests.post(url, json=payload, headers=headers, timeout=self.timeout_seconds)
+        response.raise_for_status()
+        data = response.json()
+        return self._extract_openai_text(data), data
+
+    def _request_openai_stream(
+        self,
+        url: str,
+        payload: Dict[str, Any],
+        headers: Dict[str, str],
+    ) -> Tuple[str, Dict[str, Any]]:
+        chunks: List[str] = []
+        with requests.post(url, json=payload, headers=headers, timeout=self.timeout_seconds, stream=True) as response:
+            response.raise_for_status()
+            buffer = b""
+            for part in response.iter_content(chunk_size=None):
+                if not part:
+                    continue
+                buffer += part
+                while b"\n\n" in buffer:
+                    event, buffer = buffer.split(b"\n\n", 1)
+                    try:
+                        text_piece, done = self._parse_openai_sse_event(event.decode("utf-8"))
+                    except UnicodeDecodeError:
+                        buffer = event + b"\n\n" + buffer
+                        break
+                    if text_piece:
+                        chunks.append(text_piece)
+                    if done:
+                        break
+        return "".join(chunks).strip(), {"stream_text": "".join(chunks).strip()}
+
+    def _request_dify_stream(
+        self,
+        url: str,
+        payload: Dict[str, Any],
+        headers: Dict[str, str],
+        tag: str,
+    ) -> Optional[Dict[str, Any]]:
+        """POST ``payload`` with ``stream=True`` and reduce the event stream to one result.
+
+        The response is read line by line. ``event:`` lines update the current
+        event name, and ``data:`` lines are decoded as JSON. Text found in each
+        chunk is collected, and the last chunk whose event is
+        ``workflow_finished`` or ``message_end`` is kept as the final payload.
+
+        Returns the parsed final payload when it contains text, otherwise the
+        concatenated text fragments, otherwise ``None`` (after a warning that
+        includes ``tag``). An HTTP error status raises via ``raise_for_status``.
+        """
+        with requests.post(url, headers=headers, json=payload, timeout=self.timeout_seconds, stream=True) as response:
+            response.raise_for_status()
+            event_name = ""
+            text_fragments: List[str] = []
+            final_payload = None
+
+            for raw_line in response.iter_lines(decode_unicode=True):
+                if raw_line is None:
+                    continue
+                line = str(raw_line).strip()
+                if not line:
+                    continue
+                if line.startswith("event:"):
+                    # Remember the event name for data lines that carry no "event" field.
+                    event_name = line[6:].strip()
+                    continue
+                if not line.startswith("data:"):
+                    continue
+
+                data_text = line[5:].strip()
+                if not data_text or data_text == "[DONE]":
+                    continue
+                try:
+                    chunk = json.loads(data_text)
+                except Exception:
+                    # Best effort: a data line that is not valid JSON is skipped.
+                    continue
+
+                candidate_text = self._extract_dify_stream_text(chunk)
+                if candidate_text:
+                    text_fragments.append(candidate_text)
+
+                # The chunk's own "event" field wins over the preceding "event:" line.
+                chunk_event = str(chunk.get("event") or event_name or "").strip()
+                if chunk_event in {"workflow_finished", "message_end"}:
+                    final_payload = chunk
+
+            # Prefer the terminal event's payload when it yields text.
+            if final_payload:
+                parsed = self._parse_dify_response(final_payload)
+                if parsed and parsed.get("text"):
+                    return parsed
+
+            # Otherwise fall back to the fragments gathered while streaming.
+            text = "".join(fragment for fragment in text_fragments if fragment).strip()
+            if text:
+                return {"text": text, "usage": {}, "raw": final_payload or {}}
+
+            self.LOG.warning(f"[UnifiedLLMClient] Dify 流式响应未产出有效内容: tag={tag}")
+            return None
+
+    @staticmethod
+    def _build_messages(system_prompt: str, user_prompt: str, image_urls: List[str]) -> List[Dict[str, Any]]:
+        user_content: str | List[Dict[str, Any]]
+        if image_urls:
+            content_parts: List[Dict[str, Any]] = [{"type": "text", "text": user_prompt}]
+            for image_url in image_urls:
+                if image_url:
+                    content_parts.append({"type": "image_url", "image_url": {"url": image_url}})
+            user_content = content_parts
+        else:
+            user_content = user_prompt
+        messages: List[Dict[str, Any]] = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": user_content})
+        return messages
+
+    @staticmethod
+    def _extract_openai_text(data: Dict[str, Any]) -> str:
+        choices = data.get("choices") or []
+        if choices:
+            message = choices[0].get("message", {}) or {}
+            content = message.get("content")
+            if isinstance(content, str) and content.strip():
+                return content.strip()
+            if isinstance(content, list):
+                parts = []
+                for item in content:
+                    if isinstance(item, dict):
+                        text = item.get("text") or item.get("content")
+                        if isinstance(text, str) and text.strip():
+                            parts.append(text.strip())
+                if parts:
+                    return "\n".join(parts).strip()
+            for key in ("reasoning_content", "text", "output_text"):
+                value = message.get(key)
+                if isinstance(value, str) and value.strip():
+                    return value.strip()
+        for key in ("output_text", "text", "answer", "response"):
+            value = data.get(key)
+            if isinstance(value, str) and value.strip():
+                return value.strip()
+        return ""
+
+    @classmethod
+    def _parse_openai_sse_event(cls, event_text: str) -> Tuple[str, bool]:
+        lines = [line.strip() for line in event_text.splitlines() if line.strip()]
+        data_lines = [line[5:].strip() for line in lines if line.startswith("data:")]
+        if not data_lines:
+            return "", False
+        data = "\n".join(data_lines)
+        if data == "[DONE]":
+            return "", True
+        obj = json.loads(data)
+        choice = (obj.get("choices") or [{}])[0]
+        delta = choice.get("delta") or {}
+        content = delta.get("content")
+        if isinstance(content, str):
+            return content, False
+        if isinstance(content, list):
+            parts = []
+            for item in content:
+                if isinstance(item, dict):
+                    text = item.get("text") or item.get("content")
+                    if isinstance(text, str):
+                        parts.append(text)
+            return "".join(parts), False
+        return "", False
+
+    def _parse_dify_response(self, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        if self.mode == "workflow":
+            return self._parse_dify_workflow_response(data)
+        answer = str(data.get("answer", "") or "").strip()
+        usage = (data.get("metadata") or {}).get("usage", {}) or {}
+        return {"text": answer, "usage": usage, "raw": data}
+
+    def _parse_dify_workflow_response(self, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        payload = (data or {}).get("data", {}) or {}
+        outputs = payload.get("outputs", {}) or {}
+        text = ""
+
+        for key in filter(None, [self.workflow_output_key, "text", "answer", "result_json", "result"]):
+            if outputs.get(key) is not None:
+                text = self._stringify_output(outputs.get(key))
+                if text:
+                    break
+
+        if not text:
+            for value in outputs.values():
+                text = self._stringify_output(value)
+                if text:
+                    break
+
+        usage = {
+            "total_tokens": payload.get("total_tokens"),
+            "latency": payload.get("elapsed_time"),
+        }
+        return {"text": text.strip(), "usage": usage, "raw": data}
+
+    def _extract_dify_stream_text(self, chunk: Dict[str, Any]) -> str:
+        if not isinstance(chunk, dict):
+            return ""
+        payload = (chunk.get("data") or {}) if isinstance(chunk.get("data"), dict) else {}
+        outputs = payload.get("outputs", {}) if isinstance(payload.get("outputs"), dict) else {}
+
+        for key in filter(None, [self.workflow_output_key, "text", "answer", "result_json", "result"]):
+            if outputs.get(key) is not None:
+                return self._stringify_output(outputs.get(key))
+
+        for key in ("text", "answer"):
+            if chunk.get(key) is not None:
+                return self._stringify_output(chunk.get(key))
+
+        return ""
+
+    @staticmethod
+    def _extract_openai_usage(data: Dict[str, Any]) -> Dict[str, Any]:
+        usage = data.get("usage", {}) or {}
+        if usage:
+            return usage
+        return {}
+
+    @staticmethod
+    def _stringify_output(value: Any) -> str:
+        if value is None:
+            return ""
+        if isinstance(value, str):
+            return value.strip()
+        if isinstance(value, (dict, list)):
+            return json.dumps(value, ensure_ascii=False)
+        return str(value).strip()
+
+    @classmethod
+    def _normalize_config(cls, config: Dict[str, Any]) -> Dict[str, Any]:
+        normalized = LLMRegistry.resolve(config or {})
+        normalized["enabled"] = bool(
+            normalized.get("enabled", normalized.get("enable", True))
+        )
+
+        if not normalized.get("provider"):
+            normalized["provider"] = cls._guess_provider(normalized)
+
+        parsed_url = cls._split_url(
+            normalized.get("api_url")
+            or normalized.get("url")
+        )
+        base_url = (
+            normalized.get("base_url")
+            or normalized.get("api_base_url")
+            or parsed_url[0]
+            or ""
+        )
+        endpoint = (
+            normalized.get("endpoint")
+            or parsed_url[1]
+            or ""
+        )
+
+        normalized["base_url"] = str(base_url).rstrip("/")
+        normalized["endpoint"] = str(endpoint).lstrip("/")
+        normalized["api_key"] = (
+            normalized.get("api_key")
+            or normalized.get("api-key")
+            or normalized.get("authorization")
+            or ""
+        )
+        normalized["timeout_seconds"] = int(
+            normalized.get("timeout_seconds")
+            or normalized.get("request_timeout_seconds")
+            or normalized.get("request_timeout")
+            or 60
+        )
+        normalized["max_retries"] = int(normalized.get("max_retries", len(normalized.get("retry_delays_seconds", [])) + 1 or 3))
+        normalized["retry_delay_seconds"] = float(normalized.get("retry_delay_seconds", 1.0))
+        normalized["response_mode"] = normalized.get("response_mode", "blocking")
+        normalized["workflow_output_key"] = normalized.get("workflow_output_key", "text")
+
+        if normalized["provider"] == "dify":
+            default_endpoint = cls._guess_dify_endpoint(normalized)
+            if not normalized["endpoint"]:
+                normalized["endpoint"] = default_endpoint
+        else:
+            if not normalized["endpoint"]:
+                normalized["endpoint"] = "chat/completions"
+
+        return normalized
+
+    @staticmethod
+    def _guess_provider(config: Dict[str, Any]) -> str:
+        api_key = str(
+            config.get("api_key")
+            or config.get("api-key")
+            or config.get("authorization")
+            or ""
+        ).strip()
+        url = str(config.get("api_url") or config.get("url") or config.get("endpoint") or "").lower()
+        mode = str(config.get("mode", "")).lower()
+        if "workflows/run" in url or "chat-messages" in url or "completion-messages" in url:
+            return "dify"
+        if api_key.startswith("app-") or mode in {"workflow", "completion"}:
+            return "dify"
+        return "openai_compatible"
+
+    @staticmethod
+    def _guess_dify_endpoint(config: Dict[str, Any]) -> str:
+        mode = str(config.get("mode", "chat")).strip().lower()
+        if mode == "workflow":
+            return "workflows/run"
+        if mode == "completion":
+            return "completion-messages"
+        return "chat-messages"
+
+    @staticmethod
+    def _split_url(url: Optional[str]) -> Tuple[str, str]:
+        if not url:
+            return "", ""
+        parsed = urlparse(str(url))
+        if not parsed.scheme or not parsed.netloc:
+            return "", str(url)
+        base = f"{parsed.scheme}://{parsed.netloc}"
+        return base, parsed.path.lstrip("/")
+
+    @staticmethod
+    def _build_auth_header(value: str) -> str:
+        token = str(value or "").strip()
+        if not token:
+            return ""
+        if token.lower().startswith("bearer "):
+            return token
+        return f"Bearer {token}"
+
+    @staticmethod
+    def _stringify_inputs(inputs: Dict[str, Any]) -> str:
+        if not inputs:
+            return ""
+        try:
+            return json.dumps(inputs, ensure_ascii=False)
+        except Exception:
+            return str(inputs)