完善系统健康面板并接入AI运行态观测

2026-04-30 15:12:47 +08:00
parent 83910b287b
commit 4ddab01b8d
4 changed files with 206 additions and 9 deletions
--- a/utils/ai/unified_llm.py
+++ b/utils/ai/unified_llm.py
@@ -5,6 +5,8 @@ import binascii
 import json
 import mimetypes
 import time
+from collections import deque
+from threading import Lock
 from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import urlparse

@@ -18,6 +20,13 @@ from utils.ai.llm_registry import LLMRegistry
 class UnifiedLLMClient:
    """统一的 LLM 调用客户端，兼容 OpenAI-compatible 与 Dify。"""

+    # Runtime observability snapshot:
+    # 1. Keeps only a small window of the most recent calls, so it cannot grow without bound;
+    # 2. Lives at the unified-client layer, so every plugin that reuses this client benefits automatically;
+    # 3. Stores lightweight health metrics needed by the ops dashboard, not business-level detail.
+    _runtime_metrics = deque(maxlen=50)  # class-level; once 50 entries are held, appending drops the oldest
+    _runtime_lock = Lock()  # held while appending to / copying _runtime_metrics
+
    def __init__(self, config: Optional[Dict[str, Any]] = None):
        self.LOG = logger
        self.raw_config = config or {}
@@ -41,6 +50,62 @@ class UnifiedLLMClient:
        self.default_system_prompt = str(self.config.get("system_prompt", "")).strip()
        self.last_error = ""

+    @classmethod
+    def _record_runtime_metric(
+        cls,
+        *,
+        provider: str,
+        backend: str,
+        scene: str,
+        model: str,
+        success: bool,
+        latency_ms: float,
+        error: str = "",
+    ) -> None:
+        """Record the outcome of the latest LLM call so the admin health panel can aggregate and display it."""
+        with cls._runtime_lock:  # serialize appends to the class-level deque
+            cls._runtime_metrics.append({
+                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),  # local time, second resolution
+                "provider": str(provider or "").strip(),
+                "backend": str(backend or "").strip(),
+                "scene": str(scene or "").strip(),
+                "model": str(model or "").strip(),
+                "success": bool(success),
+                "latency_ms": round(float(latency_ms or 0.0), 2),  # falsy input becomes 0.0; kept to 2 decimals
+                "error": str(error or "").strip()[:300],  # cap stored error text at 300 characters
+            })
+
+    @classmethod
+    def get_runtime_snapshot(cls) -> Dict[str, Any]:
+        """Return an aggregated snapshot of the recent-call window for direct reuse by the admin observability endpoint."""
+        with cls._runtime_lock:
+            rows = list(cls._runtime_metrics)  # shallow copy; the aggregation below runs without holding the lock
+
+        total_calls = len(rows)
+        success_calls = sum(1 for item in rows if item.get("success"))
+        failed_calls = total_calls - success_calls
+        avg_latency_ms = round(
+            sum(float(item.get("latency_ms") or 0.0) for item in rows) / total_calls,
+            2
+        ) if total_calls else 0.0  # empty window: report 0.0 instead of dividing by zero
+        last_call = rows[-1] if rows else {}  # NOTE(review): this is the stored dict itself, not a copy - treat as read-only
+        last_error = ""
+        for item in reversed(rows):  # newest first: pick the most recent failed call that carries an error message
+            if not item.get("success") and item.get("error"):
+                last_error = str(item.get("error") or "").strip()
+                break
+
+        return {
+            "window_size": cls._runtime_metrics.maxlen,
+            "total_calls": total_calls,
+            "success_calls": success_calls,
+            "failed_calls": failed_calls,
+            "success_rate": round((success_calls / total_calls) * 100, 2) if total_calls else 0.0,  # percentage, 0-100
+            "avg_latency_ms": avg_latency_ms,
+            "last_call": last_call,
+            "last_error": last_error,
+        }
+
    def is_available(self) -> bool:
        if not self.enabled:
            return False
@@ -168,29 +233,50 @@ class UnifiedLLMClient:
        image_urls: Optional[List[str]] = None,
        files: Optional[List[Dict[str, Any]]] = None,
    ) -> Optional[Dict[str, Any]]:
+        started_at = time.monotonic()
        self.last_error = ""
+        result: Optional[Dict[str, Any]] = None
        if not self.is_available():
            self.last_error = "client_unavailable"
-            return None
-
-        if self.provider == "dify":
-            return self._generate_dify(
+        elif self.provider == "dify":
+            result = self._generate_dify(
                prompt=prompt,
                user=user,
                inputs=inputs or {},
                tag=tag,
                files=files or [],
            )
-        if self.provider == "openai_compatible":
-            return self._generate_openai(
+        elif self.provider == "openai_compatible":
+            result = self._generate_openai(
                system_prompt=system_prompt,
                user_prompt=user_prompt or prompt,
                user=user,
                image_urls=image_urls or [],
            )
+        else:
+            self.last_error = f"unsupported_provider:{self.provider}"

-        self.last_error = f"unsupported_provider:{self.provider}"
-        return None
+        # 统一在出口记录运行时快照，避免每种 provider 都重复埋点逻辑。
+        usage = (result or {}).get("usage", {}) if isinstance(result, dict) else {}
+        latency_ms = 0.0
+        if isinstance(usage, dict) and usage.get("latency") not in (None, ""):
+            try:
+                latency_ms = float(usage.get("latency")) * 1000
+            except Exception:
+                latency_ms = 0.0
+        if latency_ms <= 0:
+            latency_ms = (time.monotonic() - started_at) * 1000
+
+        self._record_runtime_metric(
+            provider=self.provider,
+            backend=str(self.config.get("backend", "") or ""),
+            scene=str(self.config.get("scene", "") or ""),
+            model=self.model or str(self.mode or ""),
+            success=bool(result and result.get("text")),
+            latency_ms=latency_ms,
+            error=self.last_error,
+        )
+        return result

    def _generate_openai(
        self,