完善系统健康面板并接入AI运行态观测
This commit is contained in:
@@ -5,6 +5,8 @@ import binascii
|
||||
import json
|
||||
import mimetypes
|
||||
import time
|
||||
from collections import deque
|
||||
from threading import Lock
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
@@ -18,6 +20,13 @@ from utils.ai.llm_registry import LLMRegistry
|
||||
class UnifiedLLMClient:
|
||||
"""统一的 LLM 调用客户端,兼容 OpenAI-compatible 与 Dify。"""
|
||||
|
||||
# Runtime observability snapshot:
|
||||
# 1. Keep only a short window of the most recent calls so it never grows without bound;
|
||||
# 2. Lives in the unified client layer, so every plugin that reuses this client benefits automatically;
|
||||
# 3. Holds lightweight health metrics for the ops dashboard, not business details.
|
||||
_runtime_metrics = deque(maxlen=50)
|
||||
_runtime_lock = Lock()
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
self.LOG = logger
|
||||
self.raw_config = config or {}
|
||||
@@ -41,6 +50,62 @@ class UnifiedLLMClient:
|
||||
self.default_system_prompt = str(self.config.get("system_prompt", "")).strip()
|
||||
self.last_error = ""
|
||||
|
||||
@classmethod
def _record_runtime_metric(
    cls,
    *,
    provider: str,
    backend: str,
    scene: str,
    model: str,
    success: bool,
    latency_ms: float,
    error: str = "",
) -> None:
    """Append the outcome of one LLM call to the shared rolling window.

    Text fields are coerced to stripped strings (falsy values become
    ``""``), ``latency_ms`` is rounded to two decimals (falsy values
    become ``0.0``) and ``error`` is truncated to 300 characters. The
    record is stamped with the local wall-clock time at insertion.
    """

    def _clean(value: Any) -> str:
        # Falsy inputs (None, "", 0) collapse to an empty string.
        return str(value or "").strip()

    labels = (
        ("provider", provider),
        ("backend", backend),
        ("scene", scene),
        ("model", model),
    )

    with cls._runtime_lock:
        # Key order is kept stable: timestamp, labels, then the outcome.
        entry = {"timestamp": time.strftime("%Y-%m-%d %H:%M:%S")}
        for key, raw in labels:
            entry[key] = _clean(raw)
        entry["success"] = bool(success)
        entry["latency_ms"] = round(float(latency_ms or 0.0), 2)
        entry["error"] = _clean(error)[:300]
        cls._runtime_metrics.append(entry)
|
||||
|
||||
@classmethod
def get_runtime_snapshot(cls) -> Dict[str, Any]:
    """Return aggregate health figures for the recent LLM call window.

    Returns:
        A dict with these keys:
        ``window_size`` (capacity of the rolling window),
        ``total_calls`` / ``success_calls`` / ``failed_calls``,
        ``success_rate`` (percentage, two decimals, ``0.0`` when empty),
        ``avg_latency_ms`` (two decimals, ``0.0`` when empty),
        ``last_call`` (copy of the newest record, ``{}`` when empty) and
        ``last_error`` (error text of the newest failed call that has
        one, ``""`` otherwise).
    """
    # Copy every record while holding the lock. The snapshot must not
    # hand out references to the dicts stored in the shared deque,
    # otherwise a caller mutating ``last_call`` would corrupt the
    # metrics seen by everyone else.
    with cls._runtime_lock:
        rows = [dict(item) for item in cls._runtime_metrics]

    total_calls = len(rows)
    success_calls = sum(1 for item in rows if item.get("success"))

    if total_calls:
        latency_sum = sum(float(item.get("latency_ms") or 0.0) for item in rows)
        avg_latency_ms = round(latency_sum / total_calls, 2)
        success_rate = round((success_calls / total_calls) * 100, 2)
    else:
        avg_latency_ms = 0.0
        success_rate = 0.0

    # Newest failure wins; failures without an error message are skipped.
    last_error = next(
        (
            str(item.get("error") or "").strip()
            for item in reversed(rows)
            if not item.get("success") and item.get("error")
        ),
        "",
    )

    return {
        "window_size": cls._runtime_metrics.maxlen,
        "total_calls": total_calls,
        "success_calls": success_calls,
        "failed_calls": total_calls - success_calls,
        "success_rate": success_rate,
        "avg_latency_ms": avg_latency_ms,
        "last_call": rows[-1] if rows else {},
        "last_error": last_error,
    }
|
||||
|
||||
def is_available(self) -> bool:
|
||||
if not self.enabled:
|
||||
return False
|
||||
@@ -168,29 +233,50 @@ class UnifiedLLMClient:
|
||||
image_urls: Optional[List[str]] = None,
|
||||
files: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
started_at = time.monotonic()
|
||||
self.last_error = ""
|
||||
result: Optional[Dict[str, Any]] = None
|
||||
if not self.is_available():
|
||||
self.last_error = "client_unavailable"
|
||||
return None
|
||||
|
||||
if self.provider == "dify":
|
||||
return self._generate_dify(
|
||||
elif self.provider == "dify":
|
||||
result = self._generate_dify(
|
||||
prompt=prompt,
|
||||
user=user,
|
||||
inputs=inputs or {},
|
||||
tag=tag,
|
||||
files=files or [],
|
||||
)
|
||||
if self.provider == "openai_compatible":
|
||||
return self._generate_openai(
|
||||
elif self.provider == "openai_compatible":
|
||||
result = self._generate_openai(
|
||||
system_prompt=system_prompt,
|
||||
user_prompt=user_prompt or prompt,
|
||||
user=user,
|
||||
image_urls=image_urls or [],
|
||||
)
|
||||
else:
|
||||
self.last_error = f"unsupported_provider:{self.provider}"
|
||||
|
||||
self.last_error = f"unsupported_provider:{self.provider}"
|
||||
return None
|
||||
# 统一在出口记录运行时快照,避免每种 provider 都重复埋点逻辑。
|
||||
usage = (result or {}).get("usage", {}) if isinstance(result, dict) else {}
|
||||
latency_ms = 0.0
|
||||
if isinstance(usage, dict) and usage.get("latency") not in (None, ""):
|
||||
try:
|
||||
latency_ms = float(usage.get("latency")) * 1000
|
||||
except Exception:
|
||||
latency_ms = 0.0
|
||||
if latency_ms <= 0:
|
||||
latency_ms = (time.monotonic() - started_at) * 1000
|
||||
|
||||
self._record_runtime_metric(
|
||||
provider=self.provider,
|
||||
backend=str(self.config.get("backend", "") or ""),
|
||||
scene=str(self.config.get("scene", "") or ""),
|
||||
model=self.model or str(self.mode or ""),
|
||||
success=bool(result and result.get("text")),
|
||||
latency_ms=latency_ms,
|
||||
error=self.last_error,
|
||||
)
|
||||
return result
|
||||
|
||||
def _generate_openai(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user