回滚本地LLM运行分析并对齐远端回滚状态
This commit is contained in:
@@ -16,7 +16,6 @@ from utils.markdown_to_image import get_md2img_health_snapshot, warmup_md2img_br
|
|||||||
from utils.ai.llm_registry import LLMRegistry
|
from utils.ai.llm_registry import LLMRegistry
|
||||||
from base.plugin_common.plugin_interface import PluginStatus
|
from base.plugin_common.plugin_interface import PluginStatus
|
||||||
from utils.ai.unified_llm import UnifiedLLMClient
|
from utils.ai.unified_llm import UnifiedLLMClient
|
||||||
from utils.decorator.async_job import async_job
|
|
||||||
|
|
||||||
# 创建系统信息蓝图
|
# 创建系统信息蓝图
|
||||||
system_bp = Blueprint('system', __name__)
|
system_bp = Blueprint('system', __name__)
|
||||||
@@ -43,506 +42,6 @@ def _save_system_yaml(config_obj: dict) -> None:
|
|||||||
yaml.safe_dump(config_obj, f, allow_unicode=True, sort_keys=False)
|
yaml.safe_dump(config_obj, f, allow_unicode=True, sort_keys=False)
|
||||||
|
|
||||||
|
|
||||||
def _safe_int(value, default: int = 0) -> int:
|
|
||||||
"""把数据库 / Redis 返回的字符串数字安全转成整数。"""
|
|
||||||
try:
|
|
||||||
if value in (None, ""):
|
|
||||||
return default
|
|
||||||
return int(float(value))
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
return default
|
|
||||||
|
|
||||||
|
|
||||||
def _safe_float(value, default: float = 0.0) -> float:
|
|
||||||
"""把数据库 / Redis 返回的值安全转成浮点数。"""
|
|
||||||
try:
|
|
||||||
if value in (None, ""):
|
|
||||||
return default
|
|
||||||
return float(value)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
return default
|
|
||||||
|
|
||||||
|
|
||||||
def _format_bytes_to_mb(value: int) -> float:
    """Convert a byte count to megabytes, rounded to two decimals.

    The input goes through ``_safe_float`` first, so values that helper
    cannot convert are counted as 0 bytes.
    """
    size_in_bytes = _safe_float(value, 0.0)
    size_in_mb = size_in_bytes / 1024 / 1024
    return round(size_in_mb, 2)
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_mysql_runtime_snapshot(db_manager) -> dict:
|
|
||||||
"""采集 MySQL 运行态摘要。
|
|
||||||
|
|
||||||
首页目标不是替代 DBA 工具,而是让管理员一眼判断:
|
|
||||||
1. 数据库是不是活着;
|
|
||||||
2. 当前连接压力高不高;
|
|
||||||
3. 当前库规模是否已经明显变大;
|
|
||||||
4. 有没有必要继续深入到更专业的监控页排查。
|
|
||||||
"""
|
|
||||||
snapshot = {
|
|
||||||
"status": "healthy",
|
|
||||||
"summary": "连接正常",
|
|
||||||
"database": db_manager.get_mysql_database_name(),
|
|
||||||
"version": "",
|
|
||||||
"threads_connected": 0,
|
|
||||||
"threads_running": 0,
|
|
||||||
"max_connections": 0,
|
|
||||||
"connection_usage_percent": 0.0,
|
|
||||||
"questions_per_second": 0.0,
|
|
||||||
"uptime_seconds": 0,
|
|
||||||
"table_count": 0,
|
|
||||||
"schema_size_mb": 0.0,
|
|
||||||
"slow_query_threshold_ms": db_manager.get_slow_query_threshold_ms(),
|
|
||||||
}
|
|
||||||
|
|
||||||
mysql_conn = db_manager.get_mysql_connection()
|
|
||||||
try:
|
|
||||||
with mysql_conn.cursor(dictionary=True) as cursor:
|
|
||||||
# 基础探活与版本识别:
|
|
||||||
# 1. SELECT VERSION() 成本极低;
|
|
||||||
# 2. 相比只做 SELECT 1,它还能顺便拿到版本信息;
|
|
||||||
# 3. 首页卡片里显示版本,方便线上排查“是不是某台库版本不一致”。
|
|
||||||
cursor.execute("SELECT VERSION() AS version, DATABASE() AS database_name")
|
|
||||||
version_row = cursor.fetchone() or {}
|
|
||||||
snapshot["version"] = str(version_row.get("version") or "").strip()
|
|
||||||
snapshot["database"] = str(version_row.get("database_name") or snapshot["database"] or "").strip()
|
|
||||||
|
|
||||||
cursor.execute(
|
|
||||||
"""
|
|
||||||
SHOW GLOBAL STATUS
|
|
||||||
WHERE Variable_name IN ('Threads_connected', 'Threads_running', 'Questions', 'Uptime')
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
status_rows = cursor.fetchall() or []
|
|
||||||
status_map = {
|
|
||||||
str(row.get("Variable_name") or "").strip(): row.get("Value")
|
|
||||||
for row in status_rows
|
|
||||||
}
|
|
||||||
|
|
||||||
cursor.execute(
|
|
||||||
"""
|
|
||||||
SHOW GLOBAL VARIABLES
|
|
||||||
WHERE Variable_name IN ('max_connections')
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
variable_rows = cursor.fetchall() or []
|
|
||||||
variable_map = {
|
|
||||||
str(row.get("Variable_name") or "").strip(): row.get("Value")
|
|
||||||
for row in variable_rows
|
|
||||||
}
|
|
||||||
|
|
||||||
# information_schema 聚合虽然比 SELECT 1 重一点,但仍属于轻量级元信息查询:
|
|
||||||
# 1. 只在首页 30 秒级刷新一次,成本可接受;
|
|
||||||
# 2. 能直接给出当前业务库表数量与体量变化;
|
|
||||||
# 3. 对判断“是不是消息表膨胀导致后台变慢”很有帮助。
|
|
||||||
cursor.execute(
|
|
||||||
"""
|
|
||||||
SELECT
|
|
||||||
COUNT(*) AS table_count,
|
|
||||||
COALESCE(SUM(data_length + index_length), 0) AS schema_size_bytes
|
|
||||||
FROM information_schema.tables
|
|
||||||
WHERE table_schema = DATABASE()
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
schema_row = cursor.fetchone() or {}
|
|
||||||
|
|
||||||
snapshot["threads_connected"] = _safe_int(status_map.get("Threads_connected"))
|
|
||||||
snapshot["threads_running"] = _safe_int(status_map.get("Threads_running"))
|
|
||||||
snapshot["max_connections"] = _safe_int(variable_map.get("max_connections"))
|
|
||||||
snapshot["uptime_seconds"] = _safe_int(status_map.get("Uptime"))
|
|
||||||
total_questions = _safe_int(status_map.get("Questions"))
|
|
||||||
if snapshot["uptime_seconds"] > 0:
|
|
||||||
snapshot["questions_per_second"] = round(total_questions / snapshot["uptime_seconds"], 2)
|
|
||||||
if snapshot["max_connections"] > 0:
|
|
||||||
snapshot["connection_usage_percent"] = round(
|
|
||||||
(snapshot["threads_connected"] / snapshot["max_connections"]) * 100,
|
|
||||||
1,
|
|
||||||
)
|
|
||||||
snapshot["table_count"] = _safe_int(schema_row.get("table_count"))
|
|
||||||
snapshot["schema_size_mb"] = _format_bytes_to_mb(schema_row.get("schema_size_bytes"))
|
|
||||||
|
|
||||||
if snapshot["connection_usage_percent"] >= 80 or snapshot["threads_running"] >= 12:
|
|
||||||
snapshot["status"] = "warning"
|
|
||||||
snapshot["summary"] = (
|
|
||||||
f"连接压力偏高:已连接 {snapshot['threads_connected']} / {snapshot['max_connections']},"
|
|
||||||
f"运行中线程 {snapshot['threads_running']}"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
snapshot["summary"] = (
|
|
||||||
f"连接正常:已连接 {snapshot['threads_connected']} / {snapshot['max_connections'] or '-'},"
|
|
||||||
f"QPS {snapshot['questions_per_second']}"
|
|
||||||
)
|
|
||||||
return snapshot
|
|
||||||
except Exception as mysql_error:
|
|
||||||
snapshot["status"] = "danger"
|
|
||||||
snapshot["summary"] = f"MySQL 探测失败: {mysql_error}"
|
|
||||||
return snapshot
|
|
||||||
finally:
|
|
||||||
mysql_conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_redis_runtime_snapshot(db_manager) -> dict:
    """Collect a dashboard-level runtime summary of Redis.

    Pings the server, reads ``INFO`` and ``DBSIZE`` and condenses them into a
    dict with ``status``, ``summary`` and a few load metrics. Any exception
    raised while talking to Redis is reported as ``status == "danger"``
    instead of being raised.
    """
    redis_settings = getattr(db_manager, "redis_config", {}) or {}
    snapshot = {
        "status": "healthy",
        "summary": "连接正常",
        "db_index": _safe_int(redis_settings.get("db", 0)),
        "key_count": 0,
        "connected_clients": 0,
        "blocked_clients": 0,
        "ops_per_sec": 0,
        "used_memory_human": "",
        "used_memory_peak_human": "",
        "memory_usage_percent": 0.0,
        "uptime_seconds": 0,
        "hit_rate_percent": 0.0,
    }

    try:
        client = db_manager.get_redis_connection()
        client.ping()
        server_info = client.info() or {}

        snapshot["key_count"] = _safe_int(client.dbsize())
        # Integer counters copied from INFO under their dashboard field name.
        for field, info_key in (
            ("connected_clients", "connected_clients"),
            ("blocked_clients", "blocked_clients"),
            ("ops_per_sec", "instantaneous_ops_per_sec"),
        ):
            snapshot[field] = _safe_int(server_info.get(info_key))
        # Human-readable memory figures are passed through as trimmed text.
        for field in ("used_memory_human", "used_memory_peak_human"):
            snapshot[field] = str(server_info.get(field) or "").strip()
        snapshot["uptime_seconds"] = _safe_int(server_info.get("uptime_in_seconds"))

        # Memory usage is only meaningful when a maxmemory limit is configured.
        memory_limit = _safe_int(server_info.get("maxmemory"))
        memory_used = _safe_int(server_info.get("used_memory"))
        if memory_limit > 0:
            snapshot["memory_usage_percent"] = round((memory_used / memory_limit) * 100, 1)

        hits = _safe_int(server_info.get("keyspace_hits"))
        misses = _safe_int(server_info.get("keyspace_misses"))
        lookups = hits + misses
        if lookups > 0:
            snapshot["hit_rate_percent"] = round((hits / lookups) * 100, 1)

        under_pressure = (
            snapshot["blocked_clients"] > 0 or snapshot["memory_usage_percent"] >= 80
        )
        if under_pressure:
            snapshot["status"] = "warning"
            headline = f"缓存压力需关注:keys {snapshot['key_count']},"
        else:
            headline = f"缓存正常:keys {snapshot['key_count']},"
        snapshot["summary"] = (
            headline
            + f"clients {snapshot['connected_clients']},ops/s {snapshot['ops_per_sec']}"
        )
        return snapshot
    except Exception as redis_error:
        snapshot["status"] = "danger"
        snapshot["summary"] = f"Redis 探测失败: {redis_error}"
        return snapshot
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_snapshot_datetime(value: str | None) -> datetime | None:
|
|
||||||
"""把首页摘要里常用的时间字符串安全转换为 datetime。"""
|
|
||||||
text = str(value or "").strip()
|
|
||||||
if not text:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
|
|
||||||
except ValueError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _count_enabled_runtime_items(items) -> int:
|
|
||||||
"""统计启用项数量。
|
|
||||||
|
|
||||||
兼容原因:
|
|
||||||
1. 新版目录模型里 providers/backends/scenes 可能是 dict;
|
|
||||||
2. 后台页面某些兜底逻辑里也可能给出 list;
|
|
||||||
3. 旧配置没有 enabled 字段时,直接按存在即计数。
|
|
||||||
"""
|
|
||||||
rows = []
|
|
||||||
if isinstance(items, dict):
|
|
||||||
rows = list(items.values())
|
|
||||||
elif isinstance(items, list):
|
|
||||||
rows = list(items)
|
|
||||||
count = 0
|
|
||||||
for row in rows:
|
|
||||||
if not isinstance(row, dict):
|
|
||||||
continue
|
|
||||||
if "enabled" not in row or bool(row.get("enabled", True)):
|
|
||||||
count += 1
|
|
||||||
return count
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_llm_catalog_summary() -> dict:
    """Summarise the LLM routing configuration for the dashboard.

    No real call is issued to probe the models. The summary only answers:
    1. whether the runtime has usable scenes and targets;
    2. which routing setup the call records an administrator sees roughly
       belong to.

    Returns a dict with ``provider_count``, ``scene_count``, ``target_count``,
    ``default_scene``, ``default_backend`` and ``has_routing``. If reading the
    registry raises, a warning is logged and an all-empty summary is returned.
    """
    try:
        catalog = LLMRegistry.get_catalog() or {}

        if not catalog:
            # No catalog model available: fall back to the legacy view so the
            # dashboard at least knows whether a basic routing config exists.
            legacy_llm = LLMRegistry.get_llm_config() or {}
            legacy_scenes = legacy_llm.get("scenes", {}) or {}
            legacy_backends = legacy_llm.get("backends", {}) or {}
            legacy_default_backend = str(legacy_llm.get("default_backend") or "").strip()
            return {
                "provider_count": 0,
                "scene_count": len(legacy_scenes) if isinstance(legacy_scenes, dict) else 0,
                "target_count": len(legacy_backends) if isinstance(legacy_backends, dict) else 0,
                "default_scene": "",
                "default_backend": legacy_default_backend,
                "has_routing": bool(legacy_scenes) or bool(legacy_default_backend),
            }

        providers = catalog.get("providers", {}) or {}
        dify_apps = catalog.get("dify_apps", {}) or {}
        backends = catalog.get("backends", {}) or {}
        scenes = catalog.get("scenes", {}) or {}

        default_scene = str(catalog.get("default_scene") or "").strip()
        if default_scene:
            default_backend = str(
                LLMRegistry.get_scene_backend_name(default_scene) or ""
            ).strip()
        else:
            default_backend = ""

        enabled_scene_count = _count_enabled_runtime_items(scenes)
        # Targets are the enabled backends plus the enabled Dify apps.
        enabled_target_count = (
            _count_enabled_runtime_items(backends) + _count_enabled_runtime_items(dify_apps)
        )
        return {
            "provider_count": _count_enabled_runtime_items(providers),
            "scene_count": enabled_scene_count,
            "target_count": enabled_target_count,
            "default_scene": default_scene,
            "default_backend": default_backend,
            "has_routing": enabled_scene_count > 0,
        }
    except Exception as llm_catalog_error:
        logger.warning(f"提取 LLM 路由摘要失败: {llm_catalog_error}")
        return {
            "provider_count": 0,
            "scene_count": 0,
            "target_count": 0,
            "default_scene": "",
            "default_backend": "",
            "has_routing": False,
        }
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_ai_runtime_snapshot() -> dict:
    """Build the LLM runtime summary shown on the dashboard home page.

    Design principles:
    1. only passive observations from the recent-call window are shown; no
       request is sent to probe the models;
    2. recent calls are combined with the static routing configuration, so an
       administrator sees which route was used, not just success/failure;
    3. when there were no recent calls, "not configured" is kept distinct
       from "configured but currently idle".

    Returns the client's runtime snapshot extended with routing counts,
    flattened ``last_*`` fields, a ``status`` and a ``summary``.
    """
    runtime_snapshot = UnifiedLLMClient.get_runtime_snapshot() or {}
    last_call = dict(runtime_snapshot.get("last_call") or {})
    catalog_summary = _extract_llm_catalog_summary()

    total_calls = _safe_int(runtime_snapshot.get("total_calls"))
    failed_calls = _safe_int(runtime_snapshot.get("failed_calls"))
    success_rate = _safe_float(runtime_snapshot.get("success_rate"))
    avg_latency_ms = _safe_float(runtime_snapshot.get("avg_latency_ms"))
    last_error = str(runtime_snapshot.get("last_error") or "").strip()

    def _last_call_text(field: str) -> str:
        # Text attribute of the most recent call, or "" when absent.
        return str(last_call.get(field) or "").strip()

    snapshot = {
        **runtime_snapshot,
        "last_call": last_call,
        "provider_count": catalog_summary.get("provider_count", 0),
        "scene_count": catalog_summary.get("scene_count", 0),
        "target_count": catalog_summary.get("target_count", 0),
        "default_scene": catalog_summary.get("default_scene", ""),
        "default_backend": catalog_summary.get("default_backend", ""),
        "has_routing": bool(catalog_summary.get("has_routing")),
        "last_provider": _last_call_text("provider"),
        "last_backend": _last_call_text("backend"),
        "last_scene": _last_call_text("scene"),
        "last_model": _last_call_text("model"),
        "last_timestamp": _last_call_text("timestamp"),
        "last_latency_ms": _safe_float(last_call.get("latency_ms")),
    }

    def _conclude(status: str, summary: str) -> dict:
        # Stamp the verdict onto the snapshot and hand it back.
        snapshot["status"] = status
        snapshot["summary"] = summary
        return snapshot

    if not snapshot["has_routing"]:
        return _conclude(
            "warning",
            "当前未发现完整的 LLM 路由配置,建议先检查默认场景与后端绑定",
        )

    if total_calls <= 0:
        return _conclude(
            "warning",
            f"已配置 {snapshot['scene_count']} 个场景、{snapshot['target_count']} 个目标,"
            "最近窗口内暂无统一 LLM 调用记录",
        )

    if failed_calls >= total_calls:
        status = "danger"
        headline = f"最近 {total_calls} 次调用全部失败,成功率 {success_rate:.2f}%,"
    elif failed_calls > 0 or last_error:
        status = "warning"
        headline = f"最近 {total_calls} 次调用中失败 {failed_calls} 次,成功率 {success_rate:.2f}%,"
    else:
        status = "healthy"
        headline = f"最近 {total_calls} 次调用全部成功,成功率 {success_rate:.2f}%,"

    return _conclude(status, headline + f"平均耗时 {avg_latency_ms:.2f}ms")
|
|
||||||
|
|
||||||
|
|
||||||
def _build_llm_runtime_analytics_payload() -> dict:
    """Build the recent-window LLM analytics payload.

    Why this lives in its own function:
    1. the home-page AI card only needs a summary, while the ``system_llm``
       page needs finer-grained grouped tables;
    2. both rely on the same runtime snapshot, so the scene/backend/provider/
       model aggregation is not scattered across several endpoints;
    3. the first stage is a "recent window analysis" that lets an
       administrator spot slow scenes, failing models and misbehaving backends.

    Returns a dict with an ``overview`` section, the raw ``recent_rows`` and
    the ``by_scene`` / ``by_backend`` / ``by_provider`` / ``by_model`` tables.
    """
    runtime_breakdown = UnifiedLLMClient.get_runtime_breakdown() or {}
    overview_snapshot = _extract_ai_runtime_snapshot()
    catalog_summary = _extract_llm_catalog_summary()

    overview = {
        "window_size": _safe_int(runtime_breakdown.get("window_size")),
        "total_calls": _safe_int(runtime_breakdown.get("total_calls")),
        "success_calls": _safe_int(runtime_breakdown.get("success_calls")),
        "failed_calls": _safe_int(runtime_breakdown.get("failed_calls")),
        "success_rate": _safe_float(runtime_breakdown.get("success_rate")),
        "avg_latency_ms": _safe_float(runtime_breakdown.get("avg_latency_ms")),
        "last_error": str(runtime_breakdown.get("last_error") or "").strip(),
        # Verdict and text come from the home-page snapshot.
        "status": str(overview_snapshot.get("status") or "warning").strip(),
        "summary": str(overview_snapshot.get("summary") or "").strip(),
        "last_call": dict(runtime_breakdown.get("last_call") or {}),
        "provider_count": _safe_int(catalog_summary.get("provider_count")),
        "scene_count": _safe_int(catalog_summary.get("scene_count")),
        "target_count": _safe_int(catalog_summary.get("target_count")),
        "default_scene": str(catalog_summary.get("default_scene") or "").strip(),
        "default_backend": str(catalog_summary.get("default_backend") or "").strip(),
        "has_routing": bool(catalog_summary.get("has_routing")),
    }

    payload = {
        "overview": overview,
        # Raw recent-window rows are kept so a future "last 10 calls" list
        # can reuse them directly.
        "recent_rows": runtime_breakdown.get("rows", []) or [],
    }
    for group_key in ("by_scene", "by_backend", "by_provider", "by_model"):
        payload[group_key] = runtime_breakdown.get(group_key, []) or []
    return payload
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_scheduler_runtime_snapshot() -> dict:
    """Aggregate the async_job runtime state into a home-page scheduler summary.

    This does not replace the full job page. It answers the questions an
    administrator asks most often:
    1. were the jobs loaded correctly;
    2. are there failed jobs or invalid schedules;
    3. roughly when does the next job run;
    4. is the load mostly system jobs or plugin jobs.
    """
    runtime_rows = async_job.get_jobs_snapshot()

    def _status_of(row) -> str:
        # Normalised last_status of a job row.
        return str(row.get("last_status") or "").strip().lower()

    def _failure_time(row) -> datetime:
        # Best available timestamp for ordering failed jobs.
        return (
            _parse_snapshot_datetime(row.get("updated_at"))
            or _parse_snapshot_datetime(row.get("last_run_at"))
            or datetime.min
        )

    upcoming_runs = []
    failed_rows = []
    system_job_count = 0
    plugin_job_count = 0
    enabled_jobs = 0
    running_jobs = 0
    invalid_jobs = 0
    never_run_jobs = 0

    for row in runtime_rows:
        job_key = str(row.get("job_key") or "").strip()
        owner_name = str(row.get("owner_name") or "system").strip().lower()
        scheduled_at = _parse_snapshot_datetime(row.get("next_run_at"))
        status = _status_of(row)
        is_enabled = bool(row.get("enabled"))

        # A job counts as a plugin job when its key carries the plugin prefix
        # or its owner is anything other than "system".
        if job_key.startswith("plugin_schedule:") or owner_name != "system":
            plugin_job_count += 1
        else:
            system_job_count += 1

        if is_enabled:
            enabled_jobs += 1
            if scheduled_at:
                upcoming_runs.append(scheduled_at)
        if row.get("running"):
            running_jobs += 1

        if status in {"failed", "invalid_schedule"}:
            failed_rows.append(row)
        if status == "invalid_schedule":
            invalid_jobs += 1
        elif status == "never":
            never_run_jobs += 1

    # max() returns the first row with the newest timestamp, the same row a
    # stable descending sort would put in front.
    latest_failed_row = max(failed_rows, key=_failure_time) if failed_rows else {}

    total_jobs = len(runtime_rows)
    failed_jobs = len(failed_rows)
    paused_jobs = total_jobs - enabled_jobs

    next_run_at_text = ""
    if upcoming_runs:
        next_run_at_text = min(upcoming_runs).strftime("%Y-%m-%d %H:%M:%S")

    # Keep the error excerpt short enough for a dashboard card.
    latest_failed_error = str(latest_failed_row.get("last_error") or "").strip()
    if len(latest_failed_error) > 120:
        latest_failed_error = f"{latest_failed_error[:117]}..."

    snapshot = {
        "status": "healthy",
        "summary": "任务调度运行正常",
        "total_jobs": total_jobs,
        "enabled_jobs": enabled_jobs,
        "running_jobs": running_jobs,
        "failed_jobs": failed_jobs,
        "invalid_jobs": invalid_jobs,
        "paused_jobs": paused_jobs,
        "never_run_jobs": never_run_jobs,
        "system_job_count": system_job_count,
        "plugin_job_count": plugin_job_count,
        "next_run_at": next_run_at_text,
        "latest_failed_job_name": str(latest_failed_row.get("name") or "").strip(),
        "latest_failed_error": latest_failed_error,
    }

    # Verdict, most severe condition first.
    if total_jobs <= 0:
        status = "warning"
        summary = "当前没有加载任何定时任务"
    elif invalid_jobs > 0:
        status = "danger"
        summary = f"发现 {invalid_jobs} 个任务调度配置非法,建议立即检查任务页"
    elif failed_jobs > 0:
        status = "warning"
        summary = (
            f"最近有 {failed_jobs} 个任务执行失败,"
            f"下一次执行 {next_run_at_text or '暂未计算'}"
        )
    elif enabled_jobs <= 0:
        status = "warning"
        summary = "任务已加载,但当前没有启用中的调度任务"
    elif running_jobs > 0:
        status = "healthy"
        summary = (
            f"当前有 {running_jobs} 个任务执行中,"
            f"下一次执行 {next_run_at_text or '暂未计算'}"
        )
    else:
        status = "healthy"
        summary = f"已启用 {enabled_jobs} 个任务,下一次执行 {next_run_at_text or '暂未计算'}"

    snapshot["status"] = status
    snapshot["summary"] = summary
    return snapshot
|
|
||||||
|
|
||||||
|
|
||||||
def _legacy_llm_to_catalog(legacy_llm: dict) -> dict:
|
def _legacy_llm_to_catalog(legacy_llm: dict) -> dict:
|
||||||
"""把旧 llm(backends/scenes) 结构转换为新目录结构(仅用于兜底展示)。
|
"""把旧 llm(backends/scenes) 结构转换为新目录结构(仅用于兜底展示)。
|
||||||
|
|
||||||
@@ -906,11 +405,45 @@ def api_system_health_summary():
|
|||||||
_, recent_error_count = server.stats_db.get_error_logs(days=1, page=1, limit=1)
|
_, recent_error_count = server.stats_db.get_error_logs(days=1, page=1, limit=1)
|
||||||
|
|
||||||
# 基础设施健康:
|
# 基础设施健康:
|
||||||
# 1. MySQL / Redis 都在这里做“首页摘要级”探测,而不是完整深度巡检;
|
# 1. MySQL 用最轻量的 SELECT 1 做可用性探测;
|
||||||
# 2. 除了连通性,还补充少量负载指标,方便管理员快速判断是否需要继续下钻;
|
# 2. Redis 用 PING 验证连接池当前是否可拿到可用连接;
|
||||||
# 3. 即使探测失败也只反馈到看板,不影响主接口整体返回。
|
# 3. 即使探测失败也只反馈到看板,不影响主接口整体返回。
|
||||||
mysql_snapshot = _extract_mysql_runtime_snapshot(server.db_manager)
|
mysql_status = "healthy"
|
||||||
redis_snapshot = _extract_redis_runtime_snapshot(server.db_manager)
|
mysql_summary = "连接正常"
|
||||||
|
try:
|
||||||
|
mysql_conn = server.db_manager.get_mysql_connection()
|
||||||
|
try:
|
||||||
|
with mysql_conn.cursor() as cursor:
|
||||||
|
cursor.execute("SELECT 1")
|
||||||
|
cursor.fetchone()
|
||||||
|
finally:
|
||||||
|
mysql_conn.close()
|
||||||
|
except Exception as mysql_error:
|
||||||
|
mysql_status = "danger"
|
||||||
|
mysql_summary = f"MySQL 探测失败: {mysql_error}"
|
||||||
|
|
||||||
|
redis_status = "healthy"
|
||||||
|
redis_summary = "连接正常"
|
||||||
|
try:
|
||||||
|
redis_conn = server.db_manager.get_redis_connection()
|
||||||
|
redis_conn.ping()
|
||||||
|
except Exception as redis_error:
|
||||||
|
redis_status = "danger"
|
||||||
|
redis_summary = f"Redis 探测失败: {redis_error}"
|
||||||
|
|
||||||
|
# md2img 健康快照已经有现成实现,这里只做聚合,不主动预热运行时。
|
||||||
|
md2img_snapshot = get_md2img_health_snapshot(ensure_runtime=False) or {}
|
||||||
|
browser_ready = bool(
|
||||||
|
md2img_snapshot.get("browser_ready")
|
||||||
|
or md2img_snapshot.get("playwright_ready")
|
||||||
|
or md2img_snapshot.get("ready")
|
||||||
|
)
|
||||||
|
runtime_ready = bool(
|
||||||
|
md2img_snapshot.get("runtime_ready")
|
||||||
|
or md2img_snapshot.get("runtime_initialized")
|
||||||
|
or md2img_snapshot.get("initialized")
|
||||||
|
)
|
||||||
|
md2img_healthy = runtime_ready and browser_ready
|
||||||
|
|
||||||
# 首页只需要“够判断”的轻量结论,因此统一产出 status + summary 文本,前端无需重复拼装业务规则。
|
# 首页只需要“够判断”的轻量结论,因此统一产出 status + summary 文本,前端无需重复拼装业务规则。
|
||||||
robot_running = bool(getattr(robot, "ipad_running", False))
|
robot_running = bool(getattr(robot, "ipad_running", False))
|
||||||
@@ -937,11 +470,37 @@ def api_system_health_summary():
|
|||||||
error_status = "healthy"
|
error_status = "healthy"
|
||||||
error_summary = "近 24 小时未记录到异常"
|
error_summary = "近 24 小时未记录到异常"
|
||||||
|
|
||||||
# 首页 AI 卡片升级为“运行态 + 路由摘要”,仍然保持被动观测,不主动探活。
|
if md2img_healthy:
|
||||||
ai_runtime = _extract_ai_runtime_snapshot()
|
md2img_status = "healthy"
|
||||||
|
md2img_summary = "运行时与浏览器均已就绪"
|
||||||
|
elif runtime_ready or browser_ready:
|
||||||
|
md2img_status = "warning"
|
||||||
|
md2img_summary = "运行时部分可用,建议检查预热状态"
|
||||||
|
else:
|
||||||
|
md2img_status = "danger"
|
||||||
|
md2img_summary = "运行时未就绪,相关转图能力可能不可用"
|
||||||
|
|
||||||
# Markdown 转图更适合保留在专门页面里排障,首页右侧改成更通用的任务调度摘要。
|
# AI 运行态:
|
||||||
scheduler_runtime = _extract_scheduler_runtime_snapshot()
|
# 1. 统一从 UnifiedLLMClient 最近调用窗口读取,避免各插件单独维护监控数据;
|
||||||
|
# 2. 若当前窗口还没有调用记录,就明确返回“暂无调用”,避免误判成异常。
|
||||||
|
ai_runtime = UnifiedLLMClient.get_runtime_snapshot()
|
||||||
|
ai_total_calls = int(ai_runtime.get("total_calls") or 0)
|
||||||
|
ai_failed_calls = int(ai_runtime.get("failed_calls") or 0)
|
||||||
|
if ai_total_calls <= 0:
|
||||||
|
ai_status = "warning"
|
||||||
|
ai_summary = "最近窗口内暂无统一 LLM 调用记录"
|
||||||
|
elif ai_failed_calls > 0:
|
||||||
|
ai_status = "warning"
|
||||||
|
ai_summary = (
|
||||||
|
f"最近 {ai_total_calls} 次调用中失败 {ai_failed_calls} 次,"
|
||||||
|
f"平均耗时 {ai_runtime.get('avg_latency_ms', 0)}ms"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
ai_status = "healthy"
|
||||||
|
ai_summary = (
|
||||||
|
f"最近 {ai_total_calls} 次调用全部成功,"
|
||||||
|
f"平均耗时 {ai_runtime.get('avg_latency_ms', 0)}ms"
|
||||||
|
)
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
"success": True,
|
"success": True,
|
||||||
@@ -965,28 +524,33 @@ def api_system_health_summary():
|
|||||||
"summary": error_summary,
|
"summary": error_summary,
|
||||||
},
|
},
|
||||||
"infrastructure": {
|
"infrastructure": {
|
||||||
"status": (
|
"status": "healthy" if mysql_status == "healthy" and redis_status == "healthy" else "danger",
|
||||||
"danger"
|
|
||||||
if "danger" in {mysql_snapshot.get("status"), redis_snapshot.get("status")}
|
|
||||||
else ("warning" if "warning" in {mysql_snapshot.get("status"), redis_snapshot.get("status")} else "healthy")
|
|
||||||
),
|
|
||||||
"summary": (
|
"summary": (
|
||||||
"MySQL / Redis 均正常"
|
"MySQL / Redis 均正常"
|
||||||
if mysql_snapshot.get("status") == "healthy" and redis_snapshot.get("status") == "healthy"
|
if mysql_status == "healthy" and redis_status == "healthy"
|
||||||
else (
|
else "存在基础设施连接异常"
|
||||||
"基础设施连接正常,但部分负载指标需要关注"
|
|
||||||
if mysql_snapshot.get("status") != "danger" and redis_snapshot.get("status") != "danger"
|
|
||||||
else "存在基础设施连接异常"
|
|
||||||
)
|
|
||||||
),
|
),
|
||||||
"mysql": mysql_snapshot,
|
"mysql": {
|
||||||
"redis": redis_snapshot,
|
"status": mysql_status,
|
||||||
|
"summary": mysql_summary,
|
||||||
|
},
|
||||||
|
"redis": {
|
||||||
|
"status": redis_status,
|
||||||
|
"summary": redis_summary,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"ai_runtime": {
|
"ai_runtime": {
|
||||||
|
"status": ai_status,
|
||||||
|
"summary": ai_summary,
|
||||||
**ai_runtime,
|
**ai_runtime,
|
||||||
},
|
},
|
||||||
"scheduler": {
|
"md2img": {
|
||||||
**scheduler_runtime,
|
"status": md2img_status,
|
||||||
|
"healthy": md2img_healthy,
|
||||||
|
"runtime_ready": runtime_ready,
|
||||||
|
"browser_ready": browser_ready,
|
||||||
|
"summary": md2img_summary,
|
||||||
|
"detail": md2img_snapshot,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -1155,26 +719,6 @@ def get_system_llm_config():
|
|||||||
return jsonify({"success": False, "message": str(e)}), 500
|
return jsonify({"success": False, "message": str(e)}), 500
|
||||||
|
|
||||||
|
|
||||||
@system_bp.route('/api/system/llm_runtime_analytics', methods=['GET'])
@login_required
def get_system_llm_runtime_analytics():
    """Return the recent-window LLM analytics.

    No probe request is issued and no persistent cost accounting is done
    here; the endpoint only consumes the recent-window records already kept
    by the unified client:
    1. refreshing the admin page must not put extra load on the AI services;
    2. first get per-scene / backend / model success rate and latency right;
    3. leave room in the response shape for a later token cost centre.

    Responds with ``{"success": True, "data": ...}``, or with HTTP 500 and
    the error message when building the payload fails.
    """
    try:
        analytics_payload = _build_llm_runtime_analytics_payload()
        return jsonify({"success": True, "data": analytics_payload})
    except Exception as e:
        logger.error(f"读取 LLM 运行分析失败: {e}")
        return jsonify({"success": False, "message": str(e)}), 500
|
|
||||||
|
|
||||||
|
|
||||||
@system_bp.route('/api/system/llm_config', methods=['POST'])
|
@system_bp.route('/api/system/llm_config', methods=['POST'])
|
||||||
@login_required
|
@login_required
|
||||||
def update_system_llm_config():
|
def update_system_llm_config():
|
||||||
|
|||||||
@@ -8,181 +8,14 @@
|
|||||||
<div class="page-hero-copy">
|
<div class="page-hero-copy">
|
||||||
<div class="page-eyebrow">LLM Catalog</div>
|
<div class="page-eyebrow">LLM Catalog</div>
|
||||||
<h1>LLM目录配置</h1>
|
<h1>LLM目录配置</h1>
|
||||||
<p>按 Provider 模板、Dify 应用、Scene 绑定三层维护,并结合最近窗口运行分析判断哪条 AI 路由更慢、更容易失败。</p>
|
<p>按 Provider 模板、Dify 应用、Scene 绑定三层维护,减少重复配置和切换成本。</p>
|
||||||
</div>
|
</div>
|
||||||
<div class="page-hero-actions">
|
<div class="page-hero-actions">
|
||||||
<el-button size="mini" plain :loading="runtimeAnalyticsLoading" @click="reloadPageData">刷新</el-button>
|
<el-button size="mini" plain @click="loadLlmConfig">刷新</el-button>
|
||||||
<el-button size="mini" type="success" @click="saveLlmConfig">保存配置</el-button>
|
<el-button size="mini" type="success" @click="saveLlmConfig">保存配置</el-button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<el-card class="workspace-card" shadow="hover">
|
|
||||||
<div slot="header" class="workspace-header">
|
|
||||||
<div>
|
|
||||||
<h3>AI运行分析</h3>
|
|
||||||
<p>基于统一 LLM 客户端最近窗口埋点做被动观测,不额外发起探活请求。</p>
|
|
||||||
</div>
|
|
||||||
<div class="config-meta">
|
|
||||||
<span>窗口容量:{% raw %}{{ runtimeAnalytics.overview.window_size || 0 }}{% endraw %}</span>
|
|
||||||
<span>默认场景:{% raw %}{{ runtimeAnalytics.overview.default_scene || '-' }}{% endraw %}</span>
|
|
||||||
<span>默认目标:{% raw %}{{ runtimeAnalytics.overview.default_backend || '-' }}{% endraw %}</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="runtime-summary-grid">
|
|
||||||
<div class="runtime-summary-card">
|
|
||||||
<div class="summary-label">最近调用</div>
|
|
||||||
<div class="summary-value">{% raw %}{{ runtimeAnalytics.overview.total_calls || 0 }}{% endraw %}</div>
|
|
||||||
<div class="summary-hint">成功 {% raw %}{{ runtimeAnalytics.overview.success_calls || 0 }}{% endraw %} / 失败 {% raw %}{{ runtimeAnalytics.overview.failed_calls || 0 }}{% endraw %}</div>
|
|
||||||
</div>
|
|
||||||
<div class="runtime-summary-card">
|
|
||||||
<div class="summary-label">成功率</div>
|
|
||||||
<div class="summary-value">{% raw %}{{ formatPercent(runtimeAnalytics.overview.success_rate) }}{% endraw %}</div>
|
|
||||||
<div class="summary-hint">按最近窗口实时汇总</div>
|
|
||||||
</div>
|
|
||||||
<div class="runtime-summary-card">
|
|
||||||
<div class="summary-label">平均耗时</div>
|
|
||||||
<div class="summary-value">{% raw %}{{ formatLatency(runtimeAnalytics.overview.avg_latency_ms) }}{% endraw %}</div>
|
|
||||||
<div class="summary-hint">用于快速识别慢场景</div>
|
|
||||||
</div>
|
|
||||||
<div class="runtime-summary-card">
|
|
||||||
<div class="summary-label">路由规模</div>
|
|
||||||
<div class="summary-value">{% raw %}{{ runtimeAnalytics.overview.scene_count || 0 }}{% endraw %}</div>
|
|
||||||
<div class="summary-hint">场景数 / 目标数 {% raw %}{{ runtimeAnalytics.overview.target_count || 0 }}{% endraw %}</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="runtime-overview-panel">
|
|
||||||
<div class="runtime-status-row">
|
|
||||||
<el-tag size="mini" :type="statusTagType(runtimeAnalytics.overview.status)">
|
|
||||||
{% raw %}{{ statusText(runtimeAnalytics.overview.status) }}{% endraw %}
|
|
||||||
</el-tag>
|
|
||||||
<span class="runtime-overview-text">{% raw %}{{ runtimeAnalytics.overview.summary || '最近窗口内暂无统一 LLM 调用记录' }}{% endraw %}</span>
|
|
||||||
</div>
|
|
||||||
<div class="runtime-overview-meta">
|
|
||||||
<span>Provider 模板:{% raw %}{{ runtimeAnalytics.overview.provider_count || 0 }}{% endraw %}</span>
|
|
||||||
<span>最近场景:{% raw %}{{ runtimeAnalytics.overview.last_call.scene || '-' }}{% endraw %}</span>
|
|
||||||
<span>最近后端:{% raw %}{{ runtimeAnalytics.overview.last_call.backend || '-' }}{% endraw %}</span>
|
|
||||||
<span>最近模型:{% raw %}{{ runtimeAnalytics.overview.last_call.model || '-' }}{% endraw %}</span>
|
|
||||||
<span>最近时间:{% raw %}{{ runtimeAnalytics.overview.last_call.timestamp || '-' }}{% endraw %}</span>
|
|
||||||
</div>
|
|
||||||
<div class="runtime-error-box" v-if="runtimeAnalytics.overview.last_error">
|
|
||||||
<strong>最近错误:</strong>
|
|
||||||
<span>{% raw %}{{ runtimeAnalytics.overview.last_error }}{% endraw %}</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="runtime-table-grid">
|
|
||||||
<el-card class="analytics-card" shadow="never">
|
|
||||||
<div slot="header" class="runtime-table-header">
|
|
||||||
<div>
|
|
||||||
<h4>按场景统计</h4>
|
|
||||||
<p>定位哪个业务场景最常调用、最容易失败。</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<el-table v-if="runtimeAnalytics.by_scene.length" :data="runtimeAnalytics.by_scene" size="mini" style="width: 100%">
|
|
||||||
<el-table-column prop="key" label="Scene" min-width="150" show-overflow-tooltip></el-table-column>
|
|
||||||
<el-table-column prop="total_calls" label="调用数" width="80"></el-table-column>
|
|
||||||
<el-table-column label="成功率" width="100">
|
|
||||||
<template slot-scope="scope">
|
|
||||||
{% raw %}{{ formatPercent(scope.row.success_rate) }}{% endraw %}
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column label="平均耗时" width="110">
|
|
||||||
<template slot-scope="scope">
|
|
||||||
{% raw %}{{ formatLatency(scope.row.avg_latency_ms) }}{% endraw %}
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column prop="failed_calls" label="失败数" width="80"></el-table-column>
|
|
||||||
<el-table-column prop="last_call_at" label="最近调用" min-width="150"></el-table-column>
|
|
||||||
<el-table-column prop="last_error" label="最近错误" min-width="220" show-overflow-tooltip></el-table-column>
|
|
||||||
</el-table>
|
|
||||||
<el-empty v-else description="最近窗口内暂无场景调用数据"></el-empty>
|
|
||||||
</el-card>
|
|
||||||
|
|
||||||
<el-card class="analytics-card" shadow="never">
|
|
||||||
<div slot="header" class="runtime-table-header">
|
|
||||||
<div>
|
|
||||||
<h4>按后端统计</h4>
|
|
||||||
<p>观察 backend 层是否存在集中失败或慢请求。</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<el-table v-if="runtimeAnalytics.by_backend.length" :data="runtimeAnalytics.by_backend" size="mini" style="width: 100%">
|
|
||||||
<el-table-column prop="key" label="Backend" min-width="150" show-overflow-tooltip></el-table-column>
|
|
||||||
<el-table-column prop="total_calls" label="调用数" width="80"></el-table-column>
|
|
||||||
<el-table-column label="成功率" width="100">
|
|
||||||
<template slot-scope="scope">
|
|
||||||
{% raw %}{{ formatPercent(scope.row.success_rate) }}{% endraw %}
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column label="平均耗时" width="110">
|
|
||||||
<template slot-scope="scope">
|
|
||||||
{% raw %}{{ formatLatency(scope.row.avg_latency_ms) }}{% endraw %}
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column prop="failed_calls" label="失败数" width="80"></el-table-column>
|
|
||||||
<el-table-column prop="last_call_at" label="最近调用" min-width="150"></el-table-column>
|
|
||||||
<el-table-column prop="last_error" label="最近错误" min-width="220" show-overflow-tooltip></el-table-column>
|
|
||||||
</el-table>
|
|
||||||
<el-empty v-else description="最近窗口内暂无后端调用数据"></el-empty>
|
|
||||||
</el-card>
|
|
||||||
|
|
||||||
<el-card class="analytics-card" shadow="never">
|
|
||||||
<div slot="header" class="runtime-table-header">
|
|
||||||
<div>
|
|
||||||
<h4>按 Provider 统计</h4>
|
|
||||||
<p>区分 Dify 与 OpenAI Compatible 等不同接入形态的表现。</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<el-table v-if="runtimeAnalytics.by_provider.length" :data="runtimeAnalytics.by_provider" size="mini" style="width: 100%">
|
|
||||||
<el-table-column prop="key" label="Provider" min-width="150" show-overflow-tooltip></el-table-column>
|
|
||||||
<el-table-column prop="total_calls" label="调用数" width="80"></el-table-column>
|
|
||||||
<el-table-column label="成功率" width="100">
|
|
||||||
<template slot-scope="scope">
|
|
||||||
{% raw %}{{ formatPercent(scope.row.success_rate) }}{% endraw %}
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column label="平均耗时" width="110">
|
|
||||||
<template slot-scope="scope">
|
|
||||||
{% raw %}{{ formatLatency(scope.row.avg_latency_ms) }}{% endraw %}
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column prop="failed_calls" label="失败数" width="80"></el-table-column>
|
|
||||||
<el-table-column prop="last_call_at" label="最近调用" min-width="150"></el-table-column>
|
|
||||||
<el-table-column prop="last_error" label="最近错误" min-width="220" show-overflow-tooltip></el-table-column>
|
|
||||||
</el-table>
|
|
||||||
<el-empty v-else description="最近窗口内暂无 Provider 调用数据"></el-empty>
|
|
||||||
</el-card>
|
|
||||||
|
|
||||||
<el-card class="analytics-card" shadow="never">
|
|
||||||
<div slot="header" class="runtime-table-header">
|
|
||||||
<div>
|
|
||||||
<h4>按模型统计</h4>
|
|
||||||
<p>帮助判断是否需要按场景切换模型或做降级策略。</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<el-table v-if="runtimeAnalytics.by_model.length" :data="runtimeAnalytics.by_model" size="mini" style="width: 100%">
|
|
||||||
<el-table-column prop="key" label="Model" min-width="150" show-overflow-tooltip></el-table-column>
|
|
||||||
<el-table-column prop="total_calls" label="调用数" width="80"></el-table-column>
|
|
||||||
<el-table-column label="成功率" width="100">
|
|
||||||
<template slot-scope="scope">
|
|
||||||
{% raw %}{{ formatPercent(scope.row.success_rate) }}{% endraw %}
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column label="平均耗时" width="110">
|
|
||||||
<template slot-scope="scope">
|
|
||||||
{% raw %}{{ formatLatency(scope.row.avg_latency_ms) }}{% endraw %}
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column prop="failed_calls" label="失败数" width="80"></el-table-column>
|
|
||||||
<el-table-column prop="last_call_at" label="最近调用" min-width="150"></el-table-column>
|
|
||||||
<el-table-column prop="last_error" label="最近错误" min-width="220" show-overflow-tooltip></el-table-column>
|
|
||||||
</el-table>
|
|
||||||
<el-empty v-else description="最近窗口内暂无模型调用数据"></el-empty>
|
|
||||||
</el-card>
|
|
||||||
</div>
|
|
||||||
</el-card>
|
|
||||||
|
|
||||||
<el-card class="workspace-card" shadow="hover">
|
<el-card class="workspace-card" shadow="hover">
|
||||||
<div slot="header" class="workspace-header">
|
<div slot="header" class="workspace-header">
|
||||||
<div>
|
<div>
|
||||||
@@ -382,30 +215,6 @@
|
|||||||
currentView: '17',
|
currentView: '17',
|
||||||
configPath: '',
|
configPath: '',
|
||||||
topologyRows: [],
|
topologyRows: [],
|
||||||
runtimeAnalyticsLoading: false,
|
|
||||||
runtimeAnalytics: {
|
|
||||||
overview: {
|
|
||||||
window_size: 0,
|
|
||||||
total_calls: 0,
|
|
||||||
success_calls: 0,
|
|
||||||
failed_calls: 0,
|
|
||||||
success_rate: 0,
|
|
||||||
avg_latency_ms: 0,
|
|
||||||
last_error: '',
|
|
||||||
status: 'warning',
|
|
||||||
summary: '',
|
|
||||||
last_call: {},
|
|
||||||
provider_count: 0,
|
|
||||||
scene_count: 0,
|
|
||||||
target_count: 0,
|
|
||||||
default_scene: '',
|
|
||||||
default_backend: ''
|
|
||||||
},
|
|
||||||
by_scene: [],
|
|
||||||
by_backend: [],
|
|
||||||
by_provider: [],
|
|
||||||
by_model: []
|
|
||||||
},
|
|
||||||
catalog: {
|
catalog: {
|
||||||
default_scene: '',
|
default_scene: '',
|
||||||
providers: [],
|
providers: [],
|
||||||
@@ -431,45 +240,12 @@
|
|||||||
},
|
},
|
||||||
mounted() {
|
mounted() {
|
||||||
this.currentView = '17';
|
this.currentView = '17';
|
||||||
this.reloadPageData();
|
this.loadLlmConfig();
|
||||||
},
|
},
|
||||||
methods: {
|
methods: {
|
||||||
newUid() {
|
newUid() {
|
||||||
return `${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
|
return `${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
|
||||||
},
|
},
|
||||||
// 统一刷新配置与运行分析,避免管理员点一次“刷新”只能看到半套信息。
|
|
||||||
async reloadPageData() {
|
|
||||||
await Promise.all([
|
|
||||||
this.loadLlmConfig(),
|
|
||||||
this.loadRuntimeAnalytics()
|
|
||||||
]);
|
|
||||||
},
|
|
||||||
statusTagType(status) {
|
|
||||||
if (status === 'healthy') {
|
|
||||||
return 'success';
|
|
||||||
}
|
|
||||||
if (status === 'danger') {
|
|
||||||
return 'danger';
|
|
||||||
}
|
|
||||||
return 'warning';
|
|
||||||
},
|
|
||||||
statusText(status) {
|
|
||||||
if (status === 'healthy') {
|
|
||||||
return '运行正常';
|
|
||||||
}
|
|
||||||
if (status === 'danger') {
|
|
||||||
return '需要立即处理';
|
|
||||||
}
|
|
||||||
return '需要关注';
|
|
||||||
},
|
|
||||||
formatPercent(value) {
|
|
||||||
const numeric = Number(value || 0);
|
|
||||||
return `${numeric.toFixed(2)}%`;
|
|
||||||
},
|
|
||||||
formatLatency(value) {
|
|
||||||
const numeric = Number(value || 0);
|
|
||||||
return `${numeric.toFixed(2)} ms`;
|
|
||||||
},
|
|
||||||
// Provider 模板:只放公共字段,避免 Dify 每个应用重复填写。
|
// Provider 模板:只放公共字段,避免 Dify 每个应用重复填写。
|
||||||
newProvider() {
|
newProvider() {
|
||||||
return {
|
return {
|
||||||
@@ -620,46 +396,6 @@
|
|||||||
}
|
}
|
||||||
return this.difyAppNameOptions;
|
return this.difyAppNameOptions;
|
||||||
},
|
},
|
||||||
async loadRuntimeAnalytics() {
|
|
||||||
this.runtimeAnalyticsLoading = true;
|
|
||||||
try {
|
|
||||||
const response = await axios.get('/api/system/llm_runtime_analytics');
|
|
||||||
if (!response.data.success) {
|
|
||||||
this.$message.error(response.data.message || '读取 AI 运行分析失败');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const data = response.data.data || {};
|
|
||||||
const overview = data.overview || {};
|
|
||||||
// 这里做前端兜底结构归一化,避免后端未来新增字段时影响当前页面渲染。
|
|
||||||
this.runtimeAnalytics = {
|
|
||||||
overview: {
|
|
||||||
window_size: overview.window_size || 0,
|
|
||||||
total_calls: overview.total_calls || 0,
|
|
||||||
success_calls: overview.success_calls || 0,
|
|
||||||
failed_calls: overview.failed_calls || 0,
|
|
||||||
success_rate: overview.success_rate || 0,
|
|
||||||
avg_latency_ms: overview.avg_latency_ms || 0,
|
|
||||||
last_error: overview.last_error || '',
|
|
||||||
status: overview.status || 'warning',
|
|
||||||
summary: overview.summary || '',
|
|
||||||
last_call: overview.last_call || {},
|
|
||||||
provider_count: overview.provider_count || 0,
|
|
||||||
scene_count: overview.scene_count || 0,
|
|
||||||
target_count: overview.target_count || 0,
|
|
||||||
default_scene: overview.default_scene || '',
|
|
||||||
default_backend: overview.default_backend || ''
|
|
||||||
},
|
|
||||||
by_scene: data.by_scene || [],
|
|
||||||
by_backend: data.by_backend || [],
|
|
||||||
by_provider: data.by_provider || [],
|
|
||||||
by_model: data.by_model || []
|
|
||||||
};
|
|
||||||
} catch (error) {
|
|
||||||
this.$message.error(error.response?.data?.message || '读取 AI 运行分析失败');
|
|
||||||
} finally {
|
|
||||||
this.runtimeAnalyticsLoading = false;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
async loadLlmConfig() {
|
async loadLlmConfig() {
|
||||||
try {
|
try {
|
||||||
const response = await axios.get('/api/system/llm_config');
|
const response = await axios.get('/api/system/llm_config');
|
||||||
@@ -738,7 +474,7 @@
|
|||||||
const response = await axios.post('/api/system/llm_config', payload);
|
const response = await axios.post('/api/system/llm_config', payload);
|
||||||
if (response.data.success) {
|
if (response.data.success) {
|
||||||
this.$message.success(response.data.message || '保存成功');
|
this.$message.success(response.data.message || '保存成功');
|
||||||
this.reloadPageData();
|
this.loadLlmConfig();
|
||||||
} else {
|
} else {
|
||||||
this.$message.error(response.data.message || '保存失败');
|
this.$message.error(response.data.message || '保存失败');
|
||||||
}
|
}
|
||||||
@@ -777,86 +513,6 @@
|
|||||||
gap: 8px;
|
gap: 8px;
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
}
|
}
|
||||||
.runtime-summary-grid {
|
|
||||||
display: grid;
|
|
||||||
grid-template-columns: repeat(4, minmax(180px, 1fr));
|
|
||||||
gap: 14px;
|
|
||||||
margin-bottom: 16px;
|
|
||||||
}
|
|
||||||
.runtime-summary-card {
|
|
||||||
padding: 16px 18px;
|
|
||||||
border-radius: 16px;
|
|
||||||
border: 1px solid rgba(148,163,184,0.18);
|
|
||||||
background: linear-gradient(180deg, rgba(255,255,255,0.96), rgba(241,245,249,0.88));
|
|
||||||
}
|
|
||||||
.summary-label {
|
|
||||||
font-size: 12px;
|
|
||||||
color: #64748b;
|
|
||||||
margin-bottom: 8px;
|
|
||||||
}
|
|
||||||
.summary-value {
|
|
||||||
font-size: 28px;
|
|
||||||
line-height: 1;
|
|
||||||
font-weight: 700;
|
|
||||||
color: #0f172a;
|
|
||||||
margin-bottom: 8px;
|
|
||||||
}
|
|
||||||
.summary-hint {
|
|
||||||
font-size: 12px;
|
|
||||||
color: #475569;
|
|
||||||
}
|
|
||||||
.runtime-overview-panel {
|
|
||||||
padding: 16px 18px;
|
|
||||||
border-radius: 16px;
|
|
||||||
background: rgba(15, 23, 42, 0.03);
|
|
||||||
border: 1px solid rgba(148,163,184,0.14);
|
|
||||||
margin-bottom: 18px;
|
|
||||||
}
|
|
||||||
.runtime-status-row {
|
|
||||||
display: flex;
|
|
||||||
align-items: center;
|
|
||||||
gap: 10px;
|
|
||||||
flex-wrap: wrap;
|
|
||||||
margin-bottom: 10px;
|
|
||||||
}
|
|
||||||
.runtime-overview-text {
|
|
||||||
color: #0f172a;
|
|
||||||
font-size: 14px;
|
|
||||||
}
|
|
||||||
.runtime-overview-meta {
|
|
||||||
display: flex;
|
|
||||||
gap: 12px 18px;
|
|
||||||
flex-wrap: wrap;
|
|
||||||
color: #64748b;
|
|
||||||
font-size: 12px;
|
|
||||||
}
|
|
||||||
.runtime-error-box {
|
|
||||||
margin-top: 12px;
|
|
||||||
padding: 10px 12px;
|
|
||||||
border-radius: 10px;
|
|
||||||
background: rgba(239, 68, 68, 0.08);
|
|
||||||
color: #991b1b;
|
|
||||||
font-size: 12px;
|
|
||||||
line-height: 1.6;
|
|
||||||
}
|
|
||||||
.runtime-table-grid {
|
|
||||||
display: grid;
|
|
||||||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
|
||||||
gap: 16px;
|
|
||||||
}
|
|
||||||
.analytics-card {
|
|
||||||
border: 1px solid rgba(148,163,184,0.16);
|
|
||||||
border-radius: 16px;
|
|
||||||
}
|
|
||||||
.runtime-table-header h4 {
|
|
||||||
font-size: 16px;
|
|
||||||
margin-bottom: 4px;
|
|
||||||
color: #0f172a;
|
|
||||||
}
|
|
||||||
.runtime-table-header p {
|
|
||||||
color: #64748b;
|
|
||||||
font-size: 12px;
|
|
||||||
}
|
|
||||||
.section-list { display: flex; flex-direction: column; gap: 12px; }
|
.section-list { display: flex; flex-direction: column; gap: 12px; }
|
||||||
.entry-card { border: 1px solid rgba(148,163,184,0.16); border-radius: 14px; }
|
.entry-card { border: 1px solid rgba(148,163,184,0.16); border-radius: 14px; }
|
||||||
.entry-header { display: flex; align-items: center; justify-content: space-between; gap: 12px; }
|
.entry-header { display: flex; align-items: center; justify-content: space-between; gap: 12px; }
|
||||||
@@ -885,8 +541,6 @@
|
|||||||
@media (max-width: 960px) {
|
@media (max-width: 960px) {
|
||||||
.page-hero { flex-direction: column; align-items: flex-start; }
|
.page-hero { flex-direction: column; align-items: flex-start; }
|
||||||
.workspace-header { flex-direction: column; align-items: flex-start; }
|
.workspace-header { flex-direction: column; align-items: flex-start; }
|
||||||
.runtime-summary-grid { grid-template-columns: 1fr; }
|
|
||||||
.runtime-table-grid { grid-template-columns: 1fr; }
|
|
||||||
.entry-grid { grid-template-columns: 1fr; }
|
.entry-grid { grid-template-columns: 1fr; }
|
||||||
.scene-row { grid-template-columns: 1fr; }
|
.scene-row { grid-template-columns: 1fr; }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -524,12 +524,6 @@
|
|||||||
|
|
||||||
- 让 AI 能力更可控、更可衡量
|
- 让 AI 能力更可控、更可衡量
|
||||||
|
|
||||||
当前进展:
|
|
||||||
|
|
||||||
- 第一阶段已完成:后台 `LLM目录配置` 页面已补充“AI运行分析”区块,可查看最近窗口内统一 LLM 调用的成功率、平均耗时、失败次数与最近错误
|
|
||||||
- 第一阶段已完成:已支持按 `scene / backend / provider / model` 四个维度聚合最近窗口调用数据,便于快速识别慢场景、异常后端与高失败模型
|
|
||||||
- 当前仍以“最近窗口运行分析”为主,暂未引入持久化 token 成本结算;后续可在确认治理需求后继续扩展预算、告警与降级策略
|
|
||||||
|
|
||||||
建议内容:
|
建议内容:
|
||||||
|
|
||||||
- 统计各插件 token 消耗
|
- 统计各插件 token 消耗
|
||||||
|
|||||||
@@ -109,107 +109,6 @@ class UnifiedLLMClient:
|
|||||||
"last_error": last_error,
|
"last_error": last_error,
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _normalize_runtime_dimension_value(value: Any, fallback_label: str = "(未标记)") -> str:
|
|
||||||
"""把分组维度统一格式化,避免后台表格里出现空白 key。
|
|
||||||
|
|
||||||
这里保留一个显式的“未标记”占位,有两个目的:
|
|
||||||
1. 便于管理员快速发现是哪个插件/场景没有正确传 scene、backend、model;
|
|
||||||
2. 比直接丢弃空值更安全,避免分析数据被“悄悄吃掉”。
|
|
||||||
"""
|
|
||||||
text = str(value or "").strip()
|
|
||||||
return text or fallback_label
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _build_runtime_breakdown_rows(
|
|
||||||
cls,
|
|
||||||
rows: List[Dict[str, Any]],
|
|
||||||
dimension: str,
|
|
||||||
fallback_label: str = "(未标记)",
|
|
||||||
) -> List[Dict[str, Any]]:
|
|
||||||
"""按指定维度聚合最近窗口调用记录。
|
|
||||||
|
|
||||||
设计说明:
|
|
||||||
1. 这里只聚合最近窗口内存数据,不引入新表,也不做持久化成本结算;
|
|
||||||
2. 第一阶段目标是先让管理员看见“哪类调用更慢、更容易失败”;
|
|
||||||
3. 等后续确认成本治理真的需要时,再把 token/金额沉淀到持久化表里。
|
|
||||||
"""
|
|
||||||
grouped_rows: Dict[str, Dict[str, Any]] = {}
|
|
||||||
|
|
||||||
for row in rows:
|
|
||||||
group_key = cls._normalize_runtime_dimension_value(row.get(dimension), fallback_label)
|
|
||||||
metric_row = grouped_rows.setdefault(
|
|
||||||
group_key,
|
|
||||||
{
|
|
||||||
"key": group_key,
|
|
||||||
"dimension": dimension,
|
|
||||||
"total_calls": 0,
|
|
||||||
"success_calls": 0,
|
|
||||||
"failed_calls": 0,
|
|
||||||
"latency_sum_ms": 0.0,
|
|
||||||
"avg_latency_ms": 0.0,
|
|
||||||
"success_rate": 0.0,
|
|
||||||
"last_call_at": "",
|
|
||||||
"last_trace_id": "",
|
|
||||||
"last_error": "",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
metric_row["total_calls"] += 1
|
|
||||||
if bool(row.get("success")):
|
|
||||||
metric_row["success_calls"] += 1
|
|
||||||
else:
|
|
||||||
metric_row["failed_calls"] += 1
|
|
||||||
|
|
||||||
metric_row["latency_sum_ms"] += float(row.get("latency_ms") or 0.0)
|
|
||||||
|
|
||||||
# deque 本身按时间顺序追加,因此后遍历到的同组记录就是更“新”的一次调用。
|
|
||||||
# 这里直接覆盖最近调用信息,成本低,也足够支撑后台最近窗口分析表。
|
|
||||||
metric_row["last_call_at"] = str(row.get("timestamp") or "").strip()
|
|
||||||
metric_row["last_trace_id"] = str(row.get("trace_id") or "").strip()
|
|
||||||
if not bool(row.get("success")) and row.get("error"):
|
|
||||||
metric_row["last_error"] = str(row.get("error") or "").strip()
|
|
||||||
|
|
||||||
result_rows: List[Dict[str, Any]] = []
|
|
||||||
for item in grouped_rows.values():
|
|
||||||
total_calls = int(item.get("total_calls") or 0)
|
|
||||||
success_calls = int(item.get("success_calls") or 0)
|
|
||||||
item["avg_latency_ms"] = round((item.get("latency_sum_ms", 0.0) / total_calls), 2) if total_calls else 0.0
|
|
||||||
item["success_rate"] = round((success_calls / total_calls) * 100, 2) if total_calls else 0.0
|
|
||||||
item.pop("latency_sum_ms", None)
|
|
||||||
result_rows.append(item)
|
|
||||||
|
|
||||||
return sorted(
|
|
||||||
result_rows,
|
|
||||||
key=lambda item: (
|
|
||||||
-int(item.get("total_calls") or 0),
|
|
||||||
-int(item.get("failed_calls") or 0),
|
|
||||||
str(item.get("key") or ""),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
def get_runtime_breakdown(cls) -> Dict[str, Any]:
    """Return the runtime snapshot extended with per-dimension breakdowns.

    Design notes carried over from the original docstring: the structure
    serves the first stage of the admin "AI cost and strategy center":
    1. aggregation is done by scene / backend / provider / model;
    2. it focuses on success rate, average latency, failure count and the
       latest error;
    3. no long-term retention is promised; only the recent window is
       analysed.

    Returns:
        Every key of ``get_runtime_snapshot()``, plus ``rows`` (a copy of
        the recorded calls) and ``by_scene`` / ``by_backend`` /
        ``by_provider`` / ``by_model`` summary lists.
    """
    # Copy the recorded calls while holding the lock; the aggregation below
    # then works on that private copy.
    with cls._runtime_lock:
        recent_calls = list(cls._runtime_metrics)

    # NOTE(review): the snapshot is taken after the lock is released, so it
    # may already include calls that are missing from ``recent_calls``.
    breakdown: Dict[str, Any] = {**cls.get_runtime_snapshot(), "rows": recent_calls}
    for dimension in ("scene", "backend", "provider", "model"):
        breakdown[f"by_{dimension}"] = cls._build_runtime_breakdown_rows(recent_calls, dimension)
    return breakdown
|
|
||||||
|
|
||||||
def is_available(self) -> bool:
|
def is_available(self) -> bool:
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
return False
|
return False
|
||||||
|
|||||||
Reference in New Issue
Block a user