Revert "增强首页LLM运行态与任务调度监控卡片"

This reverts commit 5487142fe1.
2026-05-01 12:45:39 +08:00
parent d1c2aa06f0
commit 9b9059a6d9
2 changed files with 69 additions and 448 deletions
--- a/admin/dashboard/blueprints/system.py
+++ b/admin/dashboard/blueprints/system.py
@@ -16,7 +16,6 @@ from utils.markdown_to_image import get_md2img_health_snapshot, warmup_md2img_br
 from utils.ai.llm_registry import LLMRegistry
 from base.plugin_common.plugin_interface import PluginStatus
 from utils.ai.unified_llm import UnifiedLLMClient
-from utils.decorator.async_job import async_job

 # 创建系统信息蓝图
 system_bp = Blueprint('system', __name__)
@@ -240,269 +239,6 @@ def _extract_redis_runtime_snapshot(db_manager) -> dict:
        return snapshot


-def _parse_snapshot_datetime(value: str | None) -> datetime | None:
-    """把首页摘要里常用的时间字符串安全转换为 datetime。"""
-    text = str(value or "").strip()
-    if not text:
-        return None
-    try:
-        return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
-    except ValueError:
-        return None
-
-
-def _count_enabled_runtime_items(items) -> int:
-    """统计启用项数量。
-
-    兼容原因：
-    1. 新版目录模型里 providers/backends/scenes 可能是 dict；
-    2. 后台页面某些兜底逻辑里也可能给出 list；
-    3. 旧配置没有 enabled 字段时，直接按存在即计数。
-    """
-    rows = []
-    if isinstance(items, dict):
-        rows = list(items.values())
-    elif isinstance(items, list):
-        rows = list(items)
-    count = 0
-    for row in rows:
-        if not isinstance(row, dict):
-            continue
-        if "enabled" not in row or bool(row.get("enabled", True)):
-            count += 1
-    return count
-
-
-def _extract_llm_catalog_summary() -> dict:
-    """提取首页 LLM 路由配置摘要。
-
-    这里不做真实调用探测，只回答两个问题：
-    1. 运行时有没有可用的场景与目标；
-    2. 管理员当前看到的调用记录，大致落到了哪一套路由上。
-    """
-    try:
-        catalog = LLMRegistry.get_catalog() or {}
-        if catalog:
-            providers = catalog.get("providers", {}) or {}
-            dify_apps = catalog.get("dify_apps", {}) or {}
-            backends = catalog.get("backends", {}) or {}
-            scenes = catalog.get("scenes", {}) or {}
-            default_scene = str(catalog.get("default_scene") or "").strip()
-            default_backend = str(LLMRegistry.get_scene_backend_name(default_scene) or "").strip() if default_scene else ""
-            return {
-                "provider_count": _count_enabled_runtime_items(providers),
-                "scene_count": _count_enabled_runtime_items(scenes),
-                "target_count": _count_enabled_runtime_items(backends) + _count_enabled_runtime_items(dify_apps),
-                "default_scene": default_scene,
-                "default_backend": default_backend,
-                "has_routing": _count_enabled_runtime_items(scenes) > 0,
-            }
-
-        # 目录模型不存在时回退到 legacy 视图，至少让首页知道“有没有基础路由配置”。
-        legacy_llm = LLMRegistry.get_llm_config() or {}
-        scenes = legacy_llm.get("scenes", {}) or {}
-        backends = legacy_llm.get("backends", {}) or {}
-        default_backend = str(legacy_llm.get("default_backend") or "").strip()
-        return {
-            "provider_count": 0,
-            "scene_count": len(scenes) if isinstance(scenes, dict) else 0,
-            "target_count": len(backends) if isinstance(backends, dict) else 0,
-            "default_scene": "",
-            "default_backend": default_backend,
-            "has_routing": bool(scenes) or bool(default_backend),
-        }
-    except Exception as llm_catalog_error:
-        logger.warning(f"提取 LLM 路由摘要失败: {llm_catalog_error}")
-        return {
-            "provider_count": 0,
-            "scene_count": 0,
-            "target_count": 0,
-            "default_scene": "",
-            "default_backend": "",
-            "has_routing": False,
-        }
-
-
-def _extract_ai_runtime_snapshot() -> dict:
-    """构建首页 LLM 运行态摘要。
-
-    设计原则：
-    1. 首页只展示“最近调用窗口”的被动观测结果，不主动发请求探活；
-    2. 把最近调用和静态路由配置拼在一起，避免管理员只看到“成功/失败”却不知道走的是哪条链路；
-    3. 如果近期没有调用，也明确区分“未配置”和“已配置但当前空闲”。
-    """
-    runtime_snapshot = UnifiedLLMClient.get_runtime_snapshot() or {}
-    last_call = dict(runtime_snapshot.get("last_call") or {})
-    catalog_summary = _extract_llm_catalog_summary()
-
-    total_calls = _safe_int(runtime_snapshot.get("total_calls"))
-    failed_calls = _safe_int(runtime_snapshot.get("failed_calls"))
-    success_rate = _safe_float(runtime_snapshot.get("success_rate"))
-    avg_latency_ms = _safe_float(runtime_snapshot.get("avg_latency_ms"))
-    last_error = str(runtime_snapshot.get("last_error") or "").strip()
-
-    snapshot = {
-        **runtime_snapshot,
-        "last_call": last_call,
-        "provider_count": catalog_summary.get("provider_count", 0),
-        "scene_count": catalog_summary.get("scene_count", 0),
-        "target_count": catalog_summary.get("target_count", 0),
-        "default_scene": catalog_summary.get("default_scene", ""),
-        "default_backend": catalog_summary.get("default_backend", ""),
-        "has_routing": bool(catalog_summary.get("has_routing")),
-        "last_provider": str(last_call.get("provider") or "").strip(),
-        "last_backend": str(last_call.get("backend") or "").strip(),
-        "last_scene": str(last_call.get("scene") or "").strip(),
-        "last_model": str(last_call.get("model") or "").strip(),
-        "last_timestamp": str(last_call.get("timestamp") or "").strip(),
-        "last_latency_ms": _safe_float(last_call.get("latency_ms")),
-    }
-
-    if not snapshot["has_routing"]:
-        snapshot["status"] = "warning"
-        snapshot["summary"] = "当前未发现完整的 LLM 路由配置，建议先检查默认场景与后端绑定"
-        return snapshot
-
-    if total_calls <= 0:
-        snapshot["status"] = "warning"
-        snapshot["summary"] = (
-            f"已配置 {snapshot['scene_count']} 个场景、{snapshot['target_count']} 个目标，"
-            "最近窗口内暂无统一 LLM 调用记录"
-        )
-        return snapshot
-
-    if failed_calls >= total_calls and total_calls > 0:
-        snapshot["status"] = "danger"
-        snapshot["summary"] = (
-            f"最近 {total_calls} 次调用全部失败，成功率 {success_rate:.2f}%，"
-            f"平均耗时 {avg_latency_ms:.2f}ms"
-        )
-        return snapshot
-
-    if failed_calls > 0 or last_error:
-        snapshot["status"] = "warning"
-        snapshot["summary"] = (
-            f"最近 {total_calls} 次调用中失败 {failed_calls} 次，成功率 {success_rate:.2f}%，"
-            f"平均耗时 {avg_latency_ms:.2f}ms"
-        )
-        return snapshot
-
-    snapshot["status"] = "healthy"
-    snapshot["summary"] = (
-        f"最近 {total_calls} 次调用全部成功，成功率 {success_rate:.2f}%，"
-        f"平均耗时 {avg_latency_ms:.2f}ms"
-    )
-    return snapshot
-
-
-def _extract_scheduler_runtime_snapshot() -> dict:
-    """聚合 async_job 运行态，生成首页任务调度摘要。
-
-    这里的目标不是替代完整任务页，而是回答管理员最常问的几件事：
-    1. 任务有没有正常装载；
-    2. 是否存在失败或非法调度；
-    3. 下一次任务大概何时执行；
-    4. 当前更多是系统任务，还是插件任务在跑。
-    """
-    runtime_rows = async_job.get_jobs_snapshot()
-    next_run_candidates = []
-    failed_rows = []
-    system_job_count = 0
-    plugin_job_count = 0
-
-    for row in runtime_rows:
-        job_key = str(row.get("job_key") or "").strip()
-        owner_name = str(row.get("owner_name") or "system").strip().lower()
-        next_run_at = _parse_snapshot_datetime(row.get("next_run_at"))
-        last_status = str(row.get("last_status") or "").strip().lower()
-
-        if job_key.startswith("plugin_schedule:") or owner_name != "system":
-            plugin_job_count += 1
-        else:
-            system_job_count += 1
-
-        if bool(row.get("enabled")) and next_run_at:
-            next_run_candidates.append(next_run_at)
-        if last_status in {"failed", "invalid_schedule"}:
-            failed_rows.append(row)
-
-    latest_failed_row = {}
-    if failed_rows:
-        failed_rows.sort(
-            key=lambda row: (
-                _parse_snapshot_datetime(row.get("updated_at"))
-                or _parse_snapshot_datetime(row.get("last_run_at"))
-                or datetime.min
-            ),
-            reverse=True,
-        )
-        latest_failed_row = failed_rows[0]
-
-    invalid_jobs = sum(
-        1 for row in runtime_rows if str(row.get("last_status") or "").strip().lower() == "invalid_schedule"
-    )
-    total_jobs = len(runtime_rows)
-    enabled_jobs = sum(1 for row in runtime_rows if bool(row.get("enabled")))
-    running_jobs = sum(1 for row in runtime_rows if bool(row.get("running")))
-    failed_jobs = len(failed_rows)
-    paused_jobs = total_jobs - enabled_jobs
-    never_run_jobs = sum(1 for row in runtime_rows if str(row.get("last_status") or "").strip().lower() == "never")
-    next_run_at_text = min(next_run_candidates).strftime("%Y-%m-%d %H:%M:%S") if next_run_candidates else ""
-    latest_failed_error = str(latest_failed_row.get("last_error") or "").strip()
-    if len(latest_failed_error) > 120:
-        latest_failed_error = f"{latest_failed_error[:117]}..."
-
-    snapshot = {
-        "status": "healthy",
-        "summary": "任务调度运行正常",
-        "total_jobs": total_jobs,
-        "enabled_jobs": enabled_jobs,
-        "running_jobs": running_jobs,
-        "failed_jobs": failed_jobs,
-        "invalid_jobs": invalid_jobs,
-        "paused_jobs": paused_jobs,
-        "never_run_jobs": never_run_jobs,
-        "system_job_count": system_job_count,
-        "plugin_job_count": plugin_job_count,
-        "next_run_at": next_run_at_text,
-        "latest_failed_job_name": str(latest_failed_row.get("name") or "").strip(),
-        "latest_failed_error": latest_failed_error,
-    }
-
-    if total_jobs <= 0:
-        snapshot["status"] = "warning"
-        snapshot["summary"] = "当前没有加载任何定时任务"
-        return snapshot
-
-    if invalid_jobs > 0:
-        snapshot["status"] = "danger"
-        snapshot["summary"] = f"发现 {invalid_jobs} 个任务调度配置非法，建议立即检查任务页"
-        return snapshot
-
-    if failed_jobs > 0:
-        snapshot["status"] = "warning"
-        snapshot["summary"] = (
-            f"最近有 {failed_jobs} 个任务执行失败，"
-            f"下一次执行 {next_run_at_text or '暂未计算'}"
-        )
-        return snapshot
-
-    if enabled_jobs <= 0:
-        snapshot["status"] = "warning"
-        snapshot["summary"] = "任务已加载，但当前没有启用中的调度任务"
-        return snapshot
-
-    if running_jobs > 0:
-        snapshot["summary"] = (
-            f"当前有 {running_jobs} 个任务执行中，"
-            f"下一次执行 {next_run_at_text or '暂未计算'}"
-        )
-        return snapshot
-
-    snapshot["summary"] = f"已启用 {enabled_jobs} 个任务，下一次执行 {next_run_at_text or '暂未计算'}"
-    return snapshot
-
-
 def _legacy_llm_to_catalog(legacy_llm: dict) -> dict:
    """把旧 llm(backends/scenes) 结构转换为新目录结构（仅用于兜底展示）。

@@ -872,6 +608,20 @@ def api_system_health_summary():
        mysql_snapshot = _extract_mysql_runtime_snapshot(server.db_manager)
        redis_snapshot = _extract_redis_runtime_snapshot(server.db_manager)

+        # md2img 健康快照已经有现成实现，这里只做聚合，不主动预热运行时。
+        md2img_snapshot = get_md2img_health_snapshot(ensure_runtime=False) or {}
+        browser_ready = bool(
+            md2img_snapshot.get("browser_ready")
+            or md2img_snapshot.get("playwright_ready")
+            or md2img_snapshot.get("ready")
+        )
+        runtime_ready = bool(
+            md2img_snapshot.get("runtime_ready")
+            or md2img_snapshot.get("runtime_initialized")
+            or md2img_snapshot.get("initialized")
+        )
+        md2img_healthy = runtime_ready and browser_ready
+
        # 首页只需要“够判断”的轻量结论，因此统一产出 status + summary 文本，前端无需重复拼装业务规则。
        robot_running = bool(getattr(robot, "ipad_running", False))
        robot_nickname = str(getattr(robot, "nickname", "") or "").strip()
@@ -897,11 +647,37 @@ def api_system_health_summary():
            error_status = "healthy"
            error_summary = "近 24 小时未记录到异常"

-        # 首页 AI 卡片升级为“运行态 + 路由摘要”，仍然保持被动观测，不主动探活。
-        ai_runtime = _extract_ai_runtime_snapshot()
+        if md2img_healthy:
+            md2img_status = "healthy"
+            md2img_summary = "运行时与浏览器均已就绪"
+        elif runtime_ready or browser_ready:
+            md2img_status = "warning"
+            md2img_summary = "运行时部分可用，建议检查预热状态"
+        else:
+            md2img_status = "danger"
+            md2img_summary = "运行时未就绪，相关转图能力可能不可用"

-        # Markdown 转图更适合保留在专门页面里排障，首页右侧改成更通用的任务调度摘要。
-        scheduler_runtime = _extract_scheduler_runtime_snapshot()
+        # AI 运行态：
+        # 1. 统一从 UnifiedLLMClient 最近调用窗口读取，避免各插件单独维护监控数据；
+        # 2. 若当前窗口还没有调用记录，就明确返回“暂无调用”，避免误判成异常。
+        ai_runtime = UnifiedLLMClient.get_runtime_snapshot()
+        ai_total_calls = int(ai_runtime.get("total_calls") or 0)
+        ai_failed_calls = int(ai_runtime.get("failed_calls") or 0)
+        if ai_total_calls <= 0:
+            ai_status = "warning"
+            ai_summary = "最近窗口内暂无统一 LLM 调用记录"
+        elif ai_failed_calls > 0:
+            ai_status = "warning"
+            ai_summary = (
+                f"最近 {ai_total_calls} 次调用中失败 {ai_failed_calls} 次，"
+                f"平均耗时 {ai_runtime.get('avg_latency_ms', 0)}ms"
+            )
+        else:
+            ai_status = "healthy"
+            ai_summary = (
+                f"最近 {ai_total_calls} 次调用全部成功，"
+                f"平均耗时 {ai_runtime.get('avg_latency_ms', 0)}ms"
+            )

        return jsonify({
            "success": True,
@@ -943,10 +719,17 @@ def api_system_health_summary():
                    "redis": redis_snapshot,
                },
                "ai_runtime": {
+                    "status": ai_status,
+                    "summary": ai_summary,
                    **ai_runtime,
                },
-                "scheduler": {
-                    **scheduler_runtime,
+                "md2img": {
+                    "status": md2img_status,
+                    "healthy": md2img_healthy,
+                    "runtime_ready": runtime_ready,
+                    "browser_ready": browser_ready,
+                    "summary": md2img_summary,
+                    "detail": md2img_snapshot,
                },
            }
        })
--- a/admin/dashboard/templates/index.html
+++ b/admin/dashboard/templates/index.html
@@ -131,7 +131,7 @@
                <div class="section-heading section-heading--stack">
                    <div>
                        <h3>系统健康快照</h3>
-                        <p>把连接状态、插件运行、异常数量、LLM 运行态与任务调度集中到一个面板里。</p>
+                        <p>把连接状态、插件运行、异常数量与转图运行时集中到一个面板里。</p>
                    </div>
                    <div class="health-overview-meta">
                        <span class="health-overview-meta__label">最近刷新</span>
@@ -394,38 +394,15 @@
                        status: 'warning',
                        total_calls: 0,
                        failed_calls: 0,
-                        success_rate: 0,
                        avg_latency_ms: 0,
                        summary: '加载中...',
-                        last_call: {},
-                        scene_count: 0,
-                        target_count: 0,
-                        provider_count: 0,
-                        has_routing: false,
-                        default_scene: '',
-                        default_backend: '',
-                        last_provider: '',
-                        last_backend: '',
-                        last_scene: '',
-                        last_model: '',
-                        last_timestamp: '',
-                        last_latency_ms: 0,
-                        last_error: ''
+                        last_call: {}
                    },
-                    scheduler: {
+                    md2img: {
                        status: 'warning',
-                        total_jobs: 0,
-                        enabled_jobs: 0,
-                        running_jobs: 0,
-                        failed_jobs: 0,
-                        invalid_jobs: 0,
-                        paused_jobs: 0,
-                        never_run_jobs: 0,
-                        system_job_count: 0,
-                        plugin_job_count: 0,
-                        next_run_at: '',
-                        latest_failed_job_name: '',
-                        latest_failed_error: '',
+                        healthy: false,
+                        runtime_ready: false,
+                        browser_ready: false,
                        summary: '加载中...'
                    }
                },
@@ -469,7 +446,7 @@
                const errors = this.healthSummary.errors || {};
                const infrastructure = this.healthSummary.infrastructure || {};
                const aiRuntime = this.healthSummary.ai_runtime || {};
-                const scheduler = this.healthSummary.scheduler || {};
+                const md2img = this.healthSummary.md2img || {};
                return [
                    {
                        key: 'robot',
@@ -506,23 +483,19 @@
                    },
                    {
                        key: 'ai_runtime',
-                        title: 'LLM 运行态',
+                        title: 'AI 运行态',
                        status: aiRuntime.status || 'warning',
-                        value: (aiRuntime.total_calls || 0) > 0
-                            ? `${this.formatMetricNumber(aiRuntime.success_rate, 2)}%`
-                            : `${aiRuntime.scene_count || 0} 个场景`,
+                        value: `${aiRuntime.avg_latency_ms || 0} ms`,
                        summary: aiRuntime.summary || '暂无状态',
-                        serviceBlocks: this.buildAiRuntimeServiceBlocks(aiRuntime),
-                        extra: this.buildAiRuntimeExtra(aiRuntime)
+                        extra: `最近调用 ${aiRuntime.total_calls || 0} 次，失败 ${aiRuntime.failed_calls || 0} 次`
                    },
                    {
-                        key: 'scheduler',
-                        title: '任务调度',
-                        status: scheduler.status || 'warning',
-                        value: `${scheduler.enabled_jobs || 0} / ${scheduler.total_jobs || 0}`,
-                        summary: scheduler.summary || '暂无状态',
-                        serviceBlocks: this.buildSchedulerServiceBlocks(scheduler),
-                        extra: this.buildSchedulerExtra(scheduler)
+                        key: 'md2img',
+                        title: 'Markdown 转图',
+                        status: md2img.status || 'warning',
+                        value: md2img.healthy ? '就绪' : '待检查',
+                        summary: md2img.summary || '暂无状态',
+                        extra: `Runtime ${md2img.runtime_ready ? '已就绪' : '未就绪'} / Browser ${md2img.browser_ready ? '已就绪' : '未就绪'}`
                    }
                ];
            }
@@ -684,141 +657,6 @@
                    }
                ];
            },
-            buildAiRuntimeServiceBlocks(aiRuntime) {
-                // AI 卡片拆成“路由配置”和“最近调用”两个子面板，
-                // 让首页既能判断配置是否完整，也能快速定位最近请求到底走了哪条链路。
-                return [
-                    {
-                        key: 'ai-routing',
-                        title: '路由配置',
-                        status: aiRuntime.has_routing ? 'healthy' : 'warning',
-                        summary: aiRuntime.default_scene
-                            ? `默认场景：${aiRuntime.default_scene}`
-                            : '当前未设置默认场景',
-                        metrics: [
-                            {
-                                label: '场景数量',
-                                value: this.formatMetricNumber(aiRuntime.scene_count)
-                            },
-                            {
-                                label: '目标数量',
-                                value: this.formatMetricNumber(aiRuntime.target_count)
-                            },
-                            {
-                                label: 'Provider 模板',
-                                value: this.formatMetricNumber(aiRuntime.provider_count)
-                            },
-                            {
-                                label: '默认后端',
-                                value: aiRuntime.default_backend || '-'
-                            }
-                        ]
-                    },
-                    {
-                        key: 'ai-last-call',
-                        title: '最近调用',
-                        status: (aiRuntime.failed_calls || 0) > 0 ? 'warning' : ((aiRuntime.total_calls || 0) > 0 ? 'healthy' : 'warning'),
-                        summary: aiRuntime.last_timestamp
-                            ? `最近一次记录时间：${aiRuntime.last_timestamp}`
-                            : '当前窗口内暂无调用记录',
-                        metrics: [
-                            {
-                                label: 'Provider',
-                                value: aiRuntime.last_provider || '-'
-                            },
-                            {
-                                label: 'Backend',
-                                value: aiRuntime.last_backend || '-'
-                            },
-                            {
-                                label: 'Scene',
-                                value: aiRuntime.last_scene || '-'
-                            },
-                            {
-                                label: '模型',
-                                value: aiRuntime.last_model || '-'
-                            },
-                            {
-                                label: '最近耗时',
-                                value: `${this.formatMetricNumber(aiRuntime.last_latency_ms, 2)} ms`
-                            },
-                            {
-                                label: '最近错误',
-                                value: aiRuntime.last_error || '无'
-                            }
-                        ]
-                    }
-                ];
-            },
-            buildAiRuntimeExtra(aiRuntime) {
-                return `最近调用 ${aiRuntime.total_calls || 0} 次，失败 ${aiRuntime.failed_calls || 0} 次，平均耗时 ${this.formatMetricNumber(aiRuntime.avg_latency_ms, 2)} ms`;
-            },
-            buildSchedulerServiceBlocks(scheduler) {
-                // 任务调度卡片只保留首页最需要的摘要：
-                // 任务装载量、执行态、失败数，以及系统任务/插件任务的大致构成。
-                return [
-                    {
-                        key: 'scheduler-overview',
-                        title: '任务装载',
-                        status: scheduler.enabled_jobs > 0 ? 'healthy' : 'warning',
-                        summary: scheduler.next_run_at
-                            ? `下一次执行：${scheduler.next_run_at}`
-                            : '当前没有可计算的下一次执行时间',
-                        metrics: [
-                            {
-                                label: '启用任务',
-                                value: this.formatMetricNumber(scheduler.enabled_jobs)
-                            },
-                            {
-                                label: '暂停任务',
-                                value: this.formatMetricNumber(scheduler.paused_jobs)
-                            },
-                            {
-                                label: '系统任务',
-                                value: this.formatMetricNumber(scheduler.system_job_count)
-                            },
-                            {
-                                label: '插件任务',
-                                value: this.formatMetricNumber(scheduler.plugin_job_count)
-                            }
-                        ]
-                    },
-                    {
-                        key: 'scheduler-runtime',
-                        title: '执行状态',
-                        status: scheduler.status || 'warning',
-                        summary: scheduler.latest_failed_job_name
-                            ? `最近失败任务：${scheduler.latest_failed_job_name}`
-                            : '当前未发现最近失败任务',
-                        metrics: [
-                            {
-                                label: '执行中',
-                                value: this.formatMetricNumber(scheduler.running_jobs)
-                            },
-                            {
-                                label: '失败任务',
-                                value: this.formatMetricNumber(scheduler.failed_jobs)
-                            },
-                            {
-                                label: '非法调度',
-                                value: this.formatMetricNumber(scheduler.invalid_jobs)
-                            },
-                            {
-                                label: '未执行过',
-                                value: this.formatMetricNumber(scheduler.never_run_jobs)
-                            }
-                        ]
-                    }
-                ];
-            },
-            buildSchedulerExtra(scheduler) {
-                if (scheduler.latest_failed_error) {
-                    return `最近失败原因：${scheduler.latest_failed_error}`;
-                }
-                return scheduler.next_run_at
-                    ? `下次执行时间：${scheduler.next_run_at}`
-                    : '当前暂无可用的下一次执行时间';
-            },
            renderPieChart(chartId, usageValue, label) {
                const ctx = document.getElementById(chartId);
                if (!ctx) return;