增强插件治理中心执行表现与风险排行

This commit is contained in:
liuwei
2026-04-30 17:31:31 +08:00
parent 5487142fe1
commit e3694b9320
3 changed files with 399 additions and 10 deletions

View File

@@ -602,11 +602,107 @@ class PluginManager:
"info_count": level_counts["info"],
}
@staticmethod
def _format_runtime_timestamp(timestamp_value: Any) -> str:
"""把运行态中的 unix 时间戳转成后台可读文本。"""
try:
normalized = float(timestamp_value or 0.0)
except (TypeError, ValueError):
return ""
if normalized <= 0:
return ""
try:
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(normalized))
except (OverflowError, OSError, ValueError):
return ""
@staticmethod
def _safe_percent(numerator: Any, denominator: Any) -> float:
"""安全计算百分比,避免分母为空时抛异常。"""
try:
denominator_value = float(denominator or 0.0)
if denominator_value <= 0:
return 0.0
return round((float(numerator or 0.0) / denominator_value) * 100, 2)
except (TypeError, ValueError, ZeroDivisionError):
return 0.0
def _build_execution_summary(self, guard_snapshot: Dict[str, Any]) -> Dict[str, Any]:
    """Turn a raw execution-guard snapshot into a display-friendly summary.

    The raw snapshot is low-level state; consuming it directly would push
    rule evaluation into the front end. This method derives the total
    execution count, success and timeout rates, formatted last
    success/failure times and a truncated last error message, plus an
    overall ``status`` and a one-line ``summary``.

    Status precedence:
        * ``"info"``    - no executions recorded yet;
        * ``"healthy"`` - at least one execution and no failure signals;
        * ``"warning"`` - any failure/timeout counter is non-zero;
        * ``"error"``   - the circuit is open (overrides everything else).

    Args:
        guard_snapshot: Mapping of guard counters and state. ``None`` or an
            empty mapping is treated as "no data"; missing or falsy entries
            fall back to zero / empty-string defaults.

    Returns:
        A dict with the keys ``status``, ``summary``, ``total_executions``,
        ``success_count_total``, ``failure_count_total``,
        ``timeout_count_total``, ``success_rate``, ``timeout_rate``,
        ``consecutive_failures``, ``consecutive_timeouts``,
        ``last_process_time_ms``, ``last_success_at_text``,
        ``last_failure_at_text``, ``last_error_message``,
        ``last_failure_type``, ``last_timeout_seconds``, ``circuit_state``
        and ``open_remaining_seconds``.
    """
    # Work on a shallow copy so the caller's snapshot is never mutated.
    guard_snapshot = dict(guard_snapshot or {})

    success_count_total = int(guard_snapshot.get("success_count_total", 0) or 0)
    failure_count_total = int(guard_snapshot.get("failure_count_total", 0) or 0)
    timeout_count_total = int(guard_snapshot.get("timeout_count_total", 0) or 0)
    consecutive_failures = int(guard_snapshot.get("consecutive_failures", 0) or 0)
    consecutive_timeouts = int(guard_snapshot.get("consecutive_timeouts", 0) or 0)
    last_process_time_ms = round(float(guard_snapshot.get("last_process_time_ms", 0.0) or 0.0), 2)
    circuit_state = str(guard_snapshot.get("circuit_state", "closed") or "closed").strip().lower()
    # Parsed once; used by both the "open" summary text and the returned dict.
    open_remaining_seconds = int(guard_snapshot.get("open_remaining_seconds", 0) or 0)

    # Cap the error text at 240 characters (237 + "...") to keep the page compact.
    last_error_message = str(guard_snapshot.get("last_error_message") or "").strip()
    if len(last_error_message) > 240:
        last_error_message = f"{last_error_message[:237]}..."

    # NOTE(review): timeouts are not added to the total, so this presumably
    # assumes they are already counted in failure_count_total - confirm
    # against the guard implementation.
    total_executions = success_count_total + failure_count_total
    success_rate = self._safe_percent(success_count_total, total_executions)
    timeout_rate = self._safe_percent(timeout_count_total, total_executions)
    last_success_at_text = self._format_runtime_timestamp(guard_snapshot.get("last_success_at"))
    last_failure_at_text = self._format_runtime_timestamp(guard_snapshot.get("last_failure_at"))

    status = "info"
    summary = "暂无执行样本"
    if total_executions > 0:
        status = "healthy"
        # The first fragment ends with a separator, matching the other summaries.
        summary = (
            f"累计执行 {total_executions} 次,成功率 {success_rate}%,"
            f"最近耗时 {last_process_time_ms}ms"
        )

    has_failure_signal = (
        failure_count_total > 0
        or timeout_count_total > 0
        or consecutive_failures > 0
        or consecutive_timeouts > 0
    )
    # An open circuit is the clearest high-risk signal, so it takes priority.
    if circuit_state == "open":
        status = "error"
        summary = (
            f"插件当前处于熔断中,连续失败 {consecutive_failures} 次,"
            f"恢复剩余 {open_remaining_seconds}s"
        )
    elif has_failure_signal:
        status = "warning"
        summary = (
            f"累计失败 {failure_count_total} 次,超时 {timeout_count_total} 次,"
            f"成功率 {success_rate}%"
        )

    return {
        "status": status,
        "summary": summary,
        "total_executions": total_executions,
        "success_count_total": success_count_total,
        "failure_count_total": failure_count_total,
        "timeout_count_total": timeout_count_total,
        "success_rate": success_rate,
        "timeout_rate": timeout_rate,
        "consecutive_failures": consecutive_failures,
        "consecutive_timeouts": consecutive_timeouts,
        "last_process_time_ms": last_process_time_ms,
        "last_success_at_text": last_success_at_text,
        "last_failure_at_text": last_failure_at_text,
        "last_error_message": last_error_message,
        "last_failure_type": str(guard_snapshot.get("last_failure_type") or "").strip(),
        "last_timeout_seconds": int(guard_snapshot.get("last_timeout_seconds", 0) or 0),
        "circuit_state": circuit_state,
        "open_remaining_seconds": open_remaining_seconds,
    }
def _build_plugin_snapshot(self, plugin: PluginInterface) -> Dict[str, Any]:
"""为已加载插件生成标准治理快照。"""
module_name = self._get_module_name_from_plugin(plugin) or "unknown"
runtime_record = self._get_module_runtime_state(module_name)
guard_snapshot = self.get_plugin_guard_snapshot(module_name)
execution_summary = self._build_execution_summary(guard_snapshot)
config_path = plugin.get_config_path()
config_overview = self._read_plugin_config_overview(config_path)
commands = self._collect_plugin_commands(plugin)
@@ -648,12 +744,14 @@ class PluginManager:
"runtime_state": runtime_record.get("state", "loaded"),
"runtime_message": runtime_record.get("message", ""),
"execution_guard": guard_snapshot,
"execution_summary": execution_summary,
}
def _build_unloaded_plugin_snapshot(self, module_name: str) -> Dict[str, Any]:
"""为未成功加载的插件模块生成治理快照。"""
runtime_record = self._get_module_runtime_state(module_name)
guard_snapshot = self.get_plugin_guard_snapshot(module_name)
execution_summary = self._build_execution_summary(guard_snapshot)
config_path = os.path.join(self.plugin_dir, module_name, "config.toml")
if not os.path.exists(config_path):
config_path = os.path.join(self.plugin_dir, f"{module_name}", "config.toml")
@@ -700,6 +798,7 @@ class PluginManager:
"runtime_state": runtime_state or "discovered",
"runtime_message": runtime_record.get("message", ""),
"execution_guard": guard_snapshot,
"execution_summary": execution_summary,
}
@staticmethod