@@ -42,203 +42,6 @@ def _save_system_yaml(config_obj: dict) -> None:
|
||||
yaml.safe_dump(config_obj, f, allow_unicode=True, sort_keys=False)
|
||||
|
||||
|
||||
def _safe_int(value, default: int = 0) -> int:
|
||||
"""把数据库 / Redis 返回的字符串数字安全转成整数。"""
|
||||
try:
|
||||
if value in (None, ""):
|
||||
return default
|
||||
return int(float(value))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _safe_float(value, default: float = 0.0) -> float:
|
||||
"""把数据库 / Redis 返回的值安全转成浮点数。"""
|
||||
try:
|
||||
if value in (None, ""):
|
||||
return default
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _format_bytes_to_mb(value: int) -> float:
    """Convert a byte count to megabytes, rounded to two decimals for the home-page summary.

    The input goes through ``_safe_float`` first, so values it cannot
    convert are treated as 0.0 bytes.
    """
    byte_count = _safe_float(value, 0.0)
    megabytes = byte_count / 1024 / 1024
    return round(megabytes, 2)
|
||||
|
||||
|
||||
def _extract_mysql_runtime_snapshot(db_manager) -> dict:
|
||||
"""采集 MySQL 运行态摘要。
|
||||
|
||||
首页目标不是替代 DBA 工具,而是让管理员一眼判断:
|
||||
1. 数据库是不是活着;
|
||||
2. 当前连接压力高不高;
|
||||
3. 当前库规模是否已经明显变大;
|
||||
4. 有没有必要继续深入到更专业的监控页排查。
|
||||
"""
|
||||
snapshot = {
|
||||
"status": "healthy",
|
||||
"summary": "连接正常",
|
||||
"database": db_manager.get_mysql_database_name(),
|
||||
"version": "",
|
||||
"threads_connected": 0,
|
||||
"threads_running": 0,
|
||||
"max_connections": 0,
|
||||
"connection_usage_percent": 0.0,
|
||||
"questions_per_second": 0.0,
|
||||
"uptime_seconds": 0,
|
||||
"table_count": 0,
|
||||
"schema_size_mb": 0.0,
|
||||
"slow_query_threshold_ms": db_manager.get_slow_query_threshold_ms(),
|
||||
}
|
||||
|
||||
mysql_conn = db_manager.get_mysql_connection()
|
||||
try:
|
||||
with mysql_conn.cursor(dictionary=True) as cursor:
|
||||
# 基础探活与版本识别:
|
||||
# 1. SELECT VERSION() 成本极低;
|
||||
# 2. 相比只做 SELECT 1,它还能顺便拿到版本信息;
|
||||
# 3. 首页卡片里显示版本,方便线上排查“是不是某台库版本不一致”。
|
||||
cursor.execute("SELECT VERSION() AS version, DATABASE() AS database_name")
|
||||
version_row = cursor.fetchone() or {}
|
||||
snapshot["version"] = str(version_row.get("version") or "").strip()
|
||||
snapshot["database"] = str(version_row.get("database_name") or snapshot["database"] or "").strip()
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
SHOW GLOBAL STATUS
|
||||
WHERE Variable_name IN ('Threads_connected', 'Threads_running', 'Questions', 'Uptime')
|
||||
"""
|
||||
)
|
||||
status_rows = cursor.fetchall() or []
|
||||
status_map = {
|
||||
str(row.get("Variable_name") or "").strip(): row.get("Value")
|
||||
for row in status_rows
|
||||
}
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
SHOW GLOBAL VARIABLES
|
||||
WHERE Variable_name IN ('max_connections')
|
||||
"""
|
||||
)
|
||||
variable_rows = cursor.fetchall() or []
|
||||
variable_map = {
|
||||
str(row.get("Variable_name") or "").strip(): row.get("Value")
|
||||
for row in variable_rows
|
||||
}
|
||||
|
||||
# information_schema 聚合虽然比 SELECT 1 重一点,但仍属于轻量级元信息查询:
|
||||
# 1. 只在首页 30 秒级刷新一次,成本可接受;
|
||||
# 2. 能直接给出当前业务库表数量与体量变化;
|
||||
# 3. 对判断“是不是消息表膨胀导致后台变慢”很有帮助。
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT
|
||||
COUNT(*) AS table_count,
|
||||
COALESCE(SUM(data_length + index_length), 0) AS schema_size_bytes
|
||||
FROM information_schema.tables
|
||||
WHERE table_schema = DATABASE()
|
||||
"""
|
||||
)
|
||||
schema_row = cursor.fetchone() or {}
|
||||
|
||||
snapshot["threads_connected"] = _safe_int(status_map.get("Threads_connected"))
|
||||
snapshot["threads_running"] = _safe_int(status_map.get("Threads_running"))
|
||||
snapshot["max_connections"] = _safe_int(variable_map.get("max_connections"))
|
||||
snapshot["uptime_seconds"] = _safe_int(status_map.get("Uptime"))
|
||||
total_questions = _safe_int(status_map.get("Questions"))
|
||||
if snapshot["uptime_seconds"] > 0:
|
||||
snapshot["questions_per_second"] = round(total_questions / snapshot["uptime_seconds"], 2)
|
||||
if snapshot["max_connections"] > 0:
|
||||
snapshot["connection_usage_percent"] = round(
|
||||
(snapshot["threads_connected"] / snapshot["max_connections"]) * 100,
|
||||
1,
|
||||
)
|
||||
snapshot["table_count"] = _safe_int(schema_row.get("table_count"))
|
||||
snapshot["schema_size_mb"] = _format_bytes_to_mb(schema_row.get("schema_size_bytes"))
|
||||
|
||||
if snapshot["connection_usage_percent"] >= 80 or snapshot["threads_running"] >= 12:
|
||||
snapshot["status"] = "warning"
|
||||
snapshot["summary"] = (
|
||||
f"连接压力偏高:已连接 {snapshot['threads_connected']} / {snapshot['max_connections']},"
|
||||
f"运行中线程 {snapshot['threads_running']}"
|
||||
)
|
||||
else:
|
||||
snapshot["summary"] = (
|
||||
f"连接正常:已连接 {snapshot['threads_connected']} / {snapshot['max_connections'] or '-'},"
|
||||
f"QPS {snapshot['questions_per_second']}"
|
||||
)
|
||||
return snapshot
|
||||
except Exception as mysql_error:
|
||||
snapshot["status"] = "danger"
|
||||
snapshot["summary"] = f"MySQL 探测失败: {mysql_error}"
|
||||
return snapshot
|
||||
finally:
|
||||
mysql_conn.close()
|
||||
|
||||
|
||||
def _extract_redis_runtime_snapshot(db_manager) -> dict:
    """Collect a dashboard-level runtime summary for Redis.

    Args:
        db_manager: Object providing ``get_redis_connection()`` and,
            optionally, a ``redis_config`` mapping whose ``db`` entry is
            reported as ``db_index``.

    Returns:
        A dict with ``status`` ("healthy", "warning" or "danger"), a
        human-readable ``summary`` and the collected metrics. Any exception
        raised while probing is reported as ``status == "danger"``.
    """
    config = getattr(db_manager, "redis_config", {}) or {}
    snapshot = dict(
        status="healthy",
        summary="连接正常",
        db_index=_safe_int(config.get("db", 0)),
        key_count=0,
        connected_clients=0,
        blocked_clients=0,
        ops_per_sec=0,
        used_memory_human="",
        used_memory_peak_human="",
        memory_usage_percent=0.0,
        uptime_seconds=0,
        hit_rate_percent=0.0,
    )

    try:
        client = db_manager.get_redis_connection()
        client.ping()
        server_info = client.info() or {}

        snapshot["key_count"] = _safe_int(client.dbsize())
        # Integer counters: (snapshot field, INFO field).
        for target, source in (
            ("connected_clients", "connected_clients"),
            ("blocked_clients", "blocked_clients"),
            ("ops_per_sec", "instantaneous_ops_per_sec"),
        ):
            snapshot[target] = _safe_int(server_info.get(source))
        # Human-readable memory figures are passed through as stripped text.
        for text_field in ("used_memory_human", "used_memory_peak_human"):
            snapshot[text_field] = str(server_info.get(text_field) or "").strip()
        snapshot["uptime_seconds"] = _safe_int(server_info.get("uptime_in_seconds"))

        # Memory usage stays at 0.0 unless a positive maxmemory is reported.
        memory_limit = _safe_int(server_info.get("maxmemory"))
        memory_used = _safe_int(server_info.get("used_memory"))
        if memory_limit > 0:
            snapshot["memory_usage_percent"] = round((memory_used / memory_limit) * 100, 1)

        # Hit rate = hits / (hits + misses); skipped when there were no lookups.
        hits = _safe_int(server_info.get("keyspace_hits"))
        misses = _safe_int(server_info.get("keyspace_misses"))
        lookups = hits + misses
        if lookups > 0:
            snapshot["hit_rate_percent"] = round((hits / lookups) * 100, 1)

        # Warn when any client is blocked or memory usage reaches 80%.
        needs_attention = (
            snapshot["blocked_clients"] > 0 or snapshot["memory_usage_percent"] >= 80
        )
        if needs_attention:
            snapshot["status"] = "warning"
            headline = "缓存压力需关注"
        else:
            headline = "缓存正常"
        snapshot["summary"] = (
            f"{headline}:keys {snapshot['key_count']},"
            f"clients {snapshot['connected_clients']},ops/s {snapshot['ops_per_sec']}"
        )
    except Exception as redis_error:
        # Best-effort probe: report the failure on the dashboard only.
        snapshot["status"] = "danger"
        snapshot["summary"] = f"Redis 探测失败: {redis_error}"
    return snapshot
|
||||
|
||||
|
||||
def _legacy_llm_to_catalog(legacy_llm: dict) -> dict:
|
||||
"""把旧 llm(backends/scenes) 结构转换为新目录结构(仅用于兜底展示)。
|
||||
|
||||
@@ -602,11 +405,31 @@ def api_system_health_summary():
|
||||
_, recent_error_count = server.stats_db.get_error_logs(days=1, page=1, limit=1)
|
||||
|
||||
# 基础设施健康:
|
||||
# 1. MySQL / Redis 都在这里做“首页摘要级”探测,而不是完整深度巡检;
|
||||
# 2. 除了连通性,还补充少量负载指标,方便管理员快速判断是否需要继续下钻;
|
||||
# 1. MySQL 用最轻量的 SELECT 1 做可用性探测;
|
||||
# 2. Redis 用 PING 验证连接池当前是否可拿到可用连接;
|
||||
# 3. 即使探测失败也只反馈到看板,不影响主接口整体返回。
|
||||
mysql_snapshot = _extract_mysql_runtime_snapshot(server.db_manager)
|
||||
redis_snapshot = _extract_redis_runtime_snapshot(server.db_manager)
|
||||
mysql_status = "healthy"
|
||||
mysql_summary = "连接正常"
|
||||
try:
|
||||
mysql_conn = server.db_manager.get_mysql_connection()
|
||||
try:
|
||||
with mysql_conn.cursor() as cursor:
|
||||
cursor.execute("SELECT 1")
|
||||
cursor.fetchone()
|
||||
finally:
|
||||
mysql_conn.close()
|
||||
except Exception as mysql_error:
|
||||
mysql_status = "danger"
|
||||
mysql_summary = f"MySQL 探测失败: {mysql_error}"
|
||||
|
||||
redis_status = "healthy"
|
||||
redis_summary = "连接正常"
|
||||
try:
|
||||
redis_conn = server.db_manager.get_redis_connection()
|
||||
redis_conn.ping()
|
||||
except Exception as redis_error:
|
||||
redis_status = "danger"
|
||||
redis_summary = f"Redis 探测失败: {redis_error}"
|
||||
|
||||
# md2img 健康快照已经有现成实现,这里只做聚合,不主动预热运行时。
|
||||
md2img_snapshot = get_md2img_health_snapshot(ensure_runtime=False) or {}
|
||||
@@ -701,22 +524,20 @@ def api_system_health_summary():
|
||||
"summary": error_summary,
|
||||
},
|
||||
"infrastructure": {
|
||||
"status": (
|
||||
"danger"
|
||||
if "danger" in {mysql_snapshot.get("status"), redis_snapshot.get("status")}
|
||||
else ("warning" if "warning" in {mysql_snapshot.get("status"), redis_snapshot.get("status")} else "healthy")
|
||||
),
|
||||
"status": "healthy" if mysql_status == "healthy" and redis_status == "healthy" else "danger",
|
||||
"summary": (
|
||||
"MySQL / Redis 均正常"
|
||||
if mysql_snapshot.get("status") == "healthy" and redis_snapshot.get("status") == "healthy"
|
||||
else (
|
||||
"基础设施连接正常,但部分负载指标需要关注"
|
||||
if mysql_snapshot.get("status") != "danger" and redis_snapshot.get("status") != "danger"
|
||||
else "存在基础设施连接异常"
|
||||
)
|
||||
if mysql_status == "healthy" and redis_status == "healthy"
|
||||
else "存在基础设施连接异常"
|
||||
),
|
||||
"mysql": mysql_snapshot,
|
||||
"redis": redis_snapshot,
|
||||
"mysql": {
|
||||
"status": mysql_status,
|
||||
"summary": mysql_summary,
|
||||
},
|
||||
"redis": {
|
||||
"status": redis_status,
|
||||
"summary": redis_summary,
|
||||
},
|
||||
},
|
||||
"ai_runtime": {
|
||||
"status": ai_status,
|
||||
|
||||
@@ -148,29 +148,6 @@
|
||||
</div>
|
||||
<div class="health-item__value">{% raw %}{{ card.value }}{% endraw %}</div>
|
||||
<div class="health-item__summary">{% raw %}{{ card.summary }}{% endraw %}</div>
|
||||
<div v-if="card.serviceBlocks && card.serviceBlocks.length" class="health-service-grid">
|
||||
<div
|
||||
v-for="service in card.serviceBlocks"
|
||||
:key="service.key"
|
||||
class="health-service-panel"
|
||||
:class="`health-service-panel--${service.status}`">
|
||||
<div class="health-service-panel__head">
|
||||
<div>
|
||||
<div class="health-service-panel__title">{% raw %}{{ service.title }}{% endraw %}</div>
|
||||
<div class="health-service-panel__summary">{% raw %}{{ service.summary }}{% endraw %}</div>
|
||||
</div>
|
||||
<span class="health-service-panel__badge" :class="`health-service-panel__badge--${service.status}`">
|
||||
{% raw %}{{ getHealthStatusText(service.status) }}{% endraw %}
|
||||
</span>
|
||||
</div>
|
||||
<div class="health-service-metrics">
|
||||
<div v-for="metric in service.metrics" :key="metric.label" class="health-service-metric">
|
||||
<span class="health-service-metric__label">{% raw %}{{ metric.label }}{% endraw %}</span>
|
||||
<span class="health-service-metric__value">{% raw %}{{ metric.value }}{% endraw %}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div v-if="card.extra" class="health-item__extra">{% raw %}{{ card.extra }}{% endraw %}</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -476,10 +453,9 @@
|
||||
key: 'infrastructure',
|
||||
title: '基础设施',
|
||||
status: infrastructure.status || 'warning',
|
||||
value: `${this.countHealthyInfrastructureServices(infrastructure)} / 2`,
|
||||
value: infrastructure.status === 'healthy' ? '正常' : '异常',
|
||||
summary: infrastructure.summary || '暂无状态',
|
||||
serviceBlocks: this.buildInfrastructureServiceBlocks(infrastructure),
|
||||
extra: '首页展示的是服务摘要;如果后续要做更深入的运维排查,再单独拆详细页会更合适。'
|
||||
extra: `MySQL:${((infrastructure.mysql || {}).status === 'healthy') ? '正常' : '异常'} / Redis:${((infrastructure.redis || {}).status === 'healthy') ? '正常' : '异常'}`
|
||||
},
|
||||
{
|
||||
key: 'ai_runtime',
|
||||
@@ -563,100 +539,6 @@
|
||||
};
|
||||
return statusMap[status] || '未知';
|
||||
},
|
||||
formatCompactDuration(seconds) {
|
||||
const totalSeconds = parseInt(seconds) || 0;
|
||||
if (totalSeconds <= 0) return '-';
|
||||
const days = Math.floor(totalSeconds / 86400);
|
||||
const hours = Math.floor((totalSeconds % 86400) / 3600);
|
||||
const minutes = Math.floor((totalSeconds % 3600) / 60);
|
||||
if (days > 0) return `${days}D ${hours}H`;
|
||||
if (hours > 0) return `${hours}H ${minutes}M`;
|
||||
return `${minutes}M`;
|
||||
},
|
||||
formatMetricNumber(value, fractionDigits = 0) {
|
||||
if (value === null || value === undefined || value === '') return '-';
|
||||
const numeric = Number(value);
|
||||
if (Number.isNaN(numeric)) return String(value);
|
||||
return numeric.toFixed(fractionDigits);
|
||||
},
|
||||
countHealthyInfrastructureServices(infrastructure) {
|
||||
const mysql = infrastructure.mysql || {};
|
||||
const redis = infrastructure.redis || {};
|
||||
let count = 0;
|
||||
if (mysql.status === 'healthy') count += 1;
|
||||
if (redis.status === 'healthy') count += 1;
|
||||
return count;
|
||||
},
|
||||
buildInfrastructureServiceBlocks(infrastructure) {
|
||||
const mysql = infrastructure.mysql || {};
|
||||
const redis = infrastructure.redis || {};
|
||||
return [
|
||||
{
|
||||
key: 'mysql',
|
||||
title: 'MySQL',
|
||||
status: mysql.status || 'warning',
|
||||
summary: mysql.summary || '暂无状态',
|
||||
metrics: [
|
||||
{
|
||||
label: '连接负载',
|
||||
value: `${this.formatMetricNumber(mysql.connection_usage_percent, 1)}%`
|
||||
},
|
||||
{
|
||||
label: '连接数',
|
||||
value: `${this.formatMetricNumber(mysql.threads_connected)} / ${mysql.max_connections || '-'}`
|
||||
},
|
||||
{
|
||||
label: '运行线程',
|
||||
value: this.formatMetricNumber(mysql.threads_running)
|
||||
},
|
||||
{
|
||||
label: 'QPS',
|
||||
value: this.formatMetricNumber(mysql.questions_per_second, 2)
|
||||
},
|
||||
{
|
||||
label: '库体积',
|
||||
value: `${this.formatMetricNumber(mysql.schema_size_mb, 2)} MB`
|
||||
},
|
||||
{
|
||||
label: '表数量',
|
||||
value: this.formatMetricNumber(mysql.table_count)
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
key: 'redis',
|
||||
title: 'Redis',
|
||||
status: redis.status || 'warning',
|
||||
summary: redis.summary || '暂无状态',
|
||||
metrics: [
|
||||
{
|
||||
label: 'Key 数量',
|
||||
value: this.formatMetricNumber(redis.key_count)
|
||||
},
|
||||
{
|
||||
label: '客户端',
|
||||
value: this.formatMetricNumber(redis.connected_clients)
|
||||
},
|
||||
{
|
||||
label: 'OPS/s',
|
||||
value: this.formatMetricNumber(redis.ops_per_sec)
|
||||
},
|
||||
{
|
||||
label: '内存占用',
|
||||
value: redis.used_memory_human || '-'
|
||||
},
|
||||
{
|
||||
label: '命中率',
|
||||
value: `${this.formatMetricNumber(redis.hit_rate_percent, 1)}%`
|
||||
},
|
||||
{
|
||||
label: '运行时间',
|
||||
value: this.formatCompactDuration(redis.uptime_seconds)
|
||||
}
|
||||
]
|
||||
}
|
||||
];
|
||||
},
|
||||
renderPieChart(chartId, usageValue, label) {
|
||||
const ctx = document.getElementById(chartId);
|
||||
if (!ctx) return;
|
||||
@@ -1213,104 +1095,6 @@
|
||||
color: #475569;
|
||||
}
|
||||
|
||||
/* Grid inside a health card: two equal-width columns of service panels. */
.health-service-grid {
    display: grid;
    grid-template-columns: repeat(2, minmax(0, 1fr));
    gap: 12px;
    margin-top: 16px;
}

/* One service panel: rounded, lightly bordered box. */
.health-service-panel {
    padding: 14px;
    border-radius: 16px;
    border: 1px solid rgba(148, 163, 184, 0.14);
    background: rgba(248, 250, 252, 0.72);
}

/* Status modifiers: a faint inset ring tinted by status (green / amber / red). */
.health-service-panel--healthy {
    box-shadow: inset 0 0 0 1px rgba(16, 185, 129, 0.08);
}

.health-service-panel--warning {
    box-shadow: inset 0 0 0 1px rgba(245, 158, 11, 0.10);
}

.health-service-panel--danger {
    box-shadow: inset 0 0 0 1px rgba(239, 68, 68, 0.10);
}

/* Panel header row: content pushed to opposite ends, top-aligned. */
.health-service-panel__head {
    display: flex;
    align-items: flex-start;
    justify-content: space-between;
    gap: 12px;
    margin-bottom: 12px;
}

/* Panel title text. */
.health-service-panel__title {
    font-size: 14px;
    font-weight: 700;
    color: #0f172a;
    margin-bottom: 4px;
}

/* Panel summary text. */
.health-service-panel__summary {
    font-size: 12px;
    line-height: 1.6;
    color: #64748b;
}

/* Pill-shaped status badge; flex-shrink: 0 keeps it from being squeezed. */
.health-service-panel__badge {
    display: inline-flex;
    align-items: center;
    justify-content: center;
    min-width: 44px;
    padding: 4px 8px;
    border-radius: 999px;
    font-size: 11px;
    font-weight: 700;
    flex-shrink: 0;
}

/* Badge colours per status. */
.health-service-panel__badge--healthy {
    color: #047857;
    background: rgba(16, 185, 129, 0.12);
}

.health-service-panel__badge--warning {
    color: #b45309;
    background: rgba(245, 158, 11, 0.14);
}

.health-service-panel__badge--danger {
    color: #b91c1c;
    background: rgba(239, 68, 68, 0.14);
}

/* Metric list inside a panel: two equal-width columns. */
.health-service-metrics {
    display: grid;
    grid-template-columns: repeat(2, minmax(0, 1fr));
    gap: 10px 12px;
}

/* A single metric: stacked vertically. */
.health-service-metric {
    display: flex;
    flex-direction: column;
    gap: 4px;
}

/* Metric label text. */
.health-service-metric__label {
    font-size: 11px;
    color: #94a3b8;
}

/* Metric value text; long values may wrap instead of overflowing. */
.health-service-metric__value {
    font-size: 13px;
    font-weight: 600;
    color: #1e293b;
    word-break: break-word;
}
|
||||
|
||||
.health-item__extra {
|
||||
margin-top: 12px;
|
||||
padding-top: 12px;
|
||||
@@ -1666,10 +1450,6 @@
|
||||
.health-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.health-service-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
@@ -1779,10 +1559,6 @@
|
||||
font-size: 24px;
|
||||
}
|
||||
|
||||
.health-service-metrics {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.chart-container--large,
|
||||
.chart-container--panel {
|
||||
height: 220px;
|
||||
|
||||
Reference in New Issue
Block a user