完善数据层慢SQL观测与消息查询优化
- 为数据库公共层增加慢 SQL 阈值配置与统一耗时日志记录
- 为消息表补充群成员回溯、群类型过滤和待处理媒体扫描等关键索引
- 将多处按日期查询改为时间范围查询,减少 DATE(timestamp) 导致的索引失效
- 修正消息存储层重复定义的日期范围方法,并更新工程优化文档中的 7.4 当前进展
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
import json
|
||||
from threading import Lock
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from db.base import BaseDBOperator
|
||||
@@ -12,8 +13,103 @@ from wechat_ipad.models.message import WxMessage
|
||||
class MessageStorageDB(BaseDBOperator):
|
||||
"""消息存储相关数据库操作"""
|
||||
|
||||
_performance_ready = False
|
||||
_performance_lock = Lock()
|
||||
|
||||
def __init__(self, db_manager: DBConnectionManager):
    """Bind the operator to ``db_manager`` and run the index check.

    Args:
        db_manager: Connection manager forwarded to ``BaseDBOperator``.
    """
    super().__init__(db_manager)
    # The check is guarded by a class-level flag, so it returns early once
    # it has completed for an earlier instance.
    self._ensure_performance_primitives()
|
||||
|
||||
@staticmethod
|
||||
def _normalize_datetime_text(value) -> str:
|
||||
"""把日期/时间对象统一转成数据库可比较的标准字符串。"""
|
||||
if isinstance(value, datetime):
|
||||
return value.strftime("%Y-%m-%d %H:%M:%S")
|
||||
return str(value or "").strip()
|
||||
|
||||
@classmethod
|
||||
def _build_day_time_range(cls, target_date: str) -> tuple[str, str]:
|
||||
"""把 `YYYY-MM-DD` 日期转换成 `[00:00:00, 次日00:00:00)` 时间范围。"""
|
||||
start_dt = datetime.strptime(str(target_date or "").strip(), "%Y-%m-%d")
|
||||
end_dt = start_dt.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
next_day_dt = end_dt + timedelta(days=1)
|
||||
return (
|
||||
end_dt.strftime("%Y-%m-%d 00:00:00"),
|
||||
next_day_dt.strftime("%Y-%m-%d 00:00:00"),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _build_day_bounds(cls, start_date: str, end_date: str) -> tuple[str, str]:
|
||||
"""把日期区间转换成适合索引命中的时间范围。"""
|
||||
start_dt = datetime.strptime(str(start_date or "").strip(), "%Y-%m-%d")
|
||||
end_dt = datetime.strptime(str(end_date or "").strip(), "%Y-%m-%d")
|
||||
if end_dt < start_dt:
|
||||
start_dt, end_dt = end_dt, start_dt
|
||||
next_day_dt = end_dt + timedelta(days=1)
|
||||
return (
|
||||
start_dt.strftime("%Y-%m-%d 00:00:00"),
|
||||
next_day_dt.strftime("%Y-%m-%d 00:00:00"),
|
||||
)
|
||||
|
||||
def _ensure_performance_primitives(self) -> None:
|
||||
"""确保消息存储相关的关键索引存在。
|
||||
|
||||
设计说明:
|
||||
1. 这一步只补“高频查询明确受益”的索引,不做激进表结构重写;
|
||||
2. 使用 information_schema 做存在性检查,保证重复启动时仍然幂等;
|
||||
3. 只在进程内执行一次,避免每次 new MessageStorageDB 都重复打元数据查询。
|
||||
"""
|
||||
if self.__class__._performance_ready:
|
||||
return
|
||||
|
||||
with self.__class__._performance_lock:
|
||||
if self.__class__._performance_ready:
|
||||
return
|
||||
|
||||
self._ensure_index_exists(
|
||||
table_name="messages",
|
||||
index_name="idx_group_sender_timestamp",
|
||||
create_sql="CREATE INDEX idx_group_sender_timestamp ON messages (group_id, sender, timestamp)",
|
||||
)
|
||||
self._ensure_index_exists(
|
||||
table_name="messages",
|
||||
index_name="idx_group_type_timestamp",
|
||||
create_sql="CREATE INDEX idx_group_type_timestamp ON messages (group_id, message_type, timestamp)",
|
||||
)
|
||||
self._ensure_index_exists(
|
||||
table_name="messages",
|
||||
index_name="idx_media_pending_lookup",
|
||||
create_sql="CREATE INDEX idx_media_pending_lookup ON messages (message_type, image_path, timestamp, group_id)",
|
||||
)
|
||||
self.__class__._performance_ready = True
|
||||
|
||||
def _ensure_index_exists(self, table_name: str, index_name: str, create_sql: str) -> None:
    """Create a single index if it is not already present.

    Args:
        table_name: Table the index belongs to.
        index_name: Name looked up in ``information_schema.statistics``.
        create_sql: DDL statement executed when the index is missing.

    Returns nothing. When no database name is available the method does
    nothing. A failed creation is logged as a warning, not raised.
    """
    schema_name = self.db_manager.get_mysql_database_name()
    if not schema_name:
        return

    lookup_sql = """
        SELECT 1
        FROM information_schema.statistics
        WHERE table_schema = %s
          AND table_name = %s
          AND index_name = %s
        LIMIT 1
    """
    lookup_params = (schema_name, table_name, index_name)
    already_present = self.execute_query(lookup_sql, lookup_params, fetch_one=True)
    if already_present:
        return

    # Index creation here is a "performance self-healing" step:
    # 1. Nobody has to run a migration by hand; start-up fills the gap.
    # 2. If the live schema differs from what the DDL expects, the failure
    #    is only logged and the main flow carries on.
    # 3. This gets the observable win first; a fuller schema migration can
    #    be decided on later.
    # NOTE(review): assumes execute_update returns a truthy value on success
    # for DDL — confirm it is not a row count (0 for CREATE INDEX).
    created = self.execute_update(create_sql)
    if not created:
        self.LOG.warning(f"消息表索引补建失败,请人工检查: table={table_name}, index={index_name}")
|
||||
|
||||
def archive_message(self, msg: WxMessage) -> bool:
|
||||
"""存档消息
|
||||
@@ -252,10 +348,12 @@ class MessageStorageDB(BaseDBOperator):
|
||||
|
||||
def get_member_messages_on_date(self, group_id: str, wxid: str, target_date: str, limit: int = 120) -> List[Dict]:
|
||||
"""获取成员在某一天的消息"""
|
||||
start_time, end_time = self._build_day_time_range(target_date)
|
||||
sql = """
|
||||
SELECT timestamp, sender, content, message_type
|
||||
FROM messages
|
||||
WHERE DATE(timestamp) = %s
|
||||
WHERE timestamp >= %s
|
||||
AND timestamp < %s
|
||||
AND group_id = %s
|
||||
AND sender = %s
|
||||
AND message_type IN (1, 49)
|
||||
@@ -264,14 +362,16 @@ class MessageStorageDB(BaseDBOperator):
|
||||
ORDER BY timestamp ASC
|
||||
LIMIT %s
|
||||
"""
|
||||
return self.execute_query(sql, (target_date, group_id, wxid, limit)) or []
|
||||
return self.execute_query(sql, (start_time, end_time, group_id, wxid, limit)) or []
|
||||
|
||||
def get_member_messages_for_group_date(self, group_id: str, target_date: str, limit: int = 5000) -> List[Dict]:
|
||||
"""获取群在某一天的全部文本消息"""
|
||||
start_time, end_time = self._build_day_time_range(target_date)
|
||||
sql = """
|
||||
SELECT timestamp, sender, content, message_type
|
||||
FROM messages
|
||||
WHERE DATE(timestamp) = %s
|
||||
WHERE timestamp >= %s
|
||||
AND timestamp < %s
|
||||
AND group_id = %s
|
||||
AND sender IS NOT NULL
|
||||
AND sender <> ''
|
||||
@@ -281,7 +381,7 @@ class MessageStorageDB(BaseDBOperator):
|
||||
ORDER BY timestamp ASC
|
||||
LIMIT %s
|
||||
"""
|
||||
return self.execute_query(sql, (target_date, group_id, limit)) or []
|
||||
return self.execute_query(sql, (start_time, end_time, group_id, limit)) or []
|
||||
|
||||
def get_recent_group_chat_messages(self, group_id: str, limit: int = 20) -> List[Dict]:
|
||||
"""获取群聊最近消息"""
|
||||
@@ -315,13 +415,15 @@ class MessageStorageDB(BaseDBOperator):
|
||||
|
||||
def get_message_count_by_date(self, date: str) -> List[Dict]:
    """Return per-group, per-sender message counts for one calendar day.

    The day is matched with a half-open range on ``timestamp`` and not with
    ``DATE(timestamp) = ...``, so the column is compared directly and an
    index on it can be used.

    Args:
        date: Calendar day as ``YYYY-MM-DD``.

    Returns:
        Rows with ``group_id``, ``sender`` and ``count``; an empty list when
        the query yields nothing.

    Raises:
        ValueError: If ``date`` does not parse as ``YYYY-MM-DD``.
    """
    start_time, end_time = self._build_day_time_range(date)
    sql = """
        SELECT group_id, sender, COUNT(*) as count
        FROM messages
        WHERE timestamp >= %s
          AND timestamp < %s
        GROUP BY group_id, sender
    """
    return self.execute_query(sql, (start_time, end_time)) or []
|
||||
|
||||
def get_speech_ranking(self, date: str, group_id: str, limit: int = 20) -> List[Dict]:
|
||||
"""获取指定日期和群组的发言排名"""
|
||||
@@ -480,14 +582,19 @@ class MessageStorageDB(BaseDBOperator):
|
||||
params.append(group_id)
|
||||
|
||||
if start_date:
|
||||
sql_count += " AND DATE(timestamp) >= %s "
|
||||
sql_data += " AND DATE(timestamp) >= %s "
|
||||
params.append(start_date)
|
||||
start_bound = f"{str(start_date).strip()} 00:00:00"
|
||||
sql_count += " AND timestamp >= %s "
|
||||
sql_data += " AND timestamp >= %s "
|
||||
params.append(start_bound)
|
||||
|
||||
if end_date:
|
||||
sql_count += " AND DATE(timestamp) <= %s "
|
||||
sql_data += " AND DATE(timestamp) <= %s "
|
||||
params.append(end_date)
|
||||
_, end_bound = self._build_day_bounds(
|
||||
start_date or str(end_date).strip(),
|
||||
str(end_date).strip(),
|
||||
)
|
||||
sql_count += " AND timestamp < %s "
|
||||
sql_data += " AND timestamp < %s "
|
||||
params.append(end_bound)
|
||||
|
||||
if search_text:
|
||||
sql_count += " AND content LIKE %s "
|
||||
@@ -665,8 +772,8 @@ class MessageStorageDB(BaseDBOperator):
|
||||
"""
|
||||
return self.execute_query(sql, (f'%md5="{md5}"%',), fetch_one=True)
|
||||
|
||||
def get_messages_by_date_range(self, group_id: str, start_date: str, end_date: str = None,
|
||||
min_content_length: int = 6, max_results: int = 5000) -> List[Dict]:
|
||||
def get_messages_by_calendar_range(self, group_id: str, start_date: str, end_date: str = None,
|
||||
min_content_length: int = 6, max_results: int = 5000) -> List[Dict]:
|
||||
"""按日期范围获取消息(支持按天总结)
|
||||
|
||||
Args:
|
||||
@@ -682,11 +789,13 @@ class MessageStorageDB(BaseDBOperator):
|
||||
if end_date is None:
|
||||
end_date = start_date
|
||||
|
||||
start_time, end_time = self._build_day_bounds(start_date, end_date)
|
||||
|
||||
sql = """
|
||||
SELECT timestamp, sender, content, message_type
|
||||
FROM messages
|
||||
WHERE DATE(timestamp) >= %s
|
||||
AND DATE(timestamp) <= %s
|
||||
WHERE timestamp >= %s
|
||||
AND timestamp < %s
|
||||
AND group_id = %s
|
||||
AND message_type IN (1, 49)
|
||||
AND LENGTH(content) > %s
|
||||
@@ -695,7 +804,7 @@ class MessageStorageDB(BaseDBOperator):
|
||||
ORDER BY timestamp ASC
|
||||
LIMIT %s
|
||||
"""
|
||||
params = (start_date, end_date, group_id, min_content_length, max_results)
|
||||
params = (start_time, end_time, group_id, min_content_length, max_results)
|
||||
return self.execute_query(sql, params) or []
|
||||
|
||||
def get_messages_for_summary(self, group_id: str, hours_ago: int = 8,
|
||||
@@ -749,8 +858,8 @@ class MessageStorageDB(BaseDBOperator):
|
||||
AND content NOT LIKE '/%'
|
||||
ORDER BY timestamp ASC
|
||||
"""
|
||||
params = (start_time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
end_time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
params = (self._normalize_datetime_text(start_time),
|
||||
self._normalize_datetime_text(end_time),
|
||||
group_id)
|
||||
return self.execute_query(sql, params) or []
|
||||
|
||||
@@ -776,8 +885,8 @@ class MessageStorageDB(BaseDBOperator):
|
||||
AND CHAR_LENGTH(content) < 300
|
||||
AND content NOT LIKE '/%'
|
||||
"""
|
||||
params = (start_time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
end_time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
params = (self._normalize_datetime_text(start_time),
|
||||
self._normalize_datetime_text(end_time),
|
||||
group_id)
|
||||
result = self.execute_query(sql, params)
|
||||
return result[0]['count'] if result else 0
|
||||
@@ -801,8 +910,8 @@ class MessageStorageDB(BaseDBOperator):
|
||||
AND sender <> ''
|
||||
"""
|
||||
params = (
|
||||
start_time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
end_time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
self._normalize_datetime_text(start_time),
|
||||
self._normalize_datetime_text(end_time),
|
||||
group_id,
|
||||
)
|
||||
result = self.execute_query(sql, params, fetch_one=True) or {}
|
||||
|
||||
Reference in New Issue
Block a user