Reapply "完善表情资产后台能力并补充群总结落库"

This reverts commit 57bb46bb21.
This commit is contained in:
liuwei
2026-04-02 17:55:21 +08:00
parent 57bb46bb21
commit 079f363382
11 changed files with 671 additions and 17 deletions

141
db/emoji_asset_db.py Normal file
View File

@@ -0,0 +1,141 @@
# -*- coding: utf-8 -*-
from datetime import datetime
from typing import Dict, List, Optional
from db.base import BaseDBOperator
from db.connection import DBConnectionManager
class EmojiAssetDBOperator(BaseDBOperator):
    """Database operations for emoji assets stored in ``t_emoji_asset``.

    Rows are keyed by the emoji MD5 through a unique index, so saving an
    emoji that is already known updates its row instead of adding another.
    """

    # Format used whenever a datetime is turned into a plain string.
    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    # Column list shared by the SELECT statements so that every reader
    # returns rows of the same shape.
    _SELECT_COLUMNS = (
        "id, md5, total_length, file_path, file_ext, source_message_id, "
        "source_chatroom_id, source_wxid, usage_count, last_used_at, "
        "last_sent_at, create_time, update_time"
    )

    def __init__(self, db_manager: DBConnectionManager):
        """Initialise the base operator and create the table if missing."""
        super().__init__(db_manager)
        self._create_tables()

    def _create_tables(self):
        """Create ``t_emoji_asset`` when it does not exist yet.

        Failures are logged and swallowed, so construction never raises
        because of the DDL statement.
        """
        try:
            self.execute_update("""
                CREATE TABLE IF NOT EXISTS t_emoji_asset (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    md5 VARCHAR(64) NOT NULL COMMENT '表情MD5',
                    total_length INT NOT NULL DEFAULT 0 COMMENT '表情长度',
                    file_path VARCHAR(255) NOT NULL COMMENT '本地访问路径',
                    file_ext VARCHAR(16) DEFAULT '' COMMENT '文件扩展名',
                    source_message_id BIGINT DEFAULT NULL COMMENT '来源消息ID',
                    source_chatroom_id VARCHAR(64) DEFAULT '' COMMENT '来源群ID',
                    source_wxid VARCHAR(64) DEFAULT '' COMMENT '来源发送人',
                    usage_count INT NOT NULL DEFAULT 0 COMMENT '使用次数',
                    last_used_at DATETIME DEFAULT NULL COMMENT '最近采集时间',
                    last_sent_at DATETIME DEFAULT NULL COMMENT '最近发送时间',
                    create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
                    update_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
                    UNIQUE KEY idx_emoji_asset_md5 (md5),
                    KEY idx_emoji_asset_recent (update_time),
                    KEY idx_emoji_asset_group (source_chatroom_id, update_time)
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='表情资产表';
            """)
        except Exception as e:
            self.LOG.error(f"创建表情资产表失败: {e}")

    def save_asset(self, asset: Dict) -> bool:
        """Insert an emoji asset, or refresh the row that already has its MD5.

        Args:
            asset: Mapping that may carry ``md5``, ``total_length``,
                ``file_path``, ``file_ext``, ``source_message_id``,
                ``source_chatroom_id``, ``source_wxid``, ``usage_count``,
                ``last_used_at`` and ``last_sent_at``. ``md5`` is required.

        Returns:
            The result of ``execute_update`` (presumably truthy on success;
            confirm against ``BaseDBOperator``), or ``False`` when ``md5`` is
            missing or any exception is raised.
        """
        try:
            md5 = asset.get("md5") or ""
            if not md5:
                # md5 is the unique key. Without this guard every md5-less
                # asset would land on the same '' row and overwrite the
                # file_path stored by the previous one.
                self.LOG.error("保存表情资产失败: md5 为空")
                return False

            # On an existing row:
            #   * source_* columns are only replaced by non-empty values;
            #   * usage_count is incremented by one (the supplied value is
            #     used for the initial insert only);
            #   * last_sent_at is left untouched (see mark_sent).
            sql = """
                INSERT INTO t_emoji_asset (
                    md5, total_length, file_path, file_ext,
                    source_message_id, source_chatroom_id, source_wxid,
                    usage_count, last_used_at, last_sent_at
                )
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                ON DUPLICATE KEY UPDATE
                    total_length = VALUES(total_length),
                    file_path = VALUES(file_path),
                    file_ext = VALUES(file_ext),
                    source_message_id = COALESCE(VALUES(source_message_id), source_message_id),
                    source_chatroom_id = CASE
                        WHEN VALUES(source_chatroom_id) IS NULL OR VALUES(source_chatroom_id) = '' THEN source_chatroom_id
                        ELSE VALUES(source_chatroom_id)
                    END,
                    source_wxid = CASE
                        WHEN VALUES(source_wxid) IS NULL OR VALUES(source_wxid) = '' THEN source_wxid
                        ELSE VALUES(source_wxid)
                    END,
                    usage_count = usage_count + 1,
                    last_used_at = VALUES(last_used_at)
            """
            # Fall back to the current local time when the caller gives no
            # (or an empty) collection timestamp.
            last_used_at = asset.get("last_used_at") or datetime.now().strftime(self._TIME_FORMAT)
            params = (
                md5,
                int(asset.get("total_length", 0) or 0),
                asset.get("file_path", ""),
                asset.get("file_ext", ""),
                asset.get("source_message_id"),
                asset.get("source_chatroom_id", ""),
                asset.get("source_wxid", ""),
                int(asset.get("usage_count", 1) or 1),
                last_used_at,
                asset.get("last_sent_at"),
            )
            return self.execute_update(sql, params)
        except Exception as e:
            self.LOG.error(f"保存表情资产失败: {e}")
            return False

    def list_assets(self, limit: int = 60, chatroom_id: str = "") -> List[Dict]:
        """Return assets that have a file path, most recently active first.

        Rows are ordered by the first non-NULL value among ``last_sent_at``,
        ``last_used_at`` and ``update_time``, descending.

        Args:
            limit: Maximum number of rows. Coerced to ``int`` and clamped to
                zero or more so the LIMIT clause always receives a valid
                number.
            chatroom_id: When non-empty, only assets whose
                ``source_chatroom_id`` equals it are returned.

        Returns:
            A list of row dicts with datetime columns rendered as strings;
            an empty list when nothing matches or an error occurs.
        """
        try:
            # A string or negative limit would otherwise be sent to the
            # database as-is and only fail there.
            row_limit = max(int(limit), 0)

            sql = (
                f" SELECT {self._SELECT_COLUMNS}"
                " FROM t_emoji_asset"
                " WHERE file_path IS NOT NULL AND file_path <> '' "
            )
            params = []
            if chatroom_id:
                sql += " AND source_chatroom_id = %s "
                params.append(chatroom_id)
            sql += " ORDER BY COALESCE(last_sent_at, last_used_at, update_time) DESC LIMIT %s "
            params.append(row_limit)

            rows = self.execute_query(sql, tuple(params)) or []
            return [self._serialize_row(row) for row in rows]
        except Exception as e:
            self.LOG.error(f"查询表情资产失败: {e}")
            return []

    def get_asset_by_md5(self, md5: str) -> Optional[Dict]:
        """Fetch the single asset row with the given MD5.

        Returns:
            The serialized row, or ``None`` when no row matches or the query
            raises.
        """
        try:
            sql = (
                f" SELECT {self._SELECT_COLUMNS}"
                " FROM t_emoji_asset"
                " WHERE md5 = %s"
                " LIMIT 1 "
            )
            row = self.execute_query(sql, (md5,), fetch_one=True)
            return self._serialize_row(row) if row else None
        except Exception as e:
            self.LOG.error(f"查询表情资产详情失败: {e}")
            return None

    def mark_sent(self, md5: str) -> bool:
        """Set ``last_sent_at`` to the database's ``NOW()`` for one asset.

        Returns:
            The result of ``execute_update``, or ``False`` when it raises.
        """
        try:
            sql = """
                UPDATE t_emoji_asset
                SET last_sent_at = NOW()
                WHERE md5 = %s
            """
            return self.execute_update(sql, (md5,))
        except Exception as e:
            self.LOG.error(f"更新表情发送时间失败: {e}")
            return False

    @staticmethod
    def _serialize_row(row: Dict) -> Dict:
        """Render the datetime columns of ``row`` as strings, in place.

        Falsy input (``None`` or an empty dict) is returned unchanged. Values
        that are not ``datetime`` instances are left as they are.
        """
        if not row:
            return row
        for key in ("last_used_at", "last_sent_at", "create_time", "update_time"):
            value = row.get(key)
            if isinstance(value, datetime):
                row[key] = value.strftime(EmojiAssetDBOperator._TIME_FORMAT)
        return row

View File

@@ -338,6 +338,22 @@ class MessageStorageDB(BaseDBOperator):
print(f"更新消息图片文件路径出错: {e}")
return False
def get_pending_emoji_messages(self, minutes_ago: int = 1440, limit: int = 50) -> List[Dict]:
    """Fetch recent emoji messages that have no image path recorded yet.

    Selects rows from ``messages`` whose ``message_type`` is ``'47'`` or
    ``'1090519089'``, whose ``image_path`` is NULL, whose ``attachment_url``
    is neither NULL nor empty, and whose ``timestamp`` lies within the last
    ``minutes_ago`` minutes, oldest first.

    Args:
        minutes_ago: Size of the look-back window in minutes.
        limit: Maximum number of rows to return.

    Returns:
        The rows produced by ``execute_query``, or an empty list when that
        result is falsy.
    """
    query = """
        SELECT message_id, group_id, sender, message_xml, timestamp, attachment_url, message_type
        FROM messages
        WHERE message_type IN ('47', '1090519089')
        AND image_path IS NULL
        AND timestamp >= DATE_SUB(NOW(), INTERVAL %s MINUTE)
        AND attachment_url IS NOT NULL
        AND attachment_url != ''
        ORDER BY timestamp ASC
        LIMIT %s
    """
    pending = self.execute_query(query, (minutes_ago, limit))
    if not pending:
        return []
    return pending
def get_hourly_message_trend(self, group_id: str = None, days: int = 1) -> List[Dict]:
"""获取指定群组的按小时消息趋势数据

113
db/message_summary_db.py Normal file
View File

@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
import json
from datetime import datetime
from typing import Dict, Optional
from db.base import BaseDBOperator
from db.connection import DBConnectionManager
class MessageSummaryDBOperator(BaseDBOperator):
    """Database operations for group-chat summaries in ``t_message_summary``.

    A summary is uniquely identified by ``(chatroom_id, summary_type,
    period_key)``; saving the same key again overwrites the stored summary.
    """

    # Format used whenever a datetime is turned into a plain string.
    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    # Columns that form the unique key and therefore must not appear in the
    # ON DUPLICATE KEY UPDATE clause.
    _KEY_COLUMNS = ("chatroom_id", "summary_type", "period_key")

    def __init__(self, db_manager: DBConnectionManager):
        """Initialise the base operator and create the table if missing."""
        super().__init__(db_manager)
        self._create_tables()

    def _create_tables(self):
        """Create ``t_message_summary`` when it does not exist yet.

        Failures are logged and swallowed, so construction never raises
        because of the DDL statement.
        """
        try:
            self.execute_update("""
                CREATE TABLE IF NOT EXISTS t_message_summary (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    chatroom_id VARCHAR(64) NOT NULL COMMENT '群聊ID',
                    group_name VARCHAR(128) DEFAULT '' COMMENT '群名称',
                    summary_type VARCHAR(16) NOT NULL COMMENT '总结类型 daily|manual',
                    period_key VARCHAR(32) NOT NULL COMMENT '周期主键,如 2026-04-01',
                    period_start DATETIME NULL COMMENT '总结周期开始时间',
                    period_end DATETIME NULL COMMENT '总结周期结束时间',
                    source_message_count INT NOT NULL DEFAULT 0 COMMENT '源消息数量',
                    summary_text LONGTEXT COMMENT '总结文本',
                    image_path VARCHAR(255) DEFAULT NULL COMMENT '总结图片路径',
                    meta_json LONGTEXT COMMENT '附加元数据JSON',
                    last_generated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '最后生成时间',
                    create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
                    update_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
                    UNIQUE KEY idx_message_summary (chatroom_id, summary_type, period_key),
                    KEY idx_message_summary_lookup (chatroom_id, period_end)
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='群消息总结表';
            """)
        except Exception as e:
            self.LOG.error(f"创建群消息总结表失败: {e}")

    def save_summary(self, summary: Dict) -> bool:
        """Insert a summary, or overwrite the one stored under the same key.

        Args:
            summary: Mapping that may carry ``chatroom_id``, ``group_name``,
                ``summary_type`` (defaults to ``"daily"``), ``period_key``,
                ``period_start``, ``period_end``, ``source_message_count``,
                ``summary_text``, ``image_path``, ``meta`` (serialized to
                JSON) and ``last_generated_at``.

        Returns:
            The result of ``execute_update`` (presumably truthy on success;
            confirm against ``BaseDBOperator``), or ``False`` when any
            exception is raised.
        """
        try:
            # A missing or None ``meta`` is stored as an empty JSON object so
            # that reading the row back always yields a dict, not None.
            meta = summary.get("meta")
            if meta is None:
                meta = {}

            # ``last_generated_at`` is NOT NULL in the table, so an explicit
            # None (or empty string) must fall back to the current time just
            # like a missing key does.
            generated_at = summary.get("last_generated_at") or datetime.now().strftime(self._TIME_FORMAT)

            data = {
                "chatroom_id": summary.get("chatroom_id", ""),
                "group_name": summary.get("group_name", ""),
                "summary_type": summary.get("summary_type", "daily"),
                "period_key": summary.get("period_key", ""),
                "period_start": summary.get("period_start"),
                "period_end": summary.get("period_end"),
                "source_message_count": int(summary.get("source_message_count", 0) or 0),
                "summary_text": summary.get("summary_text", ""),
                "image_path": summary.get("image_path"),
                "meta_json": json.dumps(meta, ensure_ascii=False),
                "last_generated_at": generated_at,
            }

            # Column names come from the literal dict above, never from the
            # caller, so interpolating them into the statement is safe; the
            # values themselves are still passed as bound parameters.
            fields = ", ".join(data)
            placeholders = ", ".join(["%s"] * len(data))
            update_clause = ", ".join(
                f"{column}=VALUES({column})"
                for column in data
                if column not in self._KEY_COLUMNS
            )
            sql = f"""
                INSERT INTO t_message_summary ({fields})
                VALUES ({placeholders})
                ON DUPLICATE KEY UPDATE {update_clause}
            """
            return self.execute_update(sql, tuple(data.values()))
        except Exception as e:
            self.LOG.error(f"保存群消息总结失败: {e}")
            return False

    def get_summary(self, chatroom_id: str, summary_type: str, period_key: str) -> Optional[Dict]:
        """Fetch the summary stored under the given unique key.

        Returns:
            The deserialized row, the falsy query result unchanged when no
            row is found, or ``None`` when the query raises.
        """
        try:
            sql = """
                SELECT *
                FROM t_message_summary
                WHERE chatroom_id = %s AND summary_type = %s AND period_key = %s
                LIMIT 1
            """
            row = self.execute_query(sql, (chatroom_id, summary_type, period_key), fetch_one=True)
            return self._deserialize_row(row)
        except Exception as e:
            self.LOG.error(f"获取群消息总结失败: {e}")
            return None

    @staticmethod
    def _deserialize_row(row: Optional[Dict]) -> Optional[Dict]:
        """Decode ``meta_json`` and stringify datetime columns, in place.

        After the call ``row["meta_json"]`` and ``row["meta"]`` both hold the
        decoded metadata; an empty, missing, undecodable or JSON ``null``
        value becomes ``{}``. Falsy input is returned unchanged.
        """
        if not row:
            return row

        meta = {}
        raw_meta = row.get("meta_json")
        if raw_meta:
            try:
                decoded = json.loads(raw_meta)
            except (TypeError, ValueError):
                # Not valid JSON text: fall back to empty metadata.
                decoded = None
            # The JSON text ``null`` decodes to None; expose it as {} so
            # callers can always treat the metadata as a container.
            if decoded is not None:
                meta = decoded
        row["meta_json"] = meta

        for key in ("period_start", "period_end", "last_generated_at", "create_time", "update_time"):
            value = row.get(key)
            if isinstance(value, datetime):
                row[key] = value.strftime(MessageSummaryDBOperator._TIME_FORMAT)

        row["meta"] = meta
        return row