feat: dedupe and schedule emoji media downloads

This commit is contained in:
liuwei
2026-04-13 12:06:58 +08:00
parent 9698f9577f
commit ada1b656e0
4 changed files with 172 additions and 85 deletions

View File

@@ -88,6 +88,25 @@ class MessageStorageDB(BaseDBOperator):
"""
return self.execute_query(sql, (f'%md5="{md5}"%',), fetch_one=True)
def get_media_message_by_md5(self, md5: str, current_message_id: int | str | None = None) -> Optional[Dict]:
"""根据 md5 查找已落盘的图片/表情消息,用于去重复用本地文件"""
sql = """
SELECT id, group_id, timestamp, sender, content, message_type,
attachment_url, message_id, message_xml, message_thumb, image_path
FROM messages
WHERE attachment_url IS NOT NULL
AND attachment_url <> ''
AND attachment_url LIKE %s
AND image_path IS NOT NULL
AND image_path <> ''
"""
params: List = [f'%md5="{md5}"%']
if current_message_id is not None:
sql += " AND message_id <> %s"
params.append(current_message_id)
sql += " ORDER BY id DESC LIMIT 1"
return self.execute_query(sql, tuple(params), fetch_one=True)
def get_member_recent_messages(self, group_id: str, wxid: str, days: int = 30,
limit: int = 200, include_today: bool = True) -> List[Dict]:
"""获取指定群成员近期消息"""
@@ -513,8 +532,8 @@ class MessageStorageDB(BaseDBOperator):
return self.execute_query(sql, tuple(params)) or []
def get_pending_image_messages(self, minutes_ago: int = 10, limit: int = 50) -> List[Dict]:
"""获取最近N分钟内未处理图片消息image_path IS NULL
def get_pending_media_messages(self, minutes_ago: int = 10, limit: int = 50) -> List[Dict]:
"""获取最近N分钟内未处理图片/表情消息image_path IS NULL
Args:
minutes_ago: 查询最近多少分钟的消息默认10分钟
@@ -524,19 +543,23 @@ class MessageStorageDB(BaseDBOperator):
包含消息ID、群ID、消息XML等信息的列表
"""
sql = """
SELECT message_id, group_id, message_xml, timestamp,attachment_url
SELECT message_id, group_id, sender, message_type, message_xml, timestamp, attachment_url
FROM messages
WHERE message_type = '3'
WHERE message_type IN ('3', '47', '1048625', '1090519089')
AND image_path IS NULL
AND timestamp >= DATE_SUB(NOW(), INTERVAL %s MINUTE)
AND message_xml IS NOT NULL
AND message_xml != ''
AND attachment_url IS NOT NULL
AND attachment_url != ''
ORDER BY timestamp ASC
LIMIT %s
"""
params = (minutes_ago, limit)
return self.execute_query(sql, params) or []
def get_pending_image_messages(self, minutes_ago: int = 10, limit: int = 50) -> List[Dict]:
    """Backward-compatible alias for ``get_pending_media_messages``.

    Kept so callers using the old method name keep working; the arguments
    are forwarded unchanged to the unified pending-media query.

    Args:
        minutes_ago: Forwarded as ``minutes_ago``.
        limit: Forwarded as ``limit``.

    Returns:
        The list returned by ``get_pending_media_messages``.
    """
    pending = self.get_pending_media_messages(minutes_ago=minutes_ago, limit=limit)
    return pending
def get_messages_by_date_range(self, group_id: str, start_date: str, end_date: str = None,
min_content_length: int = 6, max_results: int = 5000) -> List[Dict]:
"""按日期范围获取消息(支持按天总结)