尝试进行总结优化

This commit is contained in:
liuwei
2026-01-06 16:02:00 +08:00
parent b8e881980f
commit 491c0d16fb
3 changed files with 215 additions and 27 deletions

View File

@@ -260,11 +260,11 @@ class MessageStorageDB(BaseDBOperator):
def get_pending_image_messages(self, minutes_ago: int = 10, limit: int = 50) -> List[Dict]:
"""获取最近N分钟内未处理图片的消息image_path IS NULL
Args:
minutes_ago: 查询最近多少分钟的消息默认10分钟
limit: 每次最多处理多少条默认50条
Returns:
包含消息ID、群ID、消息XML等信息的列表
"""
@@ -281,3 +281,64 @@ class MessageStorageDB(BaseDBOperator):
"""
params = (minutes_ago, limit)
return self.execute_query(sql, params) or []
def get_messages_by_date_range(self, group_id: str, start_date: str, end_date: str = None,
                               min_content_length: int = 6, max_results: int = 5000) -> List[Dict]:
    """Fetch a group's messages within an inclusive date range (for per-day summaries).

    Args:
        group_id: Group ID to filter on.
        start_date: First day of the range, formatted YYYY-MM-DD.
        end_date: Last day of the range, formatted YYYY-MM-DD. When None,
            only the ``start_date`` day is queried.
        min_content_length: Rows are kept only when ``LENGTH(content)`` is
            strictly greater than this value.
        max_results: Upper bound on the number of rows returned.

    Returns:
        Rows carrying timestamp, sender, content and message_type, ordered by
        timestamp ascending; an empty list when the query yields nothing.
    """
    if end_date is None:
        end_date = start_date

    # The LIKE pattern is bound as a parameter instead of being written inline.
    # A bare '%' inside SQL text that is also executed with %s parameters is
    # rejected by drivers that substitute via Python %-formatting (which driver
    # execute_query uses is not visible here, so stay driver-agnostic).
    command_prefix_pattern = '/%'

    # NOTE(review): assuming MySQL, LENGTH counts bytes while CHAR_LENGTH counts
    # characters, so the minimum and maximum below use different units --
    # confirm this is intended for multi-byte content.
    sql = """
        SELECT timestamp, sender, content, message_type
        FROM messages
        WHERE DATE(timestamp) >= %s
          AND DATE(timestamp) <= %s
          AND group_id = %s
          AND message_type IN (1, 49)
          AND LENGTH(content) > %s
          AND CHAR_LENGTH(content) < 300
          AND content NOT LIKE %s
        ORDER BY timestamp ASC
        LIMIT %s
    """
    params = (start_date, end_date, group_id, min_content_length,
              command_prefix_pattern, max_results)
    return self.execute_query(sql, params) or []
def get_messages_for_summary(self, group_id: str, hours_ago: int = 8,
                             min_messages: int = 50,
                             max_hours: int = 48,
                             max_results: int = 5000) -> List[Dict]:
    """Fetch messages for summarisation, widening the time window when too few are found.

    The lookup starts with the last ``hours_ago`` hours. While fewer than
    ``min_messages`` rows come back, the window grows in 8-hour steps and the
    query is repeated, never looking further back than ``max_hours``.

    Args:
        group_id: Group ID to query.
        hours_ago: Size of the initial window, in hours.
        min_messages: Minimum number of messages wanted; fewer triggers a
            wider window.
        max_hours: Largest window, in hours, the widening loop may reach.
        max_results: Maximum number of messages returned.

    Returns:
        At most ``max_results`` messages from the final window, or an empty
        list when nothing was found.
    """
    widen_step_hours = 8

    window_hours = hours_ago
    # A None result from get_recent_messages is treated as "no messages" so
    # the len() check below cannot fail.
    messages = self.get_recent_messages(group_id, hours_ago=window_hours) or []

    while len(messages) < min_messages and window_hours < max_hours:
        # Clamp so the final attempt lands exactly on max_hours rather than
        # overshooting it when (max_hours - hours_ago) is not a multiple of the step.
        window_hours = min(window_hours + widen_step_hours, max_hours)
        messages = self.get_recent_messages(group_id, hours_ago=window_hours) or []

    return messages[:max_results]