完善成员画像插件的日/周/月分层提取与已结束日期处理逻辑
- 将成员画像能力进一步收敛到插件内部,强化按群启用、后台异步刷新、后台查看的完整链路
- 新增群维度按日批量提取能力:以群为单位按天处理一次,统一提取当天活跃成员的日级画像摘要
- 日级画像输出扩展为更适合长期累计的结构化信号,补充身份线索、技能信号、家庭线索、阶段线索、价值偏好、群内角色、决策风格等字段
- 优化提示词设计,明确要求优先提取可复用、可累计、可验证的行为线索,减少一次性情绪和短期噪声对长期画像的干扰
- 打通日 -> 周 -> 月 -> 最终画像 的分层汇总链路,让后续月度画像直接消费日/周级结构化摘要,而不是重复回扫长窗口原始消息
- 新增/完善画像融合策略:identity_traits、skill_profile、family_profile、life_stage_profile、value_profile 也纳入长期分数累计,不再仅依赖最近一次结果覆盖旧结果
- 将活跃群、活跃成员、辅助消息样本等口径统一调整为只处理已结束日期,避免当天未完结数据进入画像计算
- 调整日级批处理逻辑,默认只处理昨天及更早日期,确保不会处理当天消息
- 修复重复执行时仍然先调用 AI 再跳过的问题,改为先检查当天候选成员是否已完成生成,全部已存在时直接跳过,减少无效 AI 请求和耗时
- 增加群日批处理、周摘要、月摘要、群刷新进度等日志,方便后台定位当前刷新到哪些群、哪些成员、进度如何
- 丰富后台画像展示字段,支持查看更完整的长期画像维度与摘要统计
- 更新插件配置默认值,收敛为近 60 天启动窗口、每日滚动处理与群级日摘要模式
- 补充 message_storage 读取能力,支持按群按日提取消息,为群日批量画像与后续周期汇总提供底层数据支撑
This commit is contained in:
@@ -43,21 +43,39 @@ class MessageStorageDB(BaseDBOperator):
|
||||
params = (hours_ago, group_id, min_content_length)
|
||||
return self.execute_query(sql, params) or []
|
||||
|
||||
def get_member_recent_messages(self, group_id: str, wxid: str, days: int = 30,
                               limit: int = 200, include_today: bool = True) -> List[Dict]:
    """Return one member's recent messages in a group, oldest first.

    Args:
        group_id: Value matched against the ``group_id`` column.
        wxid: Value matched against the ``sender`` column.
        days: Size of the look-back window, in days.
        limit: Maximum number of rows; when the window holds more, the
            newest ``limit`` rows are the ones kept.
        include_today: When True (the default), the window is rolling:
            from ``NOW() - days`` onward, so the current day's messages
            are included. When False, only finished calendar days are
            read: from ``CURDATE() - days`` up to, but excluding,
            ``CURDATE()``.

    Returns:
        Rows with ``timestamp``, ``sender``, ``content`` and
        ``message_type``, in ascending timestamp order. An empty list is
        returned when the query yields nothing.
    """
    # Only the time-window predicate differs between the two modes; every
    # other filter is shared so the two variants cannot drift apart.
    if include_today:
        window = "timestamp >= DATE_SUB(NOW(), INTERVAL %s DAY)"
    else:
        window = (
            "timestamp >= DATE_SUB(CURDATE(), INTERVAL %s DAY)\n"
            "          AND timestamp < CURDATE()"
        )

    # ``window`` is one of the two fixed literals above, never caller input;
    # all caller-supplied values stay bound parameters.
    # NOTE(review): '%%' is presumably the driver-escaped form of a literal
    # '%', making the filter "content does not start with '/'" -- confirm
    # against execute_query's paramstyle.
    sql = """
        SELECT timestamp, sender, content, message_type
        FROM messages
        WHERE {window}
          AND group_id = %s
          AND sender = %s
          AND message_type IN (1, 49)
          AND CHAR_LENGTH(content) BETWEEN 2 AND 500
          AND content NOT LIKE '/%%'
        ORDER BY timestamp DESC
        LIMIT %s
    """.format(window=window)
    params = (days, group_id, wxid, limit)

    # The query sorts newest-first so LIMIT keeps the most recent rows;
    # reverse afterwards to hand them back in chronological order.
    results = self.execute_query(sql, params) or []
    return list(reversed(results))
|
||||
|
||||
def get_member_messages_since(self, group_id: str, wxid: str, since_time, limit: int = 200) -> List[Dict]:
|
||||
@@ -122,6 +140,23 @@ class MessageStorageDB(BaseDBOperator):
|
||||
"""
|
||||
return self.execute_query(sql, (target_date, group_id, wxid, limit)) or []
|
||||
|
||||
def get_member_messages_for_group_date(self, group_id: str, target_date: str, limit: int = 5000) -> List[Dict]:
    """Return the messages posted in a group on one calendar day.

    Args:
        group_id: Value matched against the ``group_id`` column.
        target_date: Day to read; compared against ``DATE(timestamp)``.
        limit: Maximum number of rows. Rows are ordered oldest-first, so
            when the day holds more than ``limit`` matching rows the
            earliest ones are kept.

    Returns:
        Rows with ``timestamp``, ``sender``, ``content`` and
        ``message_type`` in ascending timestamp order, restricted to rows
        with a non-empty sender. An empty list is returned when the query
        yields nothing.
    """
    # NOTE(review): '%%' is presumably the driver-escaped form of a literal
    # '%', making the filter "content does not start with '/'" -- confirm
    # against execute_query's paramstyle.
    sql = """
        SELECT timestamp, sender, content, message_type
        FROM messages
        WHERE DATE(timestamp) = %s
          AND group_id = %s
          AND sender IS NOT NULL
          AND sender <> ''
          AND message_type IN (1, 49)
          AND CHAR_LENGTH(content) BETWEEN 2 AND 500
          AND content NOT LIKE '/%%'
        ORDER BY timestamp ASC
        LIMIT %s
    """
    return self.execute_query(sql, (target_date, group_id, limit)) or []
|
||||
|
||||
def get_message_count_by_date(self, date: str) -> List[Dict]:
|
||||
"""获取指定日期的消息统计"""
|
||||
sql = """
|
||||
|
||||
Reference in New Issue
Block a user