Files
abot/plugins/member_context/prompt_builder.py
liuwei 60b72874b5 feat: 重构成员画像为日周月分层沉淀链路并增强后台摘要能力
本次提交围绕成员画像插件进行了较大升级,核心目标是把原来偏单次、偏近期的成员交互摘要,升级为可随时间沉淀的分层画像能力。

主要功能变更如下:
1. 新增成员分层摘要存储表 t_member_digest,并提供对应的数据库操作层,支持按成员、按群、按摘要类型(daily/weekly/monthly)持久化周期性摘要结果。
2. 在 member_context 插件内新增 MemberDigestService,把画像生成拆分为日摘要、周摘要、月摘要三级处理流程,再由最终画像服务消费这些分层摘要,减少直接反复处理大量原始消息带来的成本和失真。
3. 新增提示词构建模块,分别为日级观察、周级归纳、月级归纳以及最终画像整理提供独立提示词,强调中性、克制、避免敏感推断,并将长期特征与近期状态明确分层。
4. 重写成员最终画像生成逻辑,优先基于日/周/月摘要融合出长期特征、习惯模式、长期回复偏好、近期状态等信息,再用 AI 对分层摘要做最终整理,避免仅依赖近 30 天消息得出偏短期结论。
5. 保留并增强长期画像融合逻辑,通过打分、衰减和重复证据累积,使长期特征随着时间逐步稳定,而不会被单次刷新完全覆盖。
6. 在消息存储层补充成员按时间增量获取、按活跃日期统计、按天取消息等查询方法,为后续分层摘要生成提供数据支撑。
7. 扩展 member_context 插件配置,增加日级摘要消息上限、日摘要最小消息数、单次回填的日摘要数量上限、最终画像使用的日/周/月摘要数量等参数,便于在准确性和系统负载之间做平衡。
8. 后台成员摘要详情页新增长期沟通倾向、长期特征、习惯模式、长期回复偏好、近期状态、历史样本数、分层摘要数量等展示字段,方便观察画像沉淀程度。
9. 优化后台查看成员摘要接口逻辑:首次打开如果还没有摘要,不再同步阻塞生成,而是返回未就绪状态,配合后台手动异步刷新,降低页面卡顿和接口阻塞风险。
10. 增强刷新日志,单成员和群级刷新会输出当前刷新模式以及日/周/月摘要数量,便于排查画像构建进度。
11. 调整当前日、当前周、当前月摘要的重算逻辑,确保新增日摘要写入后,本周和本月摘要不会长期停留在旧版本。

本次提交后,成员画像能力从“基于近期样本的单层摘要”升级为“基于时间沉淀的分层画像管线”,为后续把画像稳定接入 AI 自动回复上下文打下基础,同时尽量保持现有群权限控制和后台异步刷新方式不变。
2026-04-02 12:42:28 +08:00

148 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import json
from typing import Dict, List
class MemberContextPromptBuilder:
    """Prompt builder for the layered (daily / weekly / monthly) member profile.

    Every public method is a static string builder: it takes already-loaded
    messages or digest records and returns the complete prompt text. Each
    prompt embeds the JSON schema the model is asked to answer with.
    """

    @staticmethod
    def _structured_of(item: Dict) -> Dict:
        """Return ``item["structured"]``, or ``{}`` when it is missing or falsy.

        A stored ``None`` is treated like a missing key so it is never
        serialized into a prompt as ``null``.
        """
        return item.get("structured") or {}

    @staticmethod
    def _first_truthy(source: Dict, keys: List[str], default):
        """Return the first truthy ``source[key]`` for ``keys``, else ``default``."""
        for key in keys:
            value = source.get(key)
            if value:
                return value
        return default

    @staticmethod
    def _dump_structured(digests: List[Dict], limit: int) -> List[str]:
        """Serialize the ``structured`` payload of the first ``limit`` digests.

        A non-positive ``limit`` selects nothing (a negative slice bound would
        otherwise drop items from the end instead).
        """
        selected = digests[:limit] if limit > 0 else []
        return [
            json.dumps(MemberContextPromptBuilder._structured_of(item), ensure_ascii=False)
            for item in selected
        ]

    @staticmethod
    def build_daily_digest_prompt(chatroom_id: str, wxid: str, display_name: str,
                                  digest_date: str, messages: List[Dict], *,
                                  max_messages: int = 80,
                                  max_content_chars: int = 180) -> str:
        """Build the prompt that asks for one member's daily observation digest.

        Args:
            chatroom_id: Group id, echoed into the prompt.
            wxid: Member id, echoed into the prompt.
            display_name: Member display name, echoed into the prompt.
            digest_date: Date label, echoed into the prompt.
            messages: Message dicts; only the ``timestamp`` and ``content``
                keys are read.
            max_messages: Only the last ``max_messages`` entries are used;
                a value <= 0 uses none.
            max_content_chars: Each message is cut to this many characters
                after newlines are flattened and whitespace is stripped.

        Returns:
            The prompt text. When no message has usable content the message
            section reads ``暂无``.
        """
        # Guard the zero case explicitly: ``messages[-0:]`` is the whole list.
        recent = messages[-max_messages:] if max_messages > 0 else []
        content_cap = max(max_content_chars, 0)

        lines = []
        for msg in recent:
            # Characters 11..15 are "HH:MM" for a "YYYY-MM-DD HH:MM:SS"-style
            # value; NOTE(review): the timestamp format is assumed, confirm
            # against the message store. Other shapes yield an empty label.
            ts = str(msg.get("timestamp", ""))[11:16]
            # str() keeps non-string content (e.g. numbers) from raising on
            # .replace; falsy content is still skipped.
            content = str(msg.get("content") or "").replace("\n", " ").strip()[:content_cap]
            if content:
                lines.append(f"[{ts}] {content}")

        header = (
            "你是微信群后台的成员日观察摘要生成器。\n"
            "请仅基于给定的当日公开聊天记录,提取对后续互动有帮助的中性行为观察。\n"
            "不要做人格诊断、隐私猜测、负面评价,不要脑补群外信息。\n"
            "输出严格 JSON不要 markdown。\n"
            '{'
            '"topics":["主题1"],'
            '"interaction_style":"一句中文",'
            '"message_pattern":"一句中文",'
            '"response_style_hint":"一句中文",'
            '"habit_signals":["信号1"],'
            '"engagement_traits":["特征1"],'
            '"reply_taboos":["避坑1"],'
            '"temperament_signal":"一句中文,描述当天显露的沟通倾向,必须克制",'
            '"summary_text":"一段不超过100字的日摘要",'
            '"representative_messages":["原话1","原话2"],'
            '"confidence":0.0'
            '}\n'
            "要求:\n"
            "1. topics 最多4个habit_signals 最多4个engagement_traits 最多4个reply_taboos 最多3个。\n"
            "2. temperament_signal 只能写当日可观察到的沟通倾向,不可上升为长期性格判断。\n"
            "3. representative_messages 保留最能代表当天风格的短句最多3条。\n"
            f"成员: {display_name} ({wxid})\n"
            f"群ID: {chatroom_id}\n"
            f"日期: {digest_date}\n"
            "当日消息:\n"
        )
        return header + ("\n".join(lines) or "暂无")

    @staticmethod
    def build_period_digest_prompt(digest_type: str, chatroom_id: str, wxid: str,
                                   display_name: str, period_key: str, items: List[Dict]) -> str:
        """Build the prompt that rolls lower-level digests up into a period digest.

        Args:
            digest_type: ``"weekly"`` selects the weekly schema and guidance;
                any other value selects the monthly schema and guidance. The
                value is also interpolated verbatim into the first prompt line.
            chatroom_id: Group id, echoed into the prompt.
            wxid: Member id, echoed into the prompt.
            display_name: Member display name, echoed into the prompt.
            period_key: Label of the period being summarized.
            items: Lower-level digest records; the ``period_key``,
                ``summary_text`` and ``structured`` keys are read.

        Returns:
            The prompt text, with one JSON line per item (``暂无`` when
            ``items`` is empty).
        """
        first_truthy = MemberContextPromptBuilder._first_truthy

        structured_lines = []
        for item in items:
            structured = MemberContextPromptBuilder._structured_of(item)
            # Normalize daily / weekly / monthly field names onto one shape so
            # the model sees the same keys whatever layer the item came from.
            payload = {
                "period_key": item.get("period_key"),
                "summary_text": item.get("summary_text", ""),
                "topics": first_truthy(
                    structured, ["topics", "stable_topics", "long_term_topics"], []),
                "habit_signals": first_truthy(
                    structured, ["habit_signals", "habit_patterns"], []),
                "engagement_traits": first_truthy(
                    structured, ["engagement_traits", "stable_traits"], []),
                "reply_preferences": first_truthy(
                    structured, ["reply_preferences", "long_term_reply_preferences"], []),
                "temperament_signal": first_truthy(
                    structured, ["temperament_signal", "temperament_tendency"], ""),
                # phase_state is the monthly schema's name for this field; fall
                # back to it like the other monthly-key fallbacks above.
                "recent_state": first_truthy(
                    structured, ["recent_state", "phase_state"], []),
            }
            structured_lines.append(json.dumps(payload, ensure_ascii=False))

        if digest_type == "weekly":
            schema = (
                '{'
                '"stable_topics":["主题1"],'
                '"stable_traits":["特征1"],'
                '"habit_patterns":["习惯1"],'
                '"reply_preferences":["偏好1"],'
                '"recent_state":["状态1"],'
                '"temperament_tendency":"一句中文",'
                '"summary_text":"一段不超过120字的周摘要",'
                '"confidence":0.0'
                '}'
            )
            extra = "请从多个日摘要中提炼本周重复出现的模式,过滤单日噪音。"
        else:
            schema = (
                '{'
                '"long_term_topics":["主题1"],'
                '"stable_traits":["特征1"],'
                '"habit_patterns":["习惯1"],'
                '"long_term_reply_preferences":["偏好1"],'
                '"phase_state":["状态1"],'
                '"temperament_tendency":"一句中文",'
                '"summary_text":"一段不超过140字的月摘要",'
                '"confidence":0.0'
                '}'
            )
            extra = "请从多个周摘要中提炼阶段性稳定特征,只有反复出现的模式才能进入长期层。"

        header = (
            f"你是微信群后台的成员{digest_type}摘要生成器。\n"
            f"{extra}\n"
            "不可做心理诊断、负面评价、隐私猜测。输出严格 JSON不要 markdown。\n"
            f"{schema}\n"
            "要求:\n"
            "1. 所有列表字段最多5项必须中性克制。\n"
            "2. 只有多个下级摘要反复出现的特征,才允许写进 stable_traits / habit_patterns / long_term_reply_preferences。\n"
            "3. recent_state / phase_state 只描述当前阶段状态,不要冒充长期人格。\n"
            f"成员: {display_name} ({wxid})\n"
            f"群ID: {chatroom_id}\n"
            f"周期: {period_key}\n"
            "下级摘要:\n"
        )
        return header + ("\n".join(structured_lines) or "暂无")

    @staticmethod
    def build_final_context_prompt(chatroom_id: str, wxid: str, display_name: str,
                                   monthly_digests: List[Dict], weekly_digests: List[Dict],
                                   daily_digests: List[Dict], *,
                                   max_monthly: int = 6, max_weekly: int = 4,
                                   max_daily: int = 6) -> str:
        """Build the prompt that merges all digest layers into the final profile.

        Args:
            chatroom_id: Group id, echoed into the prompt.
            wxid: Member id, echoed into the prompt.
            display_name: Member display name, echoed into the prompt.
            monthly_digests: Monthly digest records; only ``structured`` is read.
            weekly_digests: Weekly digest records; only ``structured`` is read.
            daily_digests: Daily digest records; only ``structured`` is read.
            max_monthly: Number of leading monthly records to include.
            max_weekly: Number of leading weekly records to include.
            max_daily: Number of leading daily records to include.

        Returns:
            The prompt text. Each layer contributes one JSON line per record,
            or ``暂无`` when the layer is empty. Records are taken from the
            front of each list, so callers decide the ordering.
        """
        dump = MemberContextPromptBuilder._dump_structured
        monthly_lines = dump(monthly_digests, max_monthly)
        weekly_lines = dump(weekly_digests, max_weekly)
        daily_lines = dump(daily_digests, max_daily)

        header = (
            "你是微信群后台的最终成员交互画像整理器。\n"
            "请结合月级、周级、日级摘要,输出一个既有长期层又有近期层的后台交互画像。\n"
            "不要做敏感推断、心理诊断、隐私猜测。输出严格 JSON不要 markdown。\n"
            '{'
            '"activity_level":"高活跃|中活跃|低活跃|观察中",'
            '"message_pattern":"一句中文",'
            '"interaction_style":"一句中文",'
            '"response_style_hint":"一句中文",'
            '"topics_of_interest":["主题1"],'
            '"recent_focus":["近期主题1"],'
            '"stable_traits":["长期特征1"],'
            '"habit_patterns":["习惯1"],'
            '"long_term_reply_preferences":["偏好1"],'
            '"recent_state":["近期状态1"],'
            '"temperament_tendency":"一句中文",'
            '"summary_text":"一段不超过150字的后台摘要",'
            '"confidence":0.0,'
            '"engagement_traits":["特征1"],'
            '"reply_taboos":["避坑1"]'
            '}\n'
            "要求:\n"
            "1. stable_traits、habit_patterns、long_term_reply_preferences 只从月级和多次重复证据中提取。\n"
            "2. recent_focus、recent_state 更依赖最近周级和日级。\n"
            "3. summary_text 要像后台备注,不要明显暴露在给用户做画像。\n"
            f"成员: {display_name} ({wxid})\n"
            f"群ID: {chatroom_id}\n"
            "月级摘要:\n"
        )
        return (
            header + ("\n".join(monthly_lines) or "暂无")
            + "\n周级摘要:\n" + ("\n".join(weekly_lines) or "暂无")
            + "\n日级摘要:\n" + ("\n".join(daily_lines) or "暂无")
        )