Files
abot/plugins/member_context/prompt_builder.py
liuwei 78ab3e65c2 完善成员画像插件的日/周/月分层提取与已结束日期处理逻辑
- 将成员画像能力进一步收敛到插件内部,强化按群启用、后台异步刷新、后台查看的完整链路
- 新增群维度按日批量提取能力:以群为单位按天处理一次,统一提取当天活跃成员的日级画像摘要
- 日级画像输出扩展为更适合长期累计的结构化信号,补充身份线索、技能信号、家庭线索、阶段线索、价值偏好、群内角色、决策风格等字段
- 优化提示词设计,明确要求优先提取可复用、可累计、可验证的行为线索,减少一次性情绪和短期噪声对长期画像的干扰
- 打通日 -> 周 -> 月 -> 最终画像 的分层汇总链路,让后续月度画像直接消费日/周级结构化摘要,而不是重复回扫长窗口原始消息
- 新增/完善画像融合策略:identity_traits、skill_profile、family_profile、life_stage_profile、value_profile 也纳入长期分数累计,不再仅依赖最近一次结果覆盖旧结果
- 将活跃群、活跃成员、辅助消息样本等口径统一调整为只处理已结束日期,避免当天未完结数据进入画像计算
- 调整日级批处理逻辑,默认只处理昨天及更早日期,确保不会处理当天消息
- 修复重复执行时仍然先调用 AI 再跳过的问题,改为先检查当天候选成员是否已完成生成,全部已存在时直接跳过,减少无效 AI 请求和耗时
- 增加群日批处理、周摘要、月摘要、群刷新进度等日志,方便后台定位当前刷新到哪些群、哪些成员、进度如何
- 丰富后台画像展示字段,支持查看更完整的长期画像维度与摘要统计
- 更新插件配置默认值,收敛为近 60 天启动窗口、每日滚动处理与群级日摘要模式
- 补充 message_storage 读取能力,支持按群按日提取消息,为群日批量画像与后续周期汇总提供底层数据支撑
2026-04-02 13:41:51 +08:00

249 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import json
from typing import Dict, List
class MemberContextPromptBuilder:
    """Build LLM prompt strings for layered member profiling.

    Every public method is a pure static function that returns one prompt
    string. The prompts cover four layers: a per-group daily batch digest,
    a per-member daily digest, a weekly/monthly period digest, and the final
    interaction profile. Sections with no data are rendered as "暂无".
    """

    @staticmethod
    def _first_truthy(structured: Dict, keys, default):
        """Return the first truthy ``structured[key]`` among *keys*, else *default*."""
        for key in keys:
            value = structured.get(key)
            if value:
                return value
        return default

    @staticmethod
    def _structured_lines(digests: List[Dict], limit: int) -> List[str]:
        """Serialise the ``structured`` payload of the first *limit* digests.

        A missing or falsy ``structured`` value is rendered as ``{}`` so the
        prompt never contains a bare ``null`` line.
        """
        return [
            json.dumps(item.get("structured") or {}, ensure_ascii=False)
            for item in (digests or [])[:limit]
        ]

    @staticmethod
    def build_group_daily_digest_prompt(chatroom_id: str, digest_date: str,
                                        member_labels: List[str], compressed_chat: str) -> str:
        """Build the prompt that extracts daily digests for a whole group at once.

        Args:
            chatroom_id: Group identifier, echoed into the prompt.
            digest_date: Date label of the day being processed, echoed as-is.
            member_labels: Candidate member lines; only the first 80 are used.
            compressed_chat: Pre-compressed chat log text for that day.

        Returns:
            The complete prompt text. Empty or ``None`` candidates / chat log
            are rendered as "暂无", matching the other builders.
        """
        # Cap the candidate list so the prompt size stays bounded.
        candidates = "\n".join((member_labels or [])[:80]) or "暂无"
        chat_log = compressed_chat or "暂无"
        return (
            "你是微信群后台的成员日观察批量提取器。\n"
            "请基于给定的一天群聊记录,识别当天有参与发言的成员,并分别提取中性、克制的成员日观察摘要。\n"
            "不要做心理诊断、隐私猜测、负面评价,不要输出未在候选名单中的成员。\n"
            "你的输出将被后续系统按周、按月持续累积,因此优先提取可复用、可累计、可验证的行为信号,而不是一次性的情绪和玩笑。\n"
            "输出严格 JSON不要 markdown。\n"
            "{"
            "\"members\":["
            "{"
            "\"wxid\":\"成员wxid\","
            "\"display_name\":\"成员显示名\","
            "\"topics\":[\"主题1\"],"
            "\"identity_clues\":[\"身份线索1\"],"
            "\"skill_signals\":[\"技能信号1\"],"
            "\"family_signals\":[\"家庭线索1\"],"
            "\"life_stage_signals\":[\"阶段线索1\"],"
            "\"value_preferences\":[\"价值偏好1\"],"
            "\"interaction_style\":\"一句中文\","
            "\"message_pattern\":\"一句中文\","
            "\"response_style_hint\":\"一句中文\","
            "\"habit_signals\":[\"信号1\"],"
            "\"engagement_traits\":[\"特征1\"],"
            "\"decision_style\":\"一句中文\","
            "\"social_role\":\"一句中文,描述当天在群中的角色表现\","
            "\"reply_taboos\":[\"避坑1\"],"
            "\"temperament_signal\":\"一句中文,描述当天沟通倾向,必须克制\","
            "\"summary_text\":\"一段不超过100字的日摘要\","
            "\"representative_messages\":[\"原话1\",\"原话2\"],"
            "\"confidence\":0.0"
            "}"
            "]"
            "}\n"
            "要求:\n"
            "1. 只输出当天真正参与发言且能看出明确行为信号的成员;发言极少的人可以不输出。\n"
            "2. 每个成员的 topics、identity_clues、skill_signals、family_signals、life_stage_signals、value_preferences、habit_signals、engagement_traits 最多4个reply_taboos 最多3个。\n"
            "3. representative_messages 只保留最能代表当天表达方式的短句最多3条。\n"
            "4. 必须严格使用候选成员列表中的 wxid 和显示名。\n"
            "5. identity_clues、family_signals、life_stage_signals 只能写公开聊天中出现的线索,不可把弱线索写成确定事实。\n"
            "6. skill_signals 重点提炼成员解决问题、提供信息、组织表达、专业能力等信号。\n"
            "7. social_role 只描述当天在群里的角色表现,例如:问题提出者、信息补充者、气氛调节者、组织推进者。\n"
            "8. topics 更偏向持续关注的话题方向habit_signals 更偏向重复表达或互动习惯engagement_traits 更偏向参与方式。\n"
            "9. value_preferences 只记录公开表达出的偏好,如效率优先、成本敏感、谨慎验证、乐于助人,不要写抽象大词。\n"
            "10. summary_text 应是后台观察摘要,不要写成对用户说的话。\n"
            f"群ID: {chatroom_id}\n"
            f"日期: {digest_date}\n"
            "候选成员:\n" + candidates + "\n"
            "压缩后的群聊记录:\n" + chat_log
        )

    @staticmethod
    def build_daily_digest_prompt(chatroom_id: str, wxid: str, display_name: str,
                                  digest_date: str, messages: List[Dict]) -> str:
        """Build the prompt that extracts one member's daily digest.

        Args:
            chatroom_id: Group identifier, echoed into the prompt.
            wxid: Member identifier, echoed into the prompt.
            display_name: Member display name, echoed into the prompt.
            digest_date: Date label of the day being processed.
            messages: Message dicts read via their ``timestamp`` and
                ``content`` keys. Only the last 80 entries are used.

        Returns:
            The complete prompt text; "暂无" stands in when no message has
            usable content.
        """
        lines = []
        for msg in (messages or [])[-80:]:
            # Characters 11..15 of the stringified timestamp; presumably the
            # HH:MM part of a "YYYY-MM-DD HH:MM:SS" value (verify with caller).
            ts = str(msg.get("timestamp", ""))[11:16]
            # Flatten to a single line and cap the length per message.
            content = (msg.get("content") or "").replace("\n", " ").strip()[:180]
            if content:
                lines.append(f"[{ts}] {content}")
        return (
            "你是微信群后台的成员日观察摘要生成器。\n"
            "请仅基于给定的当日公开聊天记录,提取对后续互动有帮助的中性行为观察。\n"
            "不要做人格诊断、隐私猜测、负面评价,不要脑补群外信息。\n"
            "这些日观察会被后续系统按周、按月持续累积,所以应优先输出长期可验证的行为线索,而不是一次性情绪。\n"
            "输出严格 JSON不要 markdown。\n"
            "{"
            "\"topics\":[\"主题1\"],"
            "\"identity_clues\":[\"身份线索1\"],"
            "\"skill_signals\":[\"技能信号1\"],"
            "\"family_signals\":[\"家庭线索1\"],"
            "\"life_stage_signals\":[\"阶段线索1\"],"
            "\"value_preferences\":[\"价值偏好1\"],"
            "\"interaction_style\":\"一句中文\","
            "\"message_pattern\":\"一句中文\","
            "\"response_style_hint\":\"一句中文\","
            "\"habit_signals\":[\"信号1\"],"
            "\"engagement_traits\":[\"特征1\"],"
            "\"decision_style\":\"一句中文\","
            "\"social_role\":\"一句中文\","
            "\"reply_taboos\":[\"避坑1\"],"
            "\"temperament_signal\":\"一句中文,描述当天显露的沟通倾向,必须克制\","
            "\"summary_text\":\"一段不超过100字的日摘要\","
            "\"representative_messages\":[\"原话1\",\"原话2\"],"
            "\"confidence\":0.0"
            "}\n"
            "要求:\n"
            "1. topics、identity_clues、skill_signals、family_signals、life_stage_signals、value_preferences、habit_signals、engagement_traits 最多4个reply_taboos 最多3个。\n"
            "2. temperament_signal 只能写当日可观察到的沟通倾向,不可上升为长期性格判断。\n"
            "3. representative_messages 保留最能代表当天风格的短句最多3条。\n"
            "4. identity_clues、family_signals、life_stage_signals 只能写线索,不可写成确定事实。\n"
            "5. skill_signals 重点描述专业能力、工具熟练度、信息组织能力、问题解决能力等当天显露出的信号。\n"
            "6. topics 尽量写持续关注方向避免写一次性插话habit_signals 只写当天已明显出现的表达或互动习惯。\n"
            "7. value_preferences 只保留公开表达出的判断偏好,如效率优先、先验证再决策、重成本、重稳定。\n"
            "8. summary_text 要像后台备注,不要像对话回复。\n"
            f"成员: {display_name} ({wxid})\n"
            f"群ID: {chatroom_id}\n"
            f"日期: {digest_date}\n"
            "当日消息:\n" + ("\n".join(lines) or "暂无")
        )

    @staticmethod
    def build_period_digest_prompt(digest_type: str, chatroom_id: str, wxid: str,
                                   display_name: str, period_key: str, items: List[Dict]) -> str:
        """Build the prompt that rolls lower-level digests up into a period digest.

        Args:
            digest_type: ``"weekly"`` selects the weekly schema; any other
                value selects the monthly schema.
            chatroom_id: Group identifier, echoed into the prompt.
            wxid: Member identifier, echoed into the prompt.
            display_name: Member display name, echoed into the prompt.
            period_key: Label of the period being summarised.
            items: Lower-level digest dicts read via their ``period_key``,
                ``summary_text`` and ``structured`` keys.

        Returns:
            The complete prompt text. Each item becomes one JSON line whose
            keys are normalised to the daily field names, whichever layer
            (daily / weekly / monthly) produced the item.
        """
        pick = MemberContextPromptBuilder._first_truthy
        structured_lines = []
        for item in items or []:
            structured = item.get("structured") or {}
            # Each field accepts the daily name first, then the weekly/monthly
            # aliases, so digests from any layer can be fed back in.
            payload = {
                "period_key": item.get("period_key"),
                "summary_text": item.get("summary_text", ""),
                "topics": pick(structured, ("topics", "stable_topics", "long_term_topics"), []),
                "identity_clues": pick(structured, ("identity_clues", "identity_traits"), []),
                "skill_signals": pick(structured, ("skill_signals", "skill_profile"), []),
                "family_signals": pick(structured, ("family_signals", "family_profile"), []),
                "life_stage_signals": pick(structured, ("life_stage_signals", "life_stage_profile"), []),
                "value_preferences": pick(structured, ("value_preferences", "value_profile"), []),
                "habit_signals": pick(structured, ("habit_signals", "habit_patterns"), []),
                "engagement_traits": pick(structured, ("engagement_traits", "stable_traits"), []),
                "reply_preferences": pick(structured, ("reply_preferences", "long_term_reply_preferences"), []),
                "social_role": pick(structured, ("social_role", "group_role"), ""),
                "decision_style": pick(structured, ("decision_style", "decision_profile"), ""),
                "temperament_signal": pick(structured, ("temperament_signal", "temperament_tendency"), ""),
                # The monthly schema names this field phase_state; without the
                # alias a monthly digest's state would be silently dropped.
                "recent_state": pick(structured, ("recent_state", "phase_state"), []),
            }
            structured_lines.append(json.dumps(payload, ensure_ascii=False))
        if digest_type == "weekly":
            schema = (
                "{"
                "\"stable_topics\":[\"主题1\"],"
                "\"identity_traits\":[\"身份特征1\"],"
                "\"skill_profile\":[\"技能画像1\"],"
                "\"family_profile\":[\"家庭线索1\"],"
                "\"life_stage_profile\":[\"阶段线索1\"],"
                "\"value_profile\":[\"价值偏好1\"],"
                "\"stable_traits\":[\"特征1\"],"
                "\"habit_patterns\":[\"习惯1\"],"
                "\"reply_preferences\":[\"偏好1\"],"
                "\"group_role\":\"一句中文\","
                "\"decision_profile\":\"一句中文\","
                "\"recent_state\":[\"状态1\"],"
                "\"temperament_tendency\":\"一句中文\","
                "\"summary_text\":\"一段不超过120字的周摘要\","
                "\"confidence\":0.0"
                "}"
            )
            extra = "请从多个日摘要中提炼本周重复出现的模式,过滤单日噪音。"
            reply_pref_field = "reply_preferences"
        else:
            schema = (
                "{"
                "\"long_term_topics\":[\"主题1\"],"
                "\"identity_traits\":[\"身份特征1\"],"
                "\"skill_profile\":[\"技能画像1\"],"
                "\"family_profile\":[\"家庭线索1\"],"
                "\"life_stage_profile\":[\"阶段线索1\"],"
                "\"value_profile\":[\"价值偏好1\"],"
                "\"stable_traits\":[\"特征1\"],"
                "\"habit_patterns\":[\"习惯1\"],"
                "\"long_term_reply_preferences\":[\"偏好1\"],"
                "\"group_role\":\"一句中文\","
                "\"decision_profile\":\"一句中文\","
                "\"phase_state\":[\"状态1\"],"
                "\"temperament_tendency\":\"一句中文\","
                "\"summary_text\":\"一段不超过140字的月摘要\","
                "\"confidence\":0.0"
                "}"
            )
            extra = "请从多个周摘要中提炼阶段性稳定特征,只有反复出现的模式才能进入长期层。"
            reply_pref_field = "long_term_reply_preferences"
        return (
            f"你是微信群后台的成员{digest_type}摘要生成器。\n"
            f"{extra}\n"
            "不可做心理诊断、负面评价、隐私猜测。输出严格 JSON不要 markdown。\n"
            f"{schema}\n"
            "要求:\n"
            "1. 所有列表字段最多5项必须中性克制。\n"
            # Name the reply-preference field that the active schema actually
            # defines, so the rule never points at a key outside the schema.
            f"2. 只有多个下级摘要反复出现的特征,才允许写进 stable_traits / habit_patterns / {reply_pref_field}。\n"
            "3. recent_state / phase_state 只描述当前阶段状态,不要冒充长期人格。\n"
            "4. identity_traits、family_profile、life_stage_profile 只能保留反复出现的公开线索,不可编造事实。\n"
            "5. skill_profile 要优先提炼稳定出现的能力、专业方向、擅长处理的问题类型。\n"
            "6. group_role 描述其在群中的长期角色位置decision_profile 描述其决策与判断风格。\n"
            "7. value_profile 需要优先保留真正反复出现的判断偏好,如效率优先、成本敏感、风险谨慎、愿意分享。\n"
            f"成员: {display_name} ({wxid})\n"
            f"群ID: {chatroom_id}\n"
            f"周期: {period_key}\n"
            "下级摘要:\n" + ("\n".join(structured_lines) or "暂无")
        )

    @staticmethod
    def build_final_context_prompt(chatroom_id: str, wxid: str, display_name: str,
                                   monthly_digests: List[Dict], weekly_digests: List[Dict],
                                   daily_digests: List[Dict]) -> str:
        """Build the prompt that merges all digest layers into the final profile.

        Args:
            chatroom_id: Group identifier, echoed into the prompt.
            wxid: Member identifier, echoed into the prompt.
            display_name: Member display name, echoed into the prompt.
            monthly_digests: Monthly digest dicts; the first 6 are used.
            weekly_digests: Weekly digest dicts; the first 4 are used.
            daily_digests: Daily digest dicts; the first 6 are used.

        Returns:
            The complete prompt text. Only each digest's ``structured`` value
            is included, one JSON line per digest; an empty layer is rendered
            as "暂无".

        Note:
            Slicing keeps the *first* entries of each list, so callers
            presumably pass digests ordered newest-first (verify with caller).
        """
        dump = MemberContextPromptBuilder._structured_lines
        monthly_lines = dump(monthly_digests, 6)
        weekly_lines = dump(weekly_digests, 4)
        daily_lines = dump(daily_digests, 6)
        return (
            "你是微信群后台的最终成员交互画像整理器。\n"
            "请结合月级、周级、日级摘要,输出一个既有长期层又有近期层的后台交互画像。\n"
            "不要做敏感推断、心理诊断、隐私猜测。输出严格 JSON不要 markdown。\n"
            "{"
            "\"activity_level\":\"高活跃|中活跃|低活跃|观察中\","
            "\"message_pattern\":\"一句中文\","
            "\"interaction_style\":\"一句中文\","
            "\"response_style_hint\":\"一句中文\","
            "\"topics_of_interest\":[\"主题1\"],"
            "\"recent_focus\":[\"近期主题1\"],"
            "\"identity_traits\":[\"身份线索1\"],"
            "\"skill_profile\":[\"技能画像1\"],"
            "\"family_profile\":[\"家庭线索1\"],"
            "\"life_stage_profile\":[\"阶段线索1\"],"
            "\"value_profile\":[\"价值偏好1\"],"
            "\"stable_traits\":[\"长期特征1\"],"
            "\"habit_patterns\":[\"习惯1\"],"
            "\"long_term_reply_preferences\":[\"偏好1\"],"
            "\"group_role\":\"一句中文\","
            "\"decision_profile\":\"一句中文\","
            "\"recent_state\":[\"近期状态1\"],"
            "\"temperament_tendency\":\"一句中文\","
            "\"summary_text\":\"一段不超过150字的后台摘要\","
            "\"confidence\":0.0,"
            "\"engagement_traits\":[\"特征1\"],"
            "\"reply_taboos\":[\"避坑1\"]"
            "}\n"
            "要求:\n"
            "1. stable_traits、habit_patterns、long_term_reply_preferences 只从月级和多次重复证据中提取。\n"
            "2. recent_focus、recent_state 更依赖最近周级和日级。\n"
            "3. summary_text 要像后台备注,不要明显暴露在给用户做画像。\n"
            "4. identity_traits、family_profile、life_stage_profile 必须写成公开线索或长期观察,不得伪造事实。\n"
            "5. skill_profile 要尽量覆盖专业能力、问题解决能力、表达组织能力、资源协调能力等维度。\n"
            "6. group_role 要描述其在群中的角色定位decision_profile 要描述其决策/判断方式。\n"
            "7. 如果月级与周级证据不足,宁可少写,也不要把短期状态写成长期人格。\n"
            f"成员: {display_name} ({wxid})\n"
            f"群ID: {chatroom_id}\n"
            "月级摘要:\n" + ("\n".join(monthly_lines) or "暂无")
            + "\n周级摘要:\n" + ("\n".join(weekly_lines) or "暂无")
            + "\n日级摘要:\n" + ("\n".join(daily_lines) or "暂无")
        )