Files
abot/plugins/member_context/prompt_builder.py
liuwei bfd0dbc15c 接入成员画像 Dify 工作流并清理旧提取逻辑
- 新增 member_context 专用 DifyClient,统一兼容 completion 与 workflow 两种调用模式
- 将成员画像插件默认切换到 Dify workflow 模式,配置改用新的 workflow 应用与 workflows/run 接口
- 生成可直接导入 Dify 的成员画像工作流 DSL 文件,方便后台一键导入和发布
- 补充 Dify 工作流接入说明文档,明确输入字段、输出字段、发布步骤与插件消费方式
- 清理旧的单成员日摘要提取链路,日级画像统一收敛到群日批量提取路径,减少无效分支和历史残留
- 去除 member_context 内部多处旧 requests 直连调用,统一改为通过 DifyClient 调用 AI 服务
- 优化群日批量结果解析逻辑,只按 wxid 作为唯一主键识别成员,不再依赖昵称做唯一判断
- 新增按 wxid 的结果去重与完整度评分逻辑,遇到重复成员结果时优先保留字段更完整、置信度更高的一条
- 保留现有初始化、增量、周/月聚合与最终画像生成链路,同时剔除 workflow 接入后已无效或低价值的旧逻辑
- 为后续继续收紧 fallback 标记、增强后台质量诊断和优化工作流输出稳定性打下基础
2026-04-02 14:25:50 +08:00

198 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import json
from typing import Dict, List
class MemberContextPromptBuilder:
    """Builds the LLM prompt strings used for layered member profiling.

    Three prompts are produced, one per layer:

    * a group-level daily extraction prompt (many members, one day),
    * a per-member period prompt (weekly or monthly roll-up),
    * a per-member final profile prompt (monthly + weekly + daily).

    Every method is a pure function of its arguments and returns the prompt
    text; nothing here calls the model.
    """

    # Maximum number of candidate member labels embedded in the daily prompt.
    _MAX_MEMBER_LABELS = 80

    # How many digests of each level are embedded in the final-profile prompt.
    _FINAL_MONTHLY_LIMIT = 6
    _FINAL_WEEKLY_LIMIT = 4
    _FINAL_DAILY_LIMIT = 6

    # (output key, accepted source keys in priority order, default is a list?)
    # Lower-level digests may use daily, weekly or monthly field names; the
    # first truthy alias wins so the period prompt sees one uniform shape.
    _PERIOD_FIELD_ALIASES = (
        ("topics", ("topics", "stable_topics", "long_term_topics"), True),
        ("identity_clues", ("identity_clues", "identity_traits"), True),
        ("skill_signals", ("skill_signals", "skill_profile"), True),
        ("family_signals", ("family_signals", "family_profile"), True),
        ("life_stage_signals", ("life_stage_signals", "life_stage_profile"), True),
        ("value_preferences", ("value_preferences", "value_profile"), True),
        ("habit_signals", ("habit_signals", "habit_patterns"), True),
        ("engagement_traits", ("engagement_traits", "stable_traits"), True),
        ("reply_preferences", ("reply_preferences", "long_term_reply_preferences"), True),
        ("social_role", ("social_role", "group_role"), False),
        ("decision_style", ("decision_style", "decision_profile"), False),
        ("temperament_signal", ("temperament_signal", "temperament_tendency"), False),
        ("recent_state", ("recent_state",), True),
    )

    _DAILY_SCHEMA = (
        '{'
        '"members":['
        '{'
        '"wxid":"成员wxid",'
        '"display_name":"成员显示名",'
        '"topics":["主题1"],'
        '"identity_clues":["身份线索1"],'
        '"skill_signals":["技能信号1"],'
        '"family_signals":["家庭线索1"],'
        '"life_stage_signals":["阶段线索1"],'
        '"value_preferences":["价值偏好1"],'
        '"interaction_style":"一句中文",'
        '"message_pattern":"一句中文",'
        '"response_style_hint":"一句中文",'
        '"habit_signals":["信号1"],'
        '"engagement_traits":["特征1"],'
        '"decision_style":"一句中文",'
        '"social_role":"一句中文,描述当天在群中的角色表现",'
        '"reply_taboos":["避坑1"],'
        '"temperament_signal":"一句中文,描述当天沟通倾向,必须克制",'
        '"summary_text":"一段不超过100字的日摘要",'
        '"representative_messages":["原话1","原话2"],'
        '"confidence":0.0'
        '}'
        ']'
        '}'
    )

    _WEEKLY_SCHEMA = (
        '{'
        '"stable_topics":["主题1"],'
        '"identity_traits":["身份特征1"],'
        '"skill_profile":["技能画像1"],'
        '"family_profile":["家庭线索1"],'
        '"life_stage_profile":["阶段线索1"],'
        '"value_profile":["价值偏好1"],'
        '"stable_traits":["特征1"],'
        '"habit_patterns":["习惯1"],'
        '"reply_preferences":["偏好1"],'
        '"group_role":"一句中文",'
        '"decision_profile":"一句中文",'
        '"recent_state":["状态1"],'
        '"temperament_tendency":"一句中文",'
        '"summary_text":"一段不超过120字的周摘要",'
        '"confidence":0.0'
        '}'
    )

    _MONTHLY_SCHEMA = (
        '{'
        '"long_term_topics":["主题1"],'
        '"identity_traits":["身份特征1"],'
        '"skill_profile":["技能画像1"],'
        '"family_profile":["家庭线索1"],'
        '"life_stage_profile":["阶段线索1"],'
        '"value_profile":["价值偏好1"],'
        '"stable_traits":["特征1"],'
        '"habit_patterns":["习惯1"],'
        '"long_term_reply_preferences":["偏好1"],'
        '"group_role":"一句中文",'
        '"decision_profile":"一句中文",'
        '"phase_state":["状态1"],'
        '"temperament_tendency":"一句中文",'
        '"summary_text":"一段不超过140字的月摘要",'
        '"confidence":0.0'
        '}'
    )

    _FINAL_SCHEMA = (
        '{'
        '"activity_level":"高活跃|中活跃|低活跃|观察中",'
        '"message_pattern":"一句中文",'
        '"interaction_style":"一句中文",'
        '"response_style_hint":"一句中文",'
        '"topics_of_interest":["主题1"],'
        '"recent_focus":["近期主题1"],'
        '"identity_traits":["身份线索1"],'
        '"skill_profile":["技能画像1"],'
        '"family_profile":["家庭线索1"],'
        '"life_stage_profile":["阶段线索1"],'
        '"value_profile":["价值偏好1"],'
        '"stable_traits":["长期特征1"],'
        '"habit_patterns":["习惯1"],'
        '"long_term_reply_preferences":["偏好1"],'
        '"group_role":"一句中文",'
        '"decision_profile":"一句中文",'
        '"recent_state":["近期状态1"],'
        '"temperament_tendency":"一句中文",'
        '"summary_text":"一段不超过150字的后台摘要",'
        '"confidence":0.0,'
        '"engagement_traits":["特征1"],'
        '"reply_taboos":["避坑1"]'
        '}'
    )

    @staticmethod
    def _normalize_period_item(item: Dict) -> Dict:
        """Project one lower-level digest onto the uniform period payload."""
        structured = item.get("structured") or {}
        payload = {
            "period_key": item.get("period_key"),
            "summary_text": item.get("summary_text", ""),
        }
        for out_key, aliases, is_list in MemberContextPromptBuilder._PERIOD_FIELD_ALIASES:
            # Fresh default per field so payloads never share a list object.
            chosen = [] if is_list else ""
            for alias in aliases:
                candidate = structured.get(alias)
                if candidate:
                    chosen = candidate
                    break
            payload[out_key] = chosen
        return payload

    @staticmethod
    def _dump_structured(digests: List[Dict], limit: int) -> List[str]:
        """Serialise the ``structured`` part of the first ``limit`` digests."""
        # A missing or None "structured" value is emitted as "{}", matching
        # how the period prompt normalises it.
        return [
            json.dumps(digest.get("structured") or {}, ensure_ascii=False)
            for digest in (digests or [])[:limit]
        ]

    @staticmethod
    def build_group_daily_digest_prompt(chatroom_id: str, digest_date: str,
                                        member_labels: List[str], compressed_chat: str) -> str:
        """Build the prompt that extracts per-member observations for one day.

        Args:
            chatroom_id: Group identifier, printed verbatim in the prompt.
            digest_date: Date label, printed verbatim in the prompt.
            member_labels: Candidate member lines; only the first 80 are
                embedded. ``None`` is treated as an empty list.
            compressed_chat: The day's chat text, appended at the end of the
                prompt. ``None`` is treated as an empty string.

        Returns:
            The complete prompt text.
        """
        builder = MemberContextPromptBuilder
        candidates = "\n".join((member_labels or [])[:builder._MAX_MEMBER_LABELS])
        return (
            "你是微信群后台的成员日观察批量提取器。\n"
            "请基于给定的一天群聊记录,识别当天有参与发言的成员,并分别提取中性、克制的成员日观察摘要。\n"
            "不要做心理诊断、隐私猜测、负面评价,不要输出未在候选名单中的成员。\n"
            "你的输出将被后续系统按周、按月持续累积,因此优先提取可复用、可累计、可验证的行为信号,而不是一次性的情绪和玩笑。\n"
            "输出严格 JSON,不要 markdown。\n"
            f"{builder._DAILY_SCHEMA}\n"
            "要求:\n"
            "1. 只输出当天真正参与发言且能看出明确行为信号的成员;发言极少的人可以不输出。\n"
            "2. 每个成员的 topics、identity_clues、skill_signals、family_signals、life_stage_signals、value_preferences、habit_signals、engagement_traits 最多4个,reply_taboos 最多3个。\n"
            "3. representative_messages 只保留最能代表当天表达方式的短句,最多3条。\n"
            "4. 必须严格使用候选成员列表中的 wxid 和显示名。\n"
            "5. identity_clues、family_signals、life_stage_signals 只能写公开聊天中出现的线索,不可把弱线索写成确定事实。\n"
            "6. skill_signals 重点提炼成员解决问题、提供信息、组织表达、专业能力等信号。\n"
            "7. social_role 只描述当天在群里的角色表现,例如:问题提出者、信息补充者、气氛调节者、组织推进者。\n"
            "8. topics 更偏向持续关注的话题方向,habit_signals 更偏向重复表达或互动习惯,engagement_traits 更偏向参与方式。\n"
            "9. value_preferences 只记录公开表达出的偏好,如效率优先、成本敏感、谨慎验证、乐于助人,不要写抽象大词。\n"
            "10. summary_text 应是后台观察摘要,不要写成对用户说的话。\n"
            f"群ID: {chatroom_id}\n"
            f"日期: {digest_date}\n"
            "候选成员:\n" + candidates + "\n"
            "压缩后的群聊记录:\n" + (compressed_chat or "")
        )

    @staticmethod
    def build_period_digest_prompt(digest_type: str, chatroom_id: str, wxid: str,
                                   display_name: str, period_key: str, items: List[Dict]) -> str:
        """Build the weekly or monthly roll-up prompt for a single member.

        Args:
            digest_type: ``"weekly"`` selects the weekly schema; any other
                value selects the monthly schema. The value is also printed
                verbatim in the first line of the prompt.
            chatroom_id: Group identifier, printed verbatim.
            wxid: Member identifier, printed verbatim.
            display_name: Member display name, printed verbatim.
            period_key: Period label, printed verbatim.
            items: Lower-level digests. Each is read via ``period_key``,
                ``summary_text`` and ``structured``; ``None`` is treated as
                an empty list.

        Returns:
            The complete prompt text. When ``items`` is empty the digest
            section contains the placeholder "暂无".
        """
        builder = MemberContextPromptBuilder
        structured_lines = [
            json.dumps(builder._normalize_period_item(item), ensure_ascii=False)
            for item in (items or [])
        ]

        # The requirement lines name the fields of the schema actually shown,
        # so the model is not pointed at keys the schema does not contain.
        if digest_type == "weekly":
            schema = builder._WEEKLY_SCHEMA
            extra = "请从多个日摘要中提炼本周重复出现的模式,过滤单日噪音。"
            reply_field = "reply_preferences"
            state_field = "recent_state"
        else:
            schema = builder._MONTHLY_SCHEMA
            extra = "请从多个周摘要中提炼阶段性稳定特征,只有反复出现的模式才能进入长期层。"
            reply_field = "long_term_reply_preferences"
            state_field = "phase_state"

        return (
            f"你是微信群后台的成员{digest_type}摘要生成器。\n"
            f"{extra}\n"
            "不可做心理诊断、负面评价、隐私猜测。输出严格 JSON,不要 markdown。\n"
            f"{schema}\n"
            "要求:\n"
            "1. 所有列表字段最多5项,必须中性克制。\n"
            f"2. 只有多个下级摘要反复出现的特征,才允许写进 stable_traits / habit_patterns / {reply_field}。\n"
            f"3. {state_field} 只描述当前阶段状态,不要冒充长期人格。\n"
            "4. identity_traits、family_profile、life_stage_profile 只能保留反复出现的公开线索,不可编造事实。\n"
            "5. skill_profile 要优先提炼稳定出现的能力、专业方向、擅长处理的问题类型。\n"
            "6. group_role 描述其在群中的长期角色位置,decision_profile 描述其决策与判断风格。\n"
            "7. value_profile 需要优先保留真正反复出现的判断偏好,如效率优先、成本敏感、风险谨慎、愿意分享。\n"
            f"成员: {display_name} ({wxid})\n"
            f"群ID: {chatroom_id}\n"
            f"周期: {period_key}\n"
            "下级摘要:\n" + ("\n".join(structured_lines) or "暂无")
        )

    @staticmethod
    def build_final_context_prompt(chatroom_id: str, wxid: str, display_name: str,
                                   monthly_digests: List[Dict], weekly_digests: List[Dict],
                                   daily_digests: List[Dict]) -> str:
        """Build the prompt that merges all digest levels into one profile.

        Args:
            chatroom_id: Group identifier, printed verbatim.
            wxid: Member identifier, printed verbatim.
            display_name: Member display name, printed verbatim.
            monthly_digests: Monthly digests; the first 6 are embedded.
            weekly_digests: Weekly digests; the first 4 are embedded.
            daily_digests: Daily digests; the first 6 are embedded.

        Only the ``structured`` value of each digest is embedded. ``None``
        for any list is treated as empty, and an empty section is rendered
        as "暂无".

        Returns:
            The complete prompt text.
        """
        builder = MemberContextPromptBuilder
        monthly_lines = builder._dump_structured(monthly_digests, builder._FINAL_MONTHLY_LIMIT)
        weekly_lines = builder._dump_structured(weekly_digests, builder._FINAL_WEEKLY_LIMIT)
        daily_lines = builder._dump_structured(daily_digests, builder._FINAL_DAILY_LIMIT)
        return (
            "你是微信群后台的最终成员交互画像整理器。\n"
            "请结合月级、周级、日级摘要,输出一个既有长期层又有近期层的后台交互画像。\n"
            "不要做敏感推断、心理诊断、隐私猜测。输出严格 JSON,不要 markdown。\n"
            f"{builder._FINAL_SCHEMA}\n"
            "要求:\n"
            "1. stable_traits、habit_patterns、long_term_reply_preferences 只从月级和多次重复证据中提取。\n"
            "2. recent_focus、recent_state 更依赖最近周级和日级。\n"
            "3. summary_text 要像后台备注,不要明显暴露在给用户做画像。\n"
            "4. identity_traits、family_profile、life_stage_profile 必须写成公开线索或长期观察,不得伪造事实。\n"
            "5. skill_profile 要尽量覆盖专业能力、问题解决能力、表达组织能力、资源协调能力等维度。\n"
            "6. group_role 要描述其在群中的角色定位,decision_profile 要描述其决策/判断方式。\n"
            "7. 如果月级与周级证据不足,宁可少写,也不要把短期状态写成长期人格。\n"
            f"成员: {display_name} ({wxid})\n"
            f"群ID: {chatroom_id}\n"
            "月级摘要:\n" + ("\n".join(monthly_lines) or "暂无")
            + "\n周级摘要:\n" + ("\n".join(weekly_lines) or "暂无")
            + "\n日级摘要:\n" + ("\n".join(daily_lines) or "暂无")
        )