增强成员锐评的历史画像分析

1. 为锐评插件新增近60天成员历史画像窗口，汇总周/月/少量日摘要形成长期人设。 2. 新增近60天群历史总结背景汇总，让锐评同时参考群长期语境与近期发言。 3. 调整提示词，明确要求模型结合当前画像与历史画像的反差进行分析。
2026-04-27 14:39:00 +08:00
parent f783e8ba0c
commit 290e44986e
2 changed files with 190 additions and 0 deletions
--- a/plugins/member_roast/config.toml
+++ b/plugins/member_roast/config.toml
@@ -27,6 +27,13 @@ message_limit = 50
 min_message_count = 8
 context_stale_hours = 24
 name_match_min_chars = 2
+# 历史画像窗口：
+# 1. 用户要求明确使用“历史两个月画像 + 当前本人画像”；
+# 2. 这里统一按 60 天窗口汇总成员历史摘要与群历史总结；
+# 3. 只抽取结构化关键信息，不会把全部原文暴力塞给模型。
+history_profile_days = 60
+history_member_digest_limit = 16
+history_group_summary_limit = 10

 [style]
 # 文案风格参数：
--- a/plugins/member_roast/main.py
+++ b/plugins/member_roast/main.py
@@ -11,8 +11,10 @@ from base.plugin_common.message_plugin_interface import MessagePluginInterface
 from base.plugin_common.plugin_interface import PluginStatus
 from db.connection import DBConnectionManager
 from db.contacts_db import ContactsDBOperator
+from db.member_digest_db import MemberDigestDBOperator
 from db.member_context_db import MemberContextDBOperator
 from db.message_storage import MessageStorageDB
+from db.message_summary_db import MessageSummaryDBOperator
 from plugins.ai_auto_response.memory.group_memory_profile import GroupMemoryService
 from plugins.member_context.service import MemberContextService
 from utils.ai.unified_llm import UnifiedLLMClient
@@ -39,8 +41,10 @@ class MemberRoastService:
        self.db_manager = db_manager
        self.plugin_config = plugin_config or {}
        self.contacts_db = ContactsDBOperator(db_manager)
+        self.member_digest_db = MemberDigestDBOperator(db_manager)
        self.member_context_db = MemberContextDBOperator(db_manager)
        self.message_db = MessageStorageDB(db_manager)
+        self.message_summary_db = MessageSummaryDBOperator(db_manager)
        self.member_context_service = MemberContextService(db_manager, plugin_config or {})
        # 群画像服务本身已经带“按天/按总结刷新”的快照逻辑，直接复用即可。
        self.group_memory_service = GroupMemoryService(db_manager, {})
@@ -51,6 +55,9 @@ class MemberRoastService:
        self.message_limit = max(int(profile_cfg.get("message_limit", 50) or 50), 1)
        self.min_message_count = max(int(profile_cfg.get("min_message_count", 8) or 8), 1)
        self.context_stale_hours = max(int(profile_cfg.get("context_stale_hours", 24) or 24), 1)
+        self.history_profile_days = max(int(profile_cfg.get("history_profile_days", 60) or 60), 1)
+        self.history_member_digest_limit = max(int(profile_cfg.get("history_member_digest_limit", 16) or 16), 1)
+        self.history_group_summary_limit = max(int(profile_cfg.get("history_group_summary_limit", 10) or 10), 1)

    def build_roast_payload(self, group_id: str, target_wxid: str) -> Tuple[bool, Dict[str, Any]]:
        """构建锐评所需的完整画像载荷。"""
@@ -81,7 +88,9 @@ class MemberRoastService:
            }

        member_context = self._load_or_refresh_member_context(group_id, target_wxid)
+        historical_member_profile = self._build_historical_member_profile(group_id, target_wxid)
        group_memory_profile = self.group_memory_service.build_group_memory_profile(group_id, group_name=group_name)
+        historical_group_profile = self._build_historical_group_profile(group_id)
        active_dates = self.message_db.get_member_active_dates(group_id, target_wxid, days=min(self.sample_days, 180)) or []

        payload = {
@@ -90,7 +99,9 @@ class MemberRoastService:
            "target_wxid": target_wxid,
            "display_name": display_name,
            "member_context": member_context or {},
+            "historical_member_profile": historical_member_profile or {},
            "group_memory_profile": group_memory_profile or {},
+            "historical_group_profile": historical_group_profile or {},
            "recent_messages": recent_messages,
            "message_count": len(recent_messages),
            "active_days": len(active_dates),
@@ -159,6 +170,151 @@ class MemberRoastService:
                continue
        return None

+    def _build_historical_member_profile(self, group_id: str, target_wxid: str) -> Dict[str, Any]:
+        """Build a condensed historical profile of one member covering the last ``history_profile_days`` days.
+
+        Sources, fetched via ``member_digest_db.list_digests`` and kept only when ``period_end`` falls inside the window:
+        1. monthly and weekly digests, weighted 3 and 2, as the more stable long-term picture;
+        2. daily digests (requested with a limit of at most 8), weighted 1, to reflect recent changes of state;
+        3. everything is reduced to top-ranked values per field plus a short timeline, so raw digest text is not handed to the model wholesale.
+        """
+        since_time = datetime.now() - timedelta(days=self.history_profile_days)  # window start, naive local time
+        weekly_digests = self.member_digest_db.list_digests(group_id, target_wxid, "weekly", limit=self.history_member_digest_limit) or []
+        monthly_digests = self.member_digest_db.list_digests(group_id, target_wxid, "monthly", limit=max(self.history_member_digest_limit, 8)) or []  # monthly: request at least 8 rows
+        daily_digests = self.member_digest_db.list_digests(group_id, target_wxid, "daily", limit=min(self.history_member_digest_limit, 8)) or []  # daily: request at most 8 rows
+
+        def in_range(item: Dict[str, Any]) -> bool:  # True when the digest's period_end is on or after since_time
+            end_time = self._safe_parse_datetime(str(item.get("period_end", "") or ""))
+            if not end_time:  # a falsy parse result excludes the digest
+                return False
+            return end_time >= since_time
+
+        weekly_digests = [item for item in weekly_digests if in_range(item)]
+        monthly_digests = [item for item in monthly_digests if in_range(item)]
+        daily_digests = [item for item in daily_digests if in_range(item)]
+
+        focus_keys = [  # structured fields counted as lists of values
+            "long_term_topics", "stable_topics", "topics", "identity_traits", "skill_profile",
+            "problem_solving_profile", "habit_patterns", "expression_profile", "recent_state",
+            "engagement_traits", "reply_preferences", "long_term_reply_preferences",
+        ]
+        phrase_keys = [  # structured fields counted as one string each
+            "group_role", "temperament_tendency", "decision_profile", "message_pattern",
+            "interaction_style", "phase_state",
+        ]
+
+        list_counter: Dict[str, Dict[str, int]] = {key: {} for key in focus_keys}
+        phrase_counter: Dict[str, Dict[str, int]] = {key: {} for key in phrase_keys}
+        timeline: List[str] = []
+
+        def push_count(bucket: Dict[str, Dict[str, int]], key: str, value: str, weight: int = 1) -> None:  # add weight to bucket[key][value]
+            normalized = str(value or "").strip()
+            if not normalized:  # blank values are not counted
+                return
+            bucket[key][normalized] = bucket[key].get(normalized, 0) + weight
+
+        # Monthly digests outweigh weekly digests, which in turn outweigh daily ones.
+        weighted_sources = [
+            (monthly_digests, 3),
+            (weekly_digests, 2),
+            (daily_digests, 1),
+        ]
+        for digest_list, weight in weighted_sources:
+            for item in digest_list:
+                structured = item.get("structured", {}) or {}  # NOTE(review): assumes "structured" is a dict when present — confirm
+                summary_text = str(item.get("summary_text", "") or "").strip()
+                period_key = str(item.get("period_key", "") or "")
+                if summary_text:
+                    timeline.append(f"{period_key}: {summary_text[:80]}")  # keep only the first 80 characters
+
+                for key in focus_keys:
+                    values = structured.get(key, []) or []
+                    if not isinstance(values, list):  # wrap a scalar so it is counted as a single value
+                        values = [values]
+                    for value in values:
+                        push_count(list_counter, key, str(value or ""), weight=weight)
+
+                for key in phrase_keys:
+                    push_count(phrase_counter, key, str(structured.get(key, "") or ""), weight=weight)  # non-string values are counted by their str() form
+
+        def top_items(counter_map: Dict[str, int], limit: int = 4) -> List[str]:  # names of the `limit` best-ranked entries
+            ordered = sorted(counter_map.items(), key=lambda item: (-item[1], len(item[0])))  # highest weight first; ties go to the shorter string
+            return [name for name, _ in ordered[:limit]]
+
+        history_summary = {
+            "history_days": self.history_profile_days,
+            "source_counts": {  # digest counts after the window filter
+                "monthly": len(monthly_digests),
+                "weekly": len(weekly_digests),
+                "daily": len(daily_digests),
+            },
+            "stable_topics": top_items(list_counter["long_term_topics"] or list_counter["stable_topics"] or list_counter["topics"]),  # first non-empty counter of the three
+            "identity_traits": top_items(list_counter["identity_traits"]),
+            "skill_profile": top_items(list_counter["skill_profile"]),
+            "problem_solving_profile": top_items(list_counter["problem_solving_profile"]),
+            "habit_patterns": top_items(list_counter["habit_patterns"]),
+            "expression_profile": top_items(list_counter["expression_profile"]),
+            "recent_state": top_items(list_counter["recent_state"]),
+            "engagement_traits": top_items(list_counter["engagement_traits"]),
+            "reply_preferences": top_items(list_counter["reply_preferences"] or list_counter["long_term_reply_preferences"]),  # falls back to the long-term key when empty
+            "group_role": top_items(phrase_counter["group_role"], limit=2),
+            "temperament_tendency": top_items(phrase_counter["temperament_tendency"], limit=2),
+            "decision_profile": top_items(phrase_counter["decision_profile"], limit=2),
+            "message_pattern": top_items(phrase_counter["message_pattern"], limit=2),
+            "interaction_style": top_items(phrase_counter["interaction_style"], limit=2),
+            "phase_state": top_items(phrase_counter["phase_state"], limit=3),
+            "timeline": timeline[:8],  # entries were appended monthly -> weekly -> daily, so the cap of 8 favours monthly ones
+        }
+        return history_summary
+
+    def _build_historical_group_profile(self, group_id: str) -> Dict[str, Any]:
+        """Condense the group's recent summary rows into the most frequent topics plus a short timeline."""
+        rows = self._load_recent_group_summaries(group_id)
+        topic_counter: Dict[str, int] = {}
+        timeline: List[str] = []
+
+        for row in rows:
+            period_key = str(row.get("period_key", "") or "")
+            summary_text = str(row.get("summary_text", "") or "").strip()
+            if summary_text:
+                timeline.append(f"{period_key}: {summary_text[:90]}")  # keep only the first 90 characters
+            meta = row.get("meta", {}) or {}  # NOTE(review): assumes "meta" is already a dict here — confirm against _deserialize_row
+            for key in ("topics", "focus_topics", "top_topics", "keywords"):  # all four keys feed one shared counter
+                values = meta.get(key, []) or []
+                if not isinstance(values, list):  # wrap a scalar so it is counted as a single value
+                    values = [values]
+                for value in values:
+                    normalized = str(value or "").strip()
+                    if not normalized:  # blank values are not counted
+                        continue
+                    topic_counter[normalized] = topic_counter.get(normalized, 0) + 1
+
+        ordered_topics = sorted(topic_counter.items(), key=lambda item: (-item[1], len(item[0])))  # most frequent first; ties go to the shorter string
+        return {
+            "history_days": self.history_profile_days,
+            "summary_count": len(rows),  # number of rows returned by _load_recent_group_summaries
+            "focus_topics": [name for name, _ in ordered_topics[:8]],
+            "timeline": timeline[:8],  # first 8 entries in the order the rows were returned
+        }
+
+    def _load_recent_group_summaries(self, group_id: str) -> List[Dict[str, Any]]:
+        """Fetch the group's 'daily' and 'manual' summary rows whose period_end lies in the history window, newest first."""
+        since_time = datetime.now() - timedelta(days=self.history_profile_days)  # window start, naive local time
+        sql = """
+        SELECT *
+        FROM t_message_summary
+        WHERE chatroom_id = %s
+          AND summary_type IN ('daily', 'manual')
+          AND period_end >= %s
+        ORDER BY period_end DESC, update_time DESC
+        LIMIT %s
+        """
+        rows = self.message_summary_db.execute_query(  # values are passed separately rather than formatted into the SQL text
+            sql,
+            (group_id, since_time.strftime("%Y-%m-%d %H:%M:%S"), self.history_group_summary_limit),
+        ) or []
+        return [self.message_summary_db._deserialize_row(dict(row)) or {} for row in rows]  # NOTE(review): uses the operator's private _deserialize_row; a falsy result becomes {}
+

 class MemberRoastPlugin(MessagePluginInterface):
    """成员锐评插件。
@@ -496,6 +652,8 @@ class MemberRoastPlugin(MessagePluginInterface):
        """拼装用户提示词。"""
        member_context = payload.get("member_context", {}) or {}
        group_memory_profile = payload.get("group_memory_profile", {}) or {}
+        historical_member_profile = payload.get("historical_member_profile", {}) or {}
+        historical_group_profile = payload.get("historical_group_profile", {}) or {}
        recent_messages = payload.get("recent_messages", []) or []
        meta = member_context.get("meta", {}) or {}
        group_style = group_memory_profile.get("style_profile", {}) or {}
@@ -540,17 +698,42 @@ class MemberRoastPlugin(MessagePluginInterface):
                "气质倾向": meta.get("temperament_tendency", ""),
                "近期状态": meta.get("recent_state", []),
            },
+            "成员近两个月历史画像": {
+                "窗口天数": historical_member_profile.get("history_days", self.history_profile_days),
+                "摘要来源数": historical_member_profile.get("source_counts", {}),
+                "稳定主题": historical_member_profile.get("stable_topics", []),
+                "身份特征": historical_member_profile.get("identity_traits", []),
+                "技能画像": historical_member_profile.get("skill_profile", []),
+                "处理问题方式": historical_member_profile.get("problem_solving_profile", []),
+                "习惯模式": historical_member_profile.get("habit_patterns", []),
+                "表达标签": historical_member_profile.get("expression_profile", []),
+                "互动特征": historical_member_profile.get("engagement_traits", []),
+                "长期回复偏好": historical_member_profile.get("reply_preferences", []),
+                "长期群内角色": historical_member_profile.get("group_role", []),
+                "长期气质倾向": historical_member_profile.get("temperament_tendency", []),
+                "长期发言模式": historical_member_profile.get("message_pattern", []),
+                "长期互动风格": historical_member_profile.get("interaction_style", []),
+                "阶段变化轨迹": historical_member_profile.get("phase_state", []),
+                "历史时间线": historical_member_profile.get("timeline", []),
+            },
            "群聊背景": {
                "群名": payload.get("group_name", ""),
                "长期主题": group_memory_profile.get("focus_topics", []),
                "群风格": group_style,
                "群摘要": group_memory_profile.get("summary_text", ""),
            },
+            "群近两个月历史背景": {
+                "窗口天数": historical_group_profile.get("history_days", self.history_profile_days),
+                "总结条数": historical_group_profile.get("summary_count", 0),
+                "历史关注主题": historical_group_profile.get("focus_topics", []),
+                "历史时间线": historical_group_profile.get("timeline", []),
+            },
            "最近发言样本": recent_lines,
            "额外要求": [
                "要像熟人看破不说破，不要像机器写分析。",
                "可以先抬后杀，也可以连续抓几个典型症状。",
                "如果这人明显偏技术、答疑、摸鱼、抽象、嘴硬、复读机、群气氛组，请点出来。",
+                "必须同时参考“当前成员画像”和“近两个月历史画像”，如果两者有反差，要把这种反差写出来。",
                f"发起请求的人是：{requester_name}" if requester_name else "",
            ],
        }