精炼成员锐评的提示词与输入压缩

- 将最近200条发言改为结构化提炼并限制代表样本数量
- 压缩历史时间线和空字段，减少提示词体积并降低幻觉
- 下调模型温度与最大输出token，并修复历史窗口属性兜底
2026-04-27 14:52:52 +08:00
parent e2a6356bab
commit 81dfd44b14
2 changed files with 195 additions and 21 deletions
--- a/plugins/member_roast/config.toml
+++ b/plugins/member_roast/config.toml
@@ -14,8 +14,12 @@ command_format = """
 # 2. “锐评”更需要创意表达、梗感和自然语言发挥；
 # 3. 因此优先复用 chat.main，既稳定又方便后续单独切换模型。
 scene = "chat.main"
-temperature = 0.9
-max_tokens = 700
+# 模型参数往“稳一点、省一点”方向收：
+# 1. 锐评需要一点攻击性，但不需要高温到处乱飞；
+# 2. 最近发言已经被结构化提炼，正文也限制在一屏内，token 上限没必要放太大；
+# 3. 这样可以明显降低幻觉概率和单次调用成本。
+temperature = 0.65
+max_tokens = 480
 timeout_seconds = 120
 max_retries = 2
 retry_delay_seconds = 1.5
--- a/plugins/member_roast/main.py
+++ b/plugins/member_roast/main.py
@@ -2,6 +2,7 @@
 import json
 import re
 import xml.etree.ElementTree as ET
+from collections import Counter
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional, Tuple

@@ -336,6 +337,21 @@ class MemberRoastPlugin(MessagePluginInterface):

    FEATURE_KEY = "MEMBER_ROAST"
    FEATURE_DESCRIPTION = "🗡️ 成员锐评 [@机器人 锐评一下 @某人]"
+    RECENT_MESSAGE_STOPWORDS = {
+        "这个", "那个", "就是", "然后", "但是", "还是", "我们", "你们", "他们", "自己", "一下",
+        "已经", "没有", "一个", "可以", "什么", "怎么", "今天", "昨天", "现在", "时候", "知道",
+        "觉得", "真的", "感觉", "不是", "还有", "因为", "所以", "这里", "那里", "一下子", "的话",
+        "and", "the", "for", "with", "that", "this", "from", "have", "just", "like",
+    }
+    # 最近消息提炼后的各项上限统一收在这里：
+    # 1. 方便后续继续调 token 成本时只改一处；
+    # 2. 避免不同方法里散落硬编码，导致线上效果不一致；
+    # 3. 上限偏保守，优先保“稳定特征”而不是堆材料。
+    RECENT_REPEAT_LIMIT = 6
+    RECENT_KEYWORD_LIMIT = 12
+    RECENT_SAMPLE_LIMIT = 8
+    PROMPT_TIMELINE_LIMIT = 6
+    PROMPT_TEXT_LIMIT = 120

    @property
    def name(self) -> str:
@@ -383,6 +399,11 @@ class MemberRoastPlugin(MessagePluginInterface):
        self.min_output_chars = 140
        self.sharpness_level = "high"
        self.name_match_min_chars = 2
+        # 这里给一个默认值兜底：
+        # 1. `_build_user_prompt` 会直接使用该窗口天数；
+        # 2. 如果插件刚构造、但还没完整初始化就被调用，至少不会因为属性不存在直接报错；
+        # 3. 真正运行时仍会在 `initialize` 里按配置覆盖。
+        self.history_profile_days = 60

    def initialize(self, context: Dict[str, Any]) -> bool:
        """初始化插件。"""
@@ -405,6 +426,11 @@ class MemberRoastPlugin(MessagePluginInterface):

        profile_cfg = self._config.get("profile", {}) or {}
        self.name_match_min_chars = max(int(profile_cfg.get("name_match_min_chars", 2) or 2), 1)
+        # 历史窗口需要同步到插件实例本身：
+        # 1. prompt 组装阶段会直接引用它；
+        # 2. 之前只有 service 上有这个值，运行时存在属性缺失风险；
+        # 3. 这里和 service 保持同一配置口径，避免“两边窗口不一致”。
+        self.history_profile_days = max(int(profile_cfg.get("history_profile_days", 60) or 60), 1)

        db_manager = context.get("db_manager")
        if not db_manager:
@@ -662,21 +688,11 @@ class MemberRoastPlugin(MessagePluginInterface):
        meta = member_context.get("meta", {}) or {}
        group_style = group_memory_profile.get("style_profile", {}) or {}

-        # 最近 50 条发言是这次锐评最关键的“即时素材”：
-        # 1. 画像决定“这个人长期像谁”；
-        # 2. 最近发言决定“这阵子他又在发什么病”；
-        # 3. 两者结合，模型才更容易产出既稳定又有当期节目效果的锐评。
-        recent_lines = []
-        for idx, item in enumerate(recent_messages, start=1):
-            ts = item.get("timestamp")
-            if isinstance(ts, datetime):
-                ts_text = ts.strftime("%m-%d %H:%M")
-            else:
-                ts_text = str(ts or "")[5:16] if str(ts or "") else ""
-            content = str(item.get("content", "") or "").replace("\n", " ").strip()
-            if not content:
-                continue
-            recent_lines.append(f"{idx}. [{ts_text}] {content}")
+        # 最近 200 条消息不再原样整包塞给模型：
+        # 1. 原样传会让 token 体积迅速膨胀；
+        # 2. 模型也容易被偶发句子带偏，出现“抓住一条就开始瞎判”的幻觉；
+        # 3. 这里先做结构化提炼，再保留少量代表句，既省 token，也更稳。
+        recent_message_profile = self._build_recent_message_profile(recent_messages)

        prompt_payload = {
            "任务说明": "请基于以下真实素材，为目标成员写一段有传播性的群聊锐评。",
@@ -718,7 +734,7 @@ class MemberRoastPlugin(MessagePluginInterface):
                "长期发言模式": historical_member_profile.get("message_pattern", []),
                "长期互动风格": historical_member_profile.get("interaction_style", []),
                "阶段变化轨迹": historical_member_profile.get("phase_state", []),
-                "历史时间线": historical_member_profile.get("timeline", []),
+                "历史时间线": (historical_member_profile.get("timeline", []) or [])[: self.PROMPT_TIMELINE_LIMIT],
            },
            "群聊背景": {
                "群名": payload.get("group_name", ""),
@@ -730,18 +746,172 @@ class MemberRoastPlugin(MessagePluginInterface):
                "窗口天数": historical_group_profile.get("history_days", self.history_profile_days),
                "总结条数": historical_group_profile.get("summary_count", 0),
                "历史关注主题": historical_group_profile.get("focus_topics", []),
-                "历史时间线": historical_group_profile.get("timeline", []),
+                "历史时间线": (historical_group_profile.get("timeline", []) or [])[: self.PROMPT_TIMELINE_LIMIT],
            },
-            "最近发言样本": recent_lines,
+            "最近200条发言提炼": recent_message_profile,
            "额外要求": [
                "要像熟人看破不说破，不要像机器写分析。",
                "可以先抬后杀，也可以连续抓几个典型症状。",
                "如果这人明显偏技术、答疑、摸鱼、抽象、嘴硬、复读机、群气氛组，请点出来。",
                "必须同时参考“当前成员画像”和“近两个月历史画像”，如果两者有反差，要把这种反差写出来。",
+                "优先依据“高频模式、重复措辞、代表句”下结论，不要因为单条偶发发言脑补大设定。",
+                "如果最近发言提炼和长期画像冲突，允许描述为“最近状态跑偏了”，不要硬判成永久特征。",
                f"发起请求的人是：{requester_name}" if requester_name else "",
            ],
        }
-        return json.dumps(prompt_payload, ensure_ascii=False, indent=2)
+        # 最终再做一次 prompt 压缩：
+        # 1. 去掉空字段，避免模型看到大量“空壳键名”；
+        # 2. 限制超长文本和列表，防止历史摘要把上下文挤爆；
+        # 3. 使用紧凑 JSON，而不是缩进版，直接减少 token。
+        compact_payload = self._compact_prompt_payload(prompt_payload)
+        return json.dumps(compact_payload, ensure_ascii=False, separators=(",", ":"))
+
+    def _compact_prompt_payload(self, value: Any) -> Any:
+        """Recursively compact a prompt payload: drop empty values, collapse whitespace, cap string length."""
+        if isinstance(value, dict):
+            compact_dict: Dict[str, Any] = {}
+            for key, item in value.items():
+                compact_item = self._compact_prompt_payload(item)
+                if compact_item in ("", [], {}, None):
+                    continue
+                compact_dict[key] = compact_item
+            return compact_dict
+
+        if isinstance(value, list):
+            compact_list: List[Any] = []
+            for item in value:
+                compact_item = self._compact_prompt_payload(item)
+                if compact_item in ("", [], {}, None):
+                    continue
+                compact_list.append(compact_item)
+            return compact_list
+
+        if isinstance(value, str):
+            text = re.sub(r"\s+", " ", value).strip()
+            if len(text) <= self.PROMPT_TEXT_LIMIT:
+                return text
+            # Over-long text keeps only its leading PROMPT_TEXT_LIMIT characters:
+            # 1. the cap applies to every string in the payload, not just one field;
+            # 2. per the original note, it targets summary/timeline style background text;
+            # 3. trailing separators are stripped before "…" marks the truncation.
+            return text[: self.PROMPT_TEXT_LIMIT].rstrip("，,；;、 ") + "…"
+
+        return value
+
+    def _build_recent_message_profile(self, recent_messages: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Condense recent messages into a compact, structured profile for the prompt.
+
+        Returns a dict with the usable message count, short sentences and keywords
+        seen at least twice, tone ratios, and a few representative samples.
+        Tone ratios are the share of messages containing the mark, so they stay
+        within 0..1 even when one message spams "???" or "!!!".
+        """
+        normalized_texts: List[str] = []
+        repeated_sentence_counter: Counter[str] = Counter()
+        keyword_counter: Counter[str] = Counter()
+        punct_counter: Counter[str] = Counter()
+
+        # `or []` tolerates a missing list; whitespace runs collapse to one space.
+        for item in recent_messages or []:
+            text = re.sub(r"\s+", " ", str(item.get("content", "") or "")).strip()
+            if not text:
+                continue
+            normalized_texts.append(text)
+
+            # Short messages repeated nearly verbatim (memes, catchphrases) are
+            # keyed with all whitespace removed so spacing variants merge.
+            repeat_key = re.sub(r"\s+", "", text)
+            if 2 <= len(repeat_key) <= 24:
+                repeated_sentence_counter[repeat_key] += 1
+
+            keyword_counter.update(self._extract_recent_message_tokens(text))
+
+            # Count each message at most once per mark; counting characters
+            # would let a single "???" push the ratio above 1.
+            punct_counter["question"] += int("?" in text or "？" in text)
+            punct_counter["exclaim"] += int("!" in text or "！" in text)
+            punct_counter["ellipsis"] += int("…" in text or "..." in text)
+
+        representative_samples = self._pick_representative_samples(normalized_texts)
+        total = max(len(normalized_texts), 1)
+
+        return {
+            "样本条数": len(normalized_texts),
+            "高频短句": [
+                item[: self.PROMPT_TEXT_LIMIT]
+                for item, count in repeated_sentence_counter.most_common(self.RECENT_REPEAT_LIMIT)
+                if count >= 2
+            ],
+            "高频关键词": [
+                item
+                for item, count in keyword_counter.most_common(self.RECENT_KEYWORD_LIMIT)
+                if count >= 2
+            ],
+            "近期语气指标": {
+                "问句占比": round(punct_counter["question"] / total, 3),
+                "感叹句占比": round(punct_counter["exclaim"] / total, 3),
+                "省略号占比": round(punct_counter["ellipsis"] / total, 3),
+            },
+            # Only a handful of samples: evidence for the model without a chat log.
+            "代表句样本": representative_samples[: self.RECENT_SAMPLE_LIMIT],
+        }
+
+    def _extract_recent_message_tokens(self, text: str) -> List[str]:
+        """Extract lowercase keyword candidates from one message.
+
+        ASCII runs of 3-32 chars from [A-Za-z0-9_./-] come first, then CJK runs
+        of 2-6 chars (U+4E00-U+9FA5); a longer CJK run is cut into consecutive
+        chunks of at most 6 chars. Stopwords and all-digit tokens are dropped.
+        NOTE(review): stopwords only filter chunks that equal them exactly.
+        """
+        tokens: List[str] = []
+        ascii_tokens = re.findall(r"[A-Za-z0-9_./-]{3,32}", text)
+        chinese_tokens = re.findall(r"[\u4e00-\u9fa5]{2,6}", text)
+
+        for token in ascii_tokens + chinese_tokens:
+            normalized = str(token or "").strip().lower()
+            if not normalized:
+                continue
+            if normalized in self.RECENT_MESSAGE_STOPWORDS:
+                continue
+            if normalized.isdigit():
+                continue
+            tokens.append(normalized)
+        return tokens
+
+    @staticmethod
+    def _pick_representative_samples(texts: List[str]) -> List[str]:
+        """Pick a few representative messages using a deterministic score.
+
+        Scoring: +3 question mark, +2 exclamation mark, +2 length >= 18, +2 any
+        listed keyword. Sorted by score then length (both descending), deduped
+        ignoring whitespace, truncated to PROMPT_TEXT_LIMIT, capped at RECENT_SAMPLE_LIMIT.
+        """
+        scored: List[Tuple[int, str]] = []
+        for text in texts:
+            score = 0
+            if "？" in text or "?" in text:
+                score += 3
+            if "！" in text or "!" in text:
+                score += 2
+            if len(text) >= 18:
+                score += 2
+            if any(keyword in text.lower() for keyword in ["报错", "问题", "哈哈", "笑死", "离谱", "摸鱼", "接口", "配置", "版本", "怎么"]):
+                score += 2
+            scored.append((score, text))
+
+        scored.sort(key=lambda item: (-item[0], -len(item[1])))
+        result: List[str] = []
+        seen = set()
+        for _, text in scored:
+            normalized = re.sub(r"\s+", "", text)
+            if normalized in seen:
+                continue
+            seen.add(normalized)
+            result.append(text[: MemberRoastPlugin.PROMPT_TEXT_LIMIT])
+            if len(result) >= MemberRoastPlugin.RECENT_SAMPLE_LIMIT:
+                break
+        return result

    def _post_process_roast_text(self, roast_text: str, target_name: str) -> str:
        """清洗模型输出，统一成更适合直接发群的格式。"""