精炼成员锐评的提示词与输入压缩

- 将最近 200 条发言改为结构化提炼，并限制代表样本数量
- 压缩历史时间线和空字段，减少提示词体积并降低幻觉
- 下调模型温度与最大输出 token，并修复历史窗口属性兜底
2026-04-27 14:52:52 +08:00
parent e2a6356bab
commit 81dfd44b14
2 changed files with 195 additions and 21 deletions
--- a/plugins/member_roast/config.toml
+++ b/plugins/member_roast/config.toml
@@ -14,8 +14,12 @@ command_format = """
 # 2. “锐评”更需要创意表达、梗感和自然语言发挥；
 # 3. 因此优先复用 chat.main，既稳定又方便后续单独切换模型。
 scene = "chat.main"
-temperature = 0.9
+# 模型参数往“稳一点、省一点”方向收：
-max_tokens = 700
+# 1. 锐评需要一点攻击性，但不需要高温到处乱飞；
 # 2. 最近发言已经被结构化提炼，正文也限制在一屏内，token 上限没必要放太大；
 # 3. 这样可以明显降低幻觉概率和单次调用成本。
 temperature = 0.65
 max_tokens = 480
 timeout_seconds = 120
 max_retries = 2
 retry_delay_seconds = 1.5
--- a/plugins/member_roast/main.py
+++ b/plugins/member_roast/main.py
@@ -2,6 +2,7 @@
 import json
 import re
 import xml.etree.ElementTree as ET
 from collections import Counter
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional, Tuple
@@ -336,6 +337,21 @@ class MemberRoastPlugin(MessagePluginInterface):
    FEATURE_KEY = "MEMBER_ROAST"
    FEATURE_DESCRIPTION = "🗡️ 成员锐评 [@机器人 锐评一下 @某人]"
    RECENT_MESSAGE_STOPWORDS = {
        "这个", "那个", "就是", "然后", "但是", "还是", "我们", "你们", "他们", "自己", "一下",
        "已经", "没有", "一个", "可以", "什么", "怎么", "今天", "昨天", "现在", "时候", "知道",
        "觉得", "真的", "感觉", "不是", "还有", "因为", "所以", "这里", "那里", "一下子", "的话",
        "and", "the", "for", "with", "that", "this", "from", "have", "just", "like",
    }
    # 最近消息提炼后的各项上限统一收在这里：
    # 1. 方便后续继续调 token 成本时只改一处；
    # 2. 避免不同方法里散落硬编码，导致线上效果不一致；
    # 3. 上限偏保守，优先保“稳定特征”而不是堆材料。
    RECENT_REPEAT_LIMIT = 6
    RECENT_KEYWORD_LIMIT = 12
    RECENT_SAMPLE_LIMIT = 8
    PROMPT_TIMELINE_LIMIT = 6
    PROMPT_TEXT_LIMIT = 120
    @property
    def name(self) -> str:
@@ -383,6 +399,11 @@ class MemberRoastPlugin(MessagePluginInterface):
        self.min_output_chars = 140
        self.sharpness_level = "high"
        self.name_match_min_chars = 2
        # 这里给一个默认值兜底：
        # 1. `_build_user_prompt` 会直接使用该窗口天数；
        # 2. 如果插件刚构造、但还没完整初始化就被调用，至少不会因为属性不存在直接报错；
        # 3. 真正运行时仍会在 `initialize` 里按配置覆盖。
        self.history_profile_days = 60
    def initialize(self, context: Dict[str, Any]) -> bool:
        """初始化插件。"""
@@ -405,6 +426,11 @@ class MemberRoastPlugin(MessagePluginInterface):
        profile_cfg = self._config.get("profile", {}) or {}
        self.name_match_min_chars = max(int(profile_cfg.get("name_match_min_chars", 2) or 2), 1)
        # 历史窗口需要同步到插件实例本身：
        # 1. prompt 组装阶段会直接引用它；
        # 2. 之前只有 service 上有这个值，运行时存在属性缺失风险；
        # 3. 这里和 service 保持同一配置口径，避免“两边窗口不一致”。
        self.history_profile_days = max(int(profile_cfg.get("history_profile_days", 60) or 60), 1)
        db_manager = context.get("db_manager")
        if not db_manager:
@@ -662,21 +688,11 @@ class MemberRoastPlugin(MessagePluginInterface):
        meta = member_context.get("meta", {}) or {}
        group_style = group_memory_profile.get("style_profile", {}) or {}
-        # 最近 50 条发言是这次锐评最关键的“即时素材”：
+        # 最近 200 条消息不再原样整包塞给模型：
-        # 1. 画像决定“这个人长期像谁”；
+        # 1. 原样传会让 token 体积迅速膨胀；
-        # 2. 最近发言决定“这阵子他又在发什么病”；
+        # 2. 模型也容易被偶发句子带偏，出现“抓住一条就开始瞎判”的幻觉；
-        # 3. 两者结合，模型才更容易产出既稳定又有当期节目效果的锐评。
+        # 3. 这里先做结构化提炼，再保留少量代表句，既省 token，也更稳。
-        recent_lines = []
+        recent_message_profile = self._build_recent_message_profile(recent_messages)
        for idx, item in enumerate(recent_messages, start=1):
            ts = item.get("timestamp")
            if isinstance(ts, datetime):
                ts_text = ts.strftime("%m-%d %H:%M")
            else:
                ts_text = str(ts or "")[5:16] if str(ts or "") else ""
            content = str(item.get("content", "") or "").replace("\n", " ").strip()
            if not content:
                continue
            recent_lines.append(f"{idx}. [{ts_text}] {content}")
        prompt_payload = {
            "任务说明": "请基于以下真实素材，为目标成员写一段有传播性的群聊锐评。",
@@ -718,7 +734,7 @@ class MemberRoastPlugin(MessagePluginInterface):
                "长期发言模式": historical_member_profile.get("message_pattern", []),
                "长期互动风格": historical_member_profile.get("interaction_style", []),
                "阶段变化轨迹": historical_member_profile.get("phase_state", []),
-                "历史时间线": historical_member_profile.get("timeline", []),
+                "历史时间线": (historical_member_profile.get("timeline", []) or [])[: self.PROMPT_TIMELINE_LIMIT],
            },
            "群聊背景": {
                "群名": payload.get("group_name", ""),
@@ -730,18 +746,172 @@ class MemberRoastPlugin(MessagePluginInterface):
                "窗口天数": historical_group_profile.get("history_days", self.history_profile_days),
                "总结条数": historical_group_profile.get("summary_count", 0),
                "历史关注主题": historical_group_profile.get("focus_topics", []),
-                "历史时间线": historical_group_profile.get("timeline", []),
+                "历史时间线": (historical_group_profile.get("timeline", []) or [])[: self.PROMPT_TIMELINE_LIMIT],
            },
-            "最近发言样本": recent_lines,
+            "最近200条发言提炼": recent_message_profile,
            "额外要求": [
                "要像熟人看破不说破，不要像机器写分析。",
                "可以先抬后杀，也可以连续抓几个典型症状。",
                "如果这人明显偏技术、答疑、摸鱼、抽象、嘴硬、复读机、群气氛组，请点出来。",
                "必须同时参考“当前成员画像”和“近两个月历史画像”，如果两者有反差，要把这种反差写出来。",
                "优先依据“高频模式、重复措辞、代表句”下结论，不要因为单条偶发发言脑补大设定。",
                "如果最近发言提炼和长期画像冲突，允许描述为“最近状态跑偏了”，不要硬判成永久特征。",
                f"发起请求的人是：{requester_name}" if requester_name else "",
            ],
        }
-        return json.dumps(prompt_payload, ensure_ascii=False, indent=2)
+        # 最终再做一次 prompt 压缩：
        # 1. 去掉空字段，避免模型看到大量“空壳键名”；
        # 2. 限制超长文本和列表，防止历史摘要把上下文挤爆；
        # 3. 使用紧凑 JSON，而不是缩进版，直接减少 token。
        compact_payload = self._compact_prompt_payload(prompt_payload)
        return json.dumps(compact_payload, ensure_ascii=False, separators=(",", ":"))
    def _compact_prompt_payload(self, value: Any) -> Any:
        """压缩 prompt 载荷，减少无效 token 并降低幻觉诱因。"""
        if isinstance(value, dict):
            compact_dict: Dict[str, Any] = {}
            for key, item in value.items():
                compact_item = self._compact_prompt_payload(item)
                if compact_item in ("", [], {}, None):
                    continue
                compact_dict[key] = compact_item
            return compact_dict
        if isinstance(value, list):
            compact_list: List[Any] = []
            for item in value:
                compact_item = self._compact_prompt_payload(item)
                if compact_item in ("", [], {}, None):
                    continue
                compact_list.append(compact_item)
            return compact_list
        if isinstance(value, str):
            text = re.sub(r"\s+", " ", value).strip()
            if len(text) <= self.PROMPT_TEXT_LIMIT:
                return text
            # 长文本只保留前半段关键信息：
            # 1. 这里主要针对摘要、时间线这类字段；
            # 2. 它们的作用是“提供背景”，不是让模型逐字精读；
            # 3. 截断后仍然保留前部主结论，性价比更高。
            return text[: self.PROMPT_TEXT_LIMIT].rstrip("，,；;、 ") + "…"
        return value
    def _build_recent_message_profile(self, recent_messages: List[Dict[str, Any]]) -> Dict[str, Any]:
        """把最近消息压缩成更适合给模型的结构化画像。
        压缩目标：
        1. 尽量保留“重复出现的稳定模式”，而不是平均分配注意力给 200 条原文；
        2. 用高频短语、关键词、问句/感叹句比例、代表句来降低模型幻觉概率；
        3. 让模型先看结论，再看少量样本佐证，减少 token 消耗。
        """
        normalized_texts: List[str] = []
        repeated_sentence_counter: Counter[str] = Counter()
        keyword_counter: Counter[str] = Counter()
        punct_counter: Counter[str] = Counter()
        representative_samples: List[str] = []
        for item in recent_messages:
            text = str(item.get("content", "") or "").replace("\n", " ").strip()
            if not text:
                continue
            text = re.sub(r"\s+", " ", text)
            normalized_texts.append(text)
            # 统计“几乎原样重复”的短句，这类内容对群聊人设识别价值很高，
            # 比如复读某个梗、固定口头禅、常见抱怨模板。
            repeat_key = re.sub(r"\s+", "", text)
            if 2 <= len(repeat_key) <= 24:
                repeated_sentence_counter[repeat_key] += 1
            for token in self._extract_recent_message_tokens(text):
                keyword_counter[token] += 1
            punct_counter["question"] += text.count("?") + text.count("？")
            punct_counter["exclaim"] += text.count("!") + text.count("！")
            punct_counter["ellipsis"] += text.count("…") + text.count("...")
        representative_samples = self._pick_representative_samples(normalized_texts)
        total = max(len(normalized_texts), 1)
        return {
            "样本条数": len(normalized_texts),
            "高频短句": [
                item[: self.PROMPT_TEXT_LIMIT]
                for item, count in repeated_sentence_counter.most_common(self.RECENT_REPEAT_LIMIT)
                if count >= 2
            ],
            "高频关键词": [
                item
                for item, count in keyword_counter.most_common(self.RECENT_KEYWORD_LIMIT)
                if count >= 2
            ],
            "近期语气指标": {
                "问句占比": round(punct_counter["question"] / total, 3),
                "感叹句占比": round(punct_counter["exclaim"] / total, 3),
                "省略号占比": round(punct_counter["ellipsis"] / total, 3),
            },
            # 代表句只保留少量，有利于模型“看证据”，又不至于把 token 烧在长聊天流水上。
            "代表句样本": representative_samples[: self.RECENT_SAMPLE_LIMIT],
        }
    def _extract_recent_message_tokens(self, text: str) -> List[str]:
        """从单条消息中提取较稳定的关键词。
        规则尽量保守：
        1. 中文按 2~6 字连续片段抓取，避免单字噪声；
        2. 英文/数字词保留长度 >= 3 的 token；
        3. 过滤掉常见虚词，减少模型被“这个、那个、然后”之类词误导。
        """
        tokens: List[str] = []
        ascii_tokens = re.findall(r"[A-Za-z0-9_./-]{3,32}", text)
        chinese_tokens = re.findall(r"[\u4e00-\u9fa5]{2,6}", text)
        for token in ascii_tokens + chinese_tokens:
            normalized = str(token or "").strip().lower()
            if not normalized:
                continue
            if normalized in self.RECENT_MESSAGE_STOPWORDS:
                continue
            if normalized.isdigit():
                continue
            tokens.append(normalized)
        return tokens
    @staticmethod
    def _pick_representative_samples(texts: List[str]) -> List[str]:
        """挑选少量最能体现人设的代表句。
        选取策略不追求复杂模型，只做确定性压缩：
        1. 先保留问句、感叹句、较长句、包含“技术/问题/吐槽”味道的句子；
        2. 再做去重，避免 12 条样本里 8 条都是同一种废话。
        """
        scored: List[Tuple[int, str]] = []
        for text in texts:
            score = 0
            if "？" in text or "?" in text:
                score += 3
            if "！" in text or "!" in text:
                score += 2
            if len(text) >= 18:
                score += 2
            if any(keyword in text.lower() for keyword in ["报错", "问题", "哈哈", "笑死", "离谱", "摸鱼", "接口", "配置", "版本", "怎么"]):
                score += 2
            scored.append((score, text))
        scored.sort(key=lambda item: (-item[0], -len(item[1])))
        result: List[str] = []
        seen = set()
        for _, text in scored:
            normalized = re.sub(r"\s+", "", text)
            if normalized in seen:
                continue
            seen.add(normalized)
            result.append(text[: MemberRoastPlugin.PROMPT_TEXT_LIMIT])
            if len(result) >= MemberRoastPlugin.RECENT_SAMPLE_LIMIT:
                break
        return result
    def _post_process_roast_text(self, roast_text: str, target_name: str) -> str:
        """清洗模型输出，统一成更适合直接发群的格式。"""