From 66b0fe16daee245dcf7e33804aa35b680f5e2d1c Mon Sep 17 00:00:00 2001 From: liuwei Date: Fri, 24 Apr 2026 15:19:14 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8B=86=E5=88=86=E7=BE=A4=E6=98=B5=E7=A7=B0?= =?UTF-8?q?=E4=B8=8E=E6=AD=A3=E6=96=87=E9=81=BF=E5=85=8D=E8=AF=9D=E9=A2=98?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E8=A2=AB=E6=98=B5=E7=A7=B0=E6=B1=A1=E6=9F=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugins/ai_auto_response/config.toml | 2 +- .../context/context_builder.py | 39 ++++++++++++-- .../docs/README_dify_simple_workflow.md | 13 ++++- .../docs/小牛群内自动插话AI.yml | 7 +++ plugins/ai_auto_response/main.py | 53 +++++++++++++++++-- 5 files changed, 104 insertions(+), 10 deletions(-) diff --git a/plugins/ai_auto_response/config.toml b/plugins/ai_auto_response/config.toml index 0d2235d..791ec4a 100644 --- a/plugins/ai_auto_response/config.toml +++ b/plugins/ai_auto_response/config.toml @@ -74,7 +74,7 @@ group_profile_max_lines = 6 # 1. recent_message_max_lines 提到 30,避免“窗口明明有 30,提示词里只留下 4 条”; # 2. context_max_lines/context_max_chars 一起抬高,避免最近消息刚拼进去又被整体截断; # 3. recent_message_line_max_chars 稍微放宽,让模型能看到每条消息更多细节,但仍避免单条刷屏。 -context_max_chars = 4200 +context_max_chars = 5600 context_max_lines = 40 recent_message_max_lines = 30 recent_message_line_max_chars = 100 diff --git a/plugins/ai_auto_response/context/context_builder.py b/plugins/ai_auto_response/context/context_builder.py index 1d158ad..6bc77f6 100644 --- a/plugins/ai_auto_response/context/context_builder.py +++ b/plugins/ai_auto_response/context/context_builder.py @@ -30,11 +30,19 @@ class ContextBuilder: ) -> Dict: selected_messages = self._select_recent_messages(recent_messages, sender, content, quote_context or {}) recent_lines = [] - for item in selected_messages: + for idx, item in enumerate(selected_messages, start=1): msg_sender = item.get("sender_name") or item.get("sender") or "未知成员" msg_content = item.get("content") or item.get("message") or "" if msg_content: - recent_lines.append(f"{msg_sender}: {msg_content}") + # 这里把“发言人”和“正文”拆开保存,避免后续模型把昵称词汇误当成讨论主题。 + recent_lines.append( + self._format_recent_message_line( + idx=idx, + sender_name=str(msg_sender), + content=str(msg_content), + is_at=bool(item.get("is_at")), + ) + ) return { "group_profile": group_profile or {"room_id": room_id}, "speaker_profile": { @@ -67,7 +75,7 @@ class ContextBuilder: "image_safety_prompt": self._build_image_safety_prompt( (quote_context or {}).get("image_safety") or {} ), - "current_message": f"{sender_name}: {content}", + "current_message": self._format_current_message_block(sender_name, content), } @staticmethod @@ -85,6 +93,31 @@ class ContextBuilder: }) return items + @staticmethod + def _sanitize_inline_field(value: str, max_chars: int = 120) -> str: + # 统一把换行和分隔符清掉,避免后续在单行结构化文本里把字段边界冲散。 + text = re.sub(r"\s+", " ", str(value or "")).strip() + text = text.replace("|", "/") + if len(text) > max_chars: + return text[: max_chars - 3].rstrip() + "..." + return text + + @classmethod + def _format_recent_message_line(cls, idx: int, sender_name: str, content: str, is_at: bool = False) -> str: + sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员" + body = cls._sanitize_inline_field(content, max_chars=120) + parts = [f"[{idx:02d}]", f"发言人={sender}", f"正文={body}"] + if is_at: + parts.append("@bot=Y") + return " | ".join(parts) + + @classmethod + def _format_current_message_block(cls, sender_name: str, content: str) -> str: + # 当前消息改成“元信息 + 正文”两段式,方便模型只把正文视为话题语义来源。 + sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员" + body = cls._sanitize_inline_field(content, max_chars=500) + return f"发言人={sender}\n正文={body}" + def _select_recent_messages( self, recent_messages: List[Dict], diff --git a/plugins/ai_auto_response/docs/README_dify_simple_workflow.md b/plugins/ai_auto_response/docs/README_dify_simple_workflow.md index 810513a..2b5c44e 100644 --- a/plugins/ai_auto_response/docs/README_dify_simple_workflow.md +++ b/plugins/ai_auto_response/docs/README_dify_simple_workflow.md @@ -17,7 +17,14 @@ 成员记忆、群关系记忆、群事实记忆、向量召回记忆的合并摘要。 `current_message` -当前消息,格式类似:`张三: 你还活着吗` +当前消息,格式类似: + +```text +发言人=张三 +正文=你还活着吗 +``` + +其中 `发言人` 是元信息,`正文` 才是当前消息内容本身,不要把昵称里的词当成话题关键词。 `control` 控制信息,格式类似: @@ -59,6 +66,10 @@ address_style=低频称呼,默认直接接话 6. 信息不足就收着说,不要硬编。 7. 回复尽量短,但要保留人格味道。 8. 只输出一个 JSON 对象,不要输出解释。 +9. 如果上下文或当前消息里出现 `发言人=...`、`正文=...`: + - `发言人` 只是识别谁在说话 + - `正文` 才是话题内容 + - 不要把昵称、群名片、外号中的词汇误判成正在讨论的话题 输出格式: { diff --git a/plugins/ai_auto_response/docs/小牛群内自动插话AI.yml b/plugins/ai_auto_response/docs/小牛群内自动插话AI.yml index 5cb04b6..ee714c7 100644 --- a/plugins/ai_auto_response/docs/小牛群内自动插话AI.yml +++ b/plugins/ai_auto_response/docs/小牛群内自动插话AI.yml @@ -242,6 +242,10 @@ workflow: - 优先使用 social_short - 用符合人格的一句短回怼挡回去 - 不要长篇说教,不要爆粗,不要升级成真正对骂 + 9. 上下文和当前消息里如果出现 `发言人=...`、`正文=...` 这样的结构: + - `发言人` 只是说话人元信息,用来判断对象、关系、是否在点名 + - `正文` 才是话题和语义内容 + - 不要把昵称、群名片、外号里的词当成当前讨论主题 输出格式: { @@ -359,6 +363,9 @@ workflow: - 这里优先短回一句,不要空掉 - 用 social_short - 回得短、稳、带人格,但不要说教,不要骂脏话 + 7. 上下文和当前消息里如果出现 `发言人=...`、`正文=...`: + - 只把 `正文` 当作话题内容 + - `发言人` 只用于识别是谁在说话,不要把昵称里的词汇当成讨论主题 输出格式: { diff --git a/plugins/ai_auto_response/main.py b/plugins/ai_auto_response/main.py index 936d72c..989b18e 100644 --- a/plugins/ai_auto_response/main.py +++ b/plugins/ai_auto_response/main.py @@ -958,7 +958,8 @@ class AIAutoResponsePlugin(MessagePluginInterface): "group_profile": group_profile_text, "context": context_text, "memory": memory_text, - "current_message": f"{sender_name}: {content}", + # 当前消息不再用“昵称: 正文”的混合写法,避免模型把昵称词汇当成当前话题的一部分。 + "current_message": self._format_current_message_block(sender_name, content), "control": "\n".join(control_lines), "images": files, } @@ -1005,12 +1006,54 @@ class AIAutoResponsePlugin(MessagePluginInterface): sender = str(item.get("sender", "") or "未知成员").strip() content = str(item.get("content", "") or "").strip() if sender and content: - compact = re.sub(r"\s+", " ", content).strip() - if len(compact) > max_line_chars: - compact = compact[: max_line_chars - 3].rstrip() + "..." - lines.append(f"{sender}: {compact}") + # 最近消息统一改成“发言人字段 + 正文字段”的单行结构化格式: + # 1. 保留 30 条上下文时,仍然是一条消息一行,不会因为多行格式把上下文窗口挤爆; + # 2. 模型可以继续感知是谁说的,但更不容易把昵称里的词误当成话题正文; + # 3. 如果消息里本身带 @ 标记,也显式单列出来,减少对正文理解的污染。 + lines.append( + AIAutoResponsePlugin._format_recent_message_line( + idx=int(item.get("idx", 0) or 0), + sender_name=sender, + content=content, + max_line_chars=max_line_chars, + is_at=bool(item.get("is_at")), + ) + ) return "\n".join(lines) + @staticmethod + def _sanitize_inline_message_field(value: str, max_chars: int) -> str: + # 这里专门给传模型的“单行结构化消息”做字段清洗,避免换行和分隔符把结构打散。 + text = re.sub(r"\s+", " ", str(value or "")).strip() + text = text.replace("|", "/") + if len(text) > max_chars: + return text[: max_chars - 3].rstrip() + "..." + return text + + @classmethod + def _format_recent_message_line( + cls, + *, + idx: int, + sender_name: str, + content: str, + max_line_chars: int, + is_at: bool = False, + ) -> str: + sender = cls._sanitize_inline_message_field(sender_name, max_chars=24) or "未知成员" + body = cls._sanitize_inline_message_field(content, max_chars=max(max_line_chars, 20)) + parts = [f"[{max(idx, 1):02d}]", f"发言人={sender}", f"正文={body}"] + if is_at: + parts.append("@bot=Y") + return " | ".join(parts) + + @classmethod + def _format_current_message_block(cls, sender_name: str, content: str) -> str: + # 当前消息使用两行结构化文本,让工作流里的模型更容易区分“谁说的”和“说了什么”。 + sender = cls._sanitize_inline_message_field(sender_name, max_chars=24) or "未知成员" + body = cls._sanitize_inline_message_field(content, max_chars=500) + return f"发言人={sender}\n正文={body}" + @staticmethod def _string_block(title: str, value: Any) -> str: text = str(value or "").strip()