From 5dc72bf7d2b2ac79001ac537bfb0efdf3872a16b Mon Sep 17 00:00:00 2001 From: liuwei Date: Fri, 24 Apr 2026 15:12:42 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=BE=E5=AE=BD=E6=9C=80=E8=BF=91=E4=B8=8A?= =?UTF-8?q?=E4=B8=8B=E6=96=87=E5=88=B030=E6=9D=A1=E5=B9=B6=E5=8F=96?= =?UTF-8?q?=E6=B6=88=E4=B8=AD=E9=80=94=E6=88=AA=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugins/ai_auto_response/config.toml | 12 ++++-- .../context/context_builder.py | 38 ++++--------------- plugins/ai_auto_response/main.py | 25 +++++++++++- 3 files changed, 38 insertions(+), 37 deletions(-) diff --git a/plugins/ai_auto_response/config.toml b/plugins/ai_auto_response/config.toml index a9144ea..0d2235d 100644 --- a/plugins/ai_auto_response/config.toml +++ b/plugins/ai_auto_response/config.toml @@ -70,10 +70,14 @@ default_total_limit = 12 [prompt_compact] group_profile_max_chars = 220 group_profile_max_lines = 6 -context_max_chars = 360 -context_max_lines = 10 -recent_message_max_lines = 4 -recent_message_line_max_chars = 60 +# 最近上下文现在要真实交给模型 30 条,因此这里同步放宽整体上下文裁剪阈值: +# 1. recent_message_max_lines 提到 30,避免“窗口明明有 30,提示词里只留下 4 条”; +# 2. context_max_lines/context_max_chars 一起抬高,避免最近消息刚拼进去又被整体截断; +# 3. recent_message_line_max_chars 稍微放宽,让模型能看到每条消息更多细节,但仍避免单条刷屏。 +context_max_chars = 4200 +context_max_lines = 40 +recent_message_max_lines = 30 +recent_message_line_max_chars = 100 at_member_profile_max_chars = 160 at_member_profile_max_lines = 5 member_memory_max_chars = 180 diff --git a/plugins/ai_auto_response/context/context_builder.py b/plugins/ai_auto_response/context/context_builder.py index 3084201..1d158ad 100644 --- a/plugins/ai_auto_response/context/context_builder.py +++ b/plugins/ai_auto_response/context/context_builder.py @@ -94,38 +94,14 @@ class ContextBuilder: ) -> List[Dict]: if not recent_messages: return [] + # 这里直接把“最近 N 条”原样交给后续提示词层,而不是再做一次相关性裁剪: + # 1. 用户明确要求给模型 30 条最近消息,方便推断群里正在讨论的上下文; + # 2. 之前的“相关性筛选 + 尾部保留”虽然更省 token,但会打断对话连续性; + # 3. 对群聊场景来说,连续现场通常比少量高分片段更有利于模型判断谁在接谁的话。 + # + # 这里仍保留签名参数不动,是为了兼容上层调用,避免后续改动牵连太多。 window = recent_messages[-self.recent_context_size:] - if len(window) <= 8: - return window - - current_tokens = self._extract_topic_tokens(current_content) - quote_tokens = self._extract_topic_tokens( - f"{quote_context.get('title', '')} {quote_context.get('quote_body', '')}" - ) - focus_tokens = current_tokens | quote_tokens - quote_sender_name = str(quote_context.get("quote_sender_name", "") or "").strip().lower() - - scored: List[tuple[int, int, Dict]] = [] - for idx, item in enumerate(window): - score = self._message_relevance( - item, - current_sender=current_sender, - focus_tokens=focus_tokens, - quote_sender_name=quote_sender_name, - ) - if score > 0: - scored.append((score, idx, item)) - - # 总是保留尾部几条,维持现场感;再拼上与当前话题最相关的消息。 - tail_indexes = set(range(max(len(window) - 4, 0), len(window))) - keep_indexes = set(tail_indexes) - for _, idx, _ in sorted(scored, key=lambda x: (-x[0], -x[1]))[:10]: - keep_indexes.add(idx) - - selected = [window[idx] for idx in sorted(keep_indexes)] - if len(selected) < 6: - return window[-6:] - return selected[-12:] + return window @classmethod def _message_relevance( diff --git a/plugins/ai_auto_response/main.py b/plugins/ai_auto_response/main.py index 441cb84..936d72c 100644 --- a/plugins/ai_auto_response/main.py +++ b/plugins/ai_auto_response/main.py @@ -852,7 +852,16 @@ class AIAutoResponsePlugin(MessagePluginInterface): "最近上下文", self._join_recent_messages( context, - max_lines=int(prompt_strategy.get("recent_message_max_lines", 4) or 4), + # 这里优先走 prompt_strategy,是为了让“给模型看多少条最近消息”由策略层统一控制; + # 如果策略层没有明确给值,再退回配置里的 recent_message_max_lines, + # 避免出现“配置已经改成 30,但这里还偷偷按 4 条截断”的问题。 + max_lines=int( + prompt_strategy.get( + "recent_message_max_lines", + self.prompt_compact_config.get("recent_message_max_lines", 30), + ) + or 30 + ), max_line_chars=int(self.prompt_compact_config.get("recent_message_line_max_chars", 60) or 60), ), ), @@ -1052,7 +1061,19 @@ class AIAutoResponsePlugin(MessagePluginInterface): # 3. 群事实和向量记忆只在问答场景打开,避免模型把记忆写进每句闲聊。 target_reply_chars_map = {"social_short": 10, "qa_fast": 16, "qa_with_context": 24} hard_reply_cap_map = {"social_short": 12, "qa_fast": 18, "qa_with_context": 28} - recent_lines_map = {"social_short": 4, "qa_fast": 5, "qa_with_context": 6} + # 最近消息条数不再按模式缩到 4~6 条,而是统一交给模型看完整窗口: + # 1. 回复仍然走短句限制,避免“上下文多了,回复也跟着变长”; + # 2. 但模型理解当前讨论时,需要看到完整现场,尤其是多人连续接话场景; + # 3. 默认读取 prompt_compact.recent_message_max_lines,这样配置和策略不会打架。 + configured_recent_lines = max( + int(self.prompt_compact_config.get("recent_message_max_lines", 30) or 30), + 1, + ) + recent_lines_map = { + "social_short": configured_recent_lines, + "qa_fast": configured_recent_lines, + "qa_with_context": configured_recent_lines, + } allow_member_memory = strong_directed or is_followup or returning_state in {"returning_member", "long_absent_member"} allow_social_memory = is_question_like and strong_directed