From ee1532b2f5e9817e414796cf0de97ee03a58163a Mon Sep 17 00:00:00 2001 From: liuwei Date: Fri, 24 Apr 2026 15:24:48 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=BE=E5=AE=BD=E8=87=AA=E5=8A=A8=E5=9B=9E?= =?UTF-8?q?=E5=A4=8D=E9=95=BF=E5=BA=A6=E4=B8=BA=E8=87=AA=E7=84=B6=E6=B5=AE?= =?UTF-8?q?=E5=8A=A8=E5=B9=B6=E7=BB=9F=E4=B8=8030=E5=AD=97=E4=B8=8A?= =?UTF-8?q?=E9=99=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugins/ai_auto_response/config.toml | 20 ++++++++----- .../ai_auto_response/core/reply_formatter.py | 29 ++++++++++++------- plugins/ai_auto_response/main.py | 9 ++++-- .../profile/persona_engine.py | 6 ++-- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/plugins/ai_auto_response/config.toml b/plugins/ai_auto_response/config.toml index 791ec4a..2854ef8 100644 --- a/plugins/ai_auto_response/config.toml +++ b/plugins/ai_auto_response/config.toml @@ -56,16 +56,20 @@ memory_lookback_days = 180 active_context_hours = 8 [reply] -social_short_char_limit = 12 -social_short_total_limit = 12 -qa_fast_char_limit = 18 -qa_fast_total_limit = 18 +# 回复长度改成“下限放开、上限约束”的思路: +# 1. 允许模型只回几个字,避免每句都被逼着凑满; +# 2. 统一把本地最终兜底上限放到 30 字,给一句完整观点留空间; +# 3. qa_with_context 仍保留 2 句能力,但总字数同样压住,不让它长成说明文。 +social_short_char_limit = 30 +social_short_total_limit = 30 +qa_fast_char_limit = 30 +qa_fast_total_limit = 30 qa_with_context_sentence_limit = 2 qa_with_context_chunk_limit = 2 -qa_with_context_char_limit = 16 -qa_with_context_total_limit = 28 -default_char_limit = 12 -default_total_limit = 12 +qa_with_context_char_limit = 18 +qa_with_context_total_limit = 30 +default_char_limit = 30 +default_total_limit = 30 [prompt_compact] group_profile_max_chars = 220 diff --git a/plugins/ai_auto_response/core/reply_formatter.py b/plugins/ai_auto_response/core/reply_formatter.py index cd5c005..8c16e7d 100644 --- a/plugins/ai_auto_response/core/reply_formatter.py +++ b/plugins/ai_auto_response/core/reply_formatter.py @@ -52,11 +52,14 @@ def preview_text(text: str, limit: int = 80) -> str: def build_length_rule(reply_mode: str) -> str: if reply_mode == "social_short": - return "默认只回半句到1句,目标10字左右,非必要别超过12字。" + # 长度规则改成“可短可长,但别超过 30 字”: + # 1. 很短的口语接话依然允许,避免模型被目标字数绑住; + # 2. 但仍限制只回 1 句,防止回复重新膨胀成说明文。 + return "默认只回1句,可短到几个字;有必要再说完整一句,但非必要别超过30字。" if reply_mode == "qa_fast": - return "优先1句口语化结论,目标16字内;先给结论,不要展开。" + return "优先1句口语化结论,可短可长;先给结论,不要展开成长说明,非必要别超过30字。" if reply_mode == "qa_with_context": - return "优先1句;必要时最多2句,每句尽量控制在16字内,总体不超过28字。" + return "优先1句;必要时最多2句,但总体仍尽量压在30字内,说完关键判断就收。" return "尽量短,像群友临时接一句,不要长篇大论。" @@ -128,18 +131,22 @@ def _find_split_at(window: str, punctuation: str, lookback: int = 10) -> int: def _resolve_limits(reply_mode: str, limits: Dict) -> Dict[str, int]: mode_defaults = { - "social_short": {"sentence_limit": 1, "char_limit": 12, "chunk_limit": 1, "total_limit": 12}, - "qa_fast": {"sentence_limit": 1, "char_limit": 18, "chunk_limit": 1, "total_limit": 18}, - "qa_with_context": {"sentence_limit": 2, "char_limit": 16, "chunk_limit": 2, "total_limit": 28}, + # 这里的默认值是“本地最终裁剪”的最后一道保险: + # 1. 下限不做要求,模型可以回很短; + # 2. 这里只管兜底上限,避免偶发输出过长; + # 3. 所有模式统一往 30 字附近收,保持群聊接话感。 + "social_short": {"sentence_limit": 1, "char_limit": 30, "chunk_limit": 1, "total_limit": 30}, + "qa_fast": {"sentence_limit": 1, "char_limit": 30, "chunk_limit": 1, "total_limit": 30}, + "qa_with_context": {"sentence_limit": 2, "char_limit": 18, "chunk_limit": 2, "total_limit": 30}, } - defaults = mode_defaults.get(reply_mode, {"sentence_limit": 1, "char_limit": 12, "chunk_limit": 1, "total_limit": 12}) + defaults = mode_defaults.get(reply_mode, {"sentence_limit": 1, "char_limit": 30, "chunk_limit": 1, "total_limit": 30}) if reply_mode == "social_short": return { "sentence_limit": 1, "chunk_limit": 1, "char_limit": max(int(limits.get("social_short_char_limit", defaults["char_limit"]) or defaults["char_limit"]), 8), "total_limit": max(int(limits.get("social_short_total_limit", defaults["total_limit"]) or defaults["total_limit"]), 8), - "default_char_limit": max(int(limits.get("default_char_limit", 12) or 12), 8), + "default_char_limit": max(int(limits.get("default_char_limit", 30) or 30), 8), } if reply_mode == "qa_fast": return { @@ -147,7 +154,7 @@ def _resolve_limits(reply_mode: str, limits: Dict) -> Dict[str, int]: "chunk_limit": 1, "char_limit": max(int(limits.get("qa_fast_char_limit", defaults["char_limit"]) or defaults["char_limit"]), 8), "total_limit": max(int(limits.get("qa_fast_total_limit", defaults["total_limit"]) or defaults["total_limit"]), 8), - "default_char_limit": max(int(limits.get("default_char_limit", 12) or 12), 8), + "default_char_limit": max(int(limits.get("default_char_limit", 30) or 30), 8), } if reply_mode == "qa_with_context": return { @@ -155,14 +162,14 @@ def _resolve_limits(reply_mode: str, limits: Dict) -> Dict[str, int]: "chunk_limit": max(int(limits.get("qa_with_context_chunk_limit", defaults["chunk_limit"]) or defaults["chunk_limit"]), 1), "char_limit": max(int(limits.get("qa_with_context_char_limit", defaults["char_limit"]) or defaults["char_limit"]), 8), "total_limit": max(int(limits.get("qa_with_context_total_limit", defaults["total_limit"]) or defaults["total_limit"]), 8), - "default_char_limit": max(int(limits.get("default_char_limit", 12) or 12), 8), + "default_char_limit": max(int(limits.get("default_char_limit", 30) or 30), 8), } return { "sentence_limit": 1, "chunk_limit": 1, "char_limit": max(int(limits.get("default_char_limit", defaults["char_limit"]) or defaults["char_limit"]), 8), "total_limit": max(int(limits.get("default_total_limit", defaults["total_limit"]) or defaults["total_limit"]), 8), - "default_char_limit": max(int(limits.get("default_char_limit", 12) or 12), 8), + "default_char_limit": max(int(limits.get("default_char_limit", 30) or 30), 8), } diff --git a/plugins/ai_auto_response/main.py b/plugins/ai_auto_response/main.py index 989b18e..88c6243 100644 --- a/plugins/ai_auto_response/main.py +++ b/plugins/ai_auto_response/main.py @@ -1102,8 +1102,13 @@ class AIAutoResponsePlugin(MessagePluginInterface): # 1. 普通 social_short 基本不喂长期记忆,只保留最小现场感; # 2. 明确点名、追问、回归成员时,才适度打开成员记忆; # 3. 群事实和向量记忆只在问答场景打开,避免模型把记忆写进每句闲聊。 - target_reply_chars_map = {"social_short": 10, "qa_fast": 16, "qa_with_context": 24} - hard_reply_cap_map = {"social_short": 12, "qa_fast": 18, "qa_with_context": 28} + # + # 这里把长度策略改成“下限放开、上限约束”: + # 1. 不再要求模型默认说到 20~30 字,避免每句都像刻意凑长度; + # 2. target_reply_chars 只保留一个偏短的参考值,方便模型自然收放; + # 3. hard_reply_cap 才是关键兜底,统一限制别超过 30 字,保持群聊轻量感。 + target_reply_chars_map = {"social_short": 12, "qa_fast": 16, "qa_with_context": 20} + hard_reply_cap_map = {"social_short": 30, "qa_fast": 30, "qa_with_context": 30} # 最近消息条数不再按模式缩到 4~6 条,而是统一交给模型看完整窗口: # 1. 回复仍然走短句限制,避免“上下文多了,回复也跟着变长”; # 2. 但模型理解当前讨论时,需要看到完整现场,尤其是多人连续接话场景; diff --git a/plugins/ai_auto_response/profile/persona_engine.py b/plugins/ai_auto_response/profile/persona_engine.py index c46dde3..e6085fe 100644 --- a/plugins/ai_auto_response/profile/persona_engine.py +++ b/plugins/ai_auto_response/profile/persona_engine.py @@ -49,11 +49,11 @@ class PersonaEngine: # 这里把“短”从模糊描述改成明确字数目标,避免模型虽然知道要短, # 但仍然习惯性输出完整说明句,导致真人感被拉低。 if reply_mode == "social_short": - return "默认只回半句到一句,目标 4 到 10 个字,非必要别超过 16 个字。" + return "默认只回一句,可短到几个字;有必要再说完整一句,但非必要别超过 30 个字。" if reply_mode == "qa_fast": - return "优先一句口语化结论,目标 8 到 16 个字,非必要别超过 22 个字。" + return "优先一句口语化结论,可短可长,但非必要别超过 30 个字。" if reply_mode == "qa_with_context": - return "先给结论,再补一个关键点;最多 2 句,总体尽量压在 28 个字内。" + return "先给结论,再补一个关键点;最多 2 句,但总体尽量压在 30 个字内。" return "默认按群友顺手接话来回,宁可短一点,也别写完整说明文。" def _build_presets(self) -> Dict[str, Dict]: