From e7d68a89c25dd9b7d4cf698a12bca28a1e30dc2d Mon Sep 17 00:00:00 2001
From: liuwei <liuwei@wdtrgf.com.cn>
Date: Tue, 28 Apr 2026 17:41:02 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8B=E6=94=BE=20ai=5Fauto=5Fresponse=20?=
 =?UTF-8?q?=E5=8F=82=E4=B8=8E=E5=88=A4=E6=96=AD=E5=88=B0=20LLM?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

变更项：
1. 新增 decision 配置并重写 ResponsePlanner，将是否送模型的本地粗筛收缩为轻量入口判断，允许在主动参与开启时更多普通文本进入模型。
2. 将 cooldown 从模型前挡板后移到 LLM 判定 should_reply 之后，改为发送闸门，减少本地提前拦截。
3. 调整上下文与 prompt 控制信息，明确 reply_mode 只是本地 hint，并把 acceptance_state、solver 等信号直接下放给模型统一判断是否参与和如何回复。
---
 plugins/ai_auto_response/config.toml          |  12 ++
 .../context/context_builder.py                |  10 ++
 .../ai_auto_response/core/decision_flow.py    |   9 +-
 .../ai_auto_response/core/prompt_builder.py   |   9 +-
 .../ai_auto_response/core/response_planner.py | 145 ++++++++----------
 plugins/ai_auto_response/main.py              |  76 ++++++---
 6 files changed, 157 insertions(+), 104 deletions(-)

diff --git a/plugins/ai_auto_response/config.toml b/plugins/ai_auto_response/config.toml
index 73be5a1..419fefd 100644
--- a/plugins/ai_auto_response/config.toml
+++ b/plugins/ai_auto_response/config.toml
@@ -55,6 +55,18 @@ long_absent_member_days = 30
 memory_lookback_days = 180
 active_context_hours = 8
 
+[decision]
+# 决策层改成“LLM 优先裁决”：
+# 1. 本地尽量只做硬过滤和极轻的入口判断；
+# 2. 是否参与聊天、是否回复、最终 reply_mode 尽量交给模型自己输出 should_reply 来决定；
+# 3. 这样会增加一定模型调用量，但能明显减少本地规则把语境复杂消息提前拦掉的问题。
+model_decides_participation = true
+# 当 allow_proactive_reply=true 时，这里允许普通群聊文本也送模型：
+# 1. 本地不再要求必须先命中 question/topic/followup 等信号才进模型；
+# 2. 最终由模型结合最近消息、群接受度、是否已有真人在答等信号统一判断 should_reply；
+# 3. 如果后续觉得成本又偏高，可以只把这个开关关掉，而不必回退整套旧规则。
+allow_all_text_when_proactive = true
+
 [reply]
 # 回复长度改成“下限放开、上限约束”的思路：
 # 1. 允许模型只回几个字，避免每句都被逼着凑满；
diff --git a/plugins/ai_auto_response/context/context_builder.py b/plugins/ai_auto_response/context/context_builder.py
index 9316d31..2f7ee55 100644
--- a/plugins/ai_auto_response/context/context_builder.py
+++ b/plugins/ai_auto_response/context/context_builder.py
@@ -53,8 +53,18 @@ class ContextBuilder:
             "speaker_name_clean": self._clean_display_name(sender_name),
             "is_at": bool(trigger.get("is_at", False)),
             "is_directed": bool(trigger.get("is_directed", False)),
+            # 这些轻量触发标记继续保留到上下文里：
+            # 1. 现在我们准备把“是否参与聊天”更多地下放给模型；
+            # 2. 因此模型需要直接看到这些基础信号，而不是只吃本地裁剪后的 reply_mode；
+            # 3. 这样它能自己判断“这是普通问句、追问、社交招呼，还是纯路过消息”。
+            "question_detected": bool(trigger.get("question_detected", False)),
+            "is_question": bool(trigger.get("is_question", False)),
+            "is_followup": bool(trigger.get("is_followup", False)),
+            "is_social_call": bool(trigger.get("is_social_call", False)),
+            "is_returning_member": bool(trigger.get("is_returning_member", False)),
             # 这类标记会被后面的 prompt 策略层消费，用来决定要不要放开群级记忆。
             "is_group_memory_query": bool(trigger.get("is_group_memory_query", False)),
+            "topic": str(trigger.get("topic", "") or ""),
             "recent_message_items": self._build_recent_message_items(selected_messages),
             "recent_messages": recent_lines,
             "recent_summary": "",
diff --git a/plugins/ai_auto_response/core/decision_flow.py b/plugins/ai_auto_response/core/decision_flow.py
index 9f298ef..4048a02 100644
--- a/plugins/ai_auto_response/core/decision_flow.py
+++ b/plugins/ai_auto_response/core/decision_flow.py
@@ -6,8 +6,13 @@ from .response_planner import ResponsePlanner
 
 
 class DecisionFlow:
-    def __init__(self, planner: ResponsePlanner | None = None):
-        self.planner = planner or ResponsePlanner()
+    def __init__(self, config: Dict | None = None, planner: ResponsePlanner | None = None):
+        # Decision settings are passed in explicitly so the local-vs-LLM split can be tuned
+        # from config; an explicitly supplied planner still takes precedence over it.
+        if isinstance(config, ResponsePlanner):
+            config, planner = None, config  # legacy positional call: DecisionFlow(planner)
+        self.config = config or {}
+        self.planner = planner or ResponsePlanner(self.config)
 
     def prepare(self, trigger: Dict, flow_state: str, allow_proactive: bool, acceptance_state: str, conversation_hints: Dict) -> Dict:
         reply_mode = self.planner.choose_reply_mode(trigger, flow_state)
diff --git a/plugins/ai_auto_response/core/prompt_builder.py b/plugins/ai_auto_response/core/prompt_builder.py
index 144d376..3d05c79 100644
--- a/plugins/ai_auto_response/core/prompt_builder.py
+++ b/plugins/ai_auto_response/core/prompt_builder.py
@@ -13,6 +13,10 @@ def build_user_prompt(context: Dict, memory_hints: Dict) -> str:
             for item in recent_items
         ]
     )
+    # 这里的 reply_mode 现在只当“本地推荐模式”看待：
+    # 1. 用户希望把是否参与和如何回复更多地下放给 LLM；
+    # 2. 因此 prompt 里不再把本地 reply_mode 当成必须遵守的硬指令；
+    # 3. 它仍然保留，是为了给模型一个弱提示，帮助它理解本地默认预期的回复强度。
     reply_mode = str(context.get("reply_mode", "social_short") or "social_short")
     length_rule = build_length_rule(reply_mode)
     group_profile = context.get("group_profile", {}) or {}
@@ -79,8 +83,10 @@ def build_user_prompt(context: Dict, memory_hints: Dict) -> str:
             "\n".join(
                 [
                     f"触发类型：{context.get('trigger_type', 'none')}",
-                    f"回复模式：{reply_mode}",
+                    f"本地推荐回复模式：{reply_mode}（仅供参考，你可自行改判）",
                     f"当前心流状态：{context.get('flow_state', 'idle')}",
+                    f"群接受度：{context.get('acceptance_state', 'neutral')}",
+                    f"最近是否已有真人在回答：{'是' if context.get('has_recent_human_solver') else '否'}",
                     f"回归状态：{memory_hints.get('returning_member_state', '') or 'none'}",
                 ]
             ),
@@ -91,6 +97,7 @@ def build_user_prompt(context: Dict, memory_hints: Dict) -> str:
             "\n".join(
                 [
                     "只输出一个 JSON 对象，不要输出 markdown、代码块或解释。",
+                    "是否回复、是否参与聊天、最终 reply_mode 由你自己判断；本地推荐模式不是硬约束。",
                     '{"should_reply":true,"topic_id":"latest:0","topic_summary":"一句话概括当前这次在聊什么","reply_mode":"social_short","reply":"最终发到群里的内容"}',
                     "`should_reply=false` 时，`reply` 必须是空字符串。",
                     "`reply_mode` 只能是 `social_short`、`qa_fast`、`qa_with_context` 之一。",
diff --git a/plugins/ai_auto_response/core/response_planner.py b/plugins/ai_auto_response/core/response_planner.py
index e8869eb..d7630b6 100644
--- a/plugins/ai_auto_response/core/response_planner.py
+++ b/plugins/ai_auto_response/core/response_planner.py
@@ -4,28 +4,35 @@ from typing import Dict
 
 
 class ResponsePlanner:
+    def __init__(self, config: Dict | None = None):
+        self.config = config or {}
+        # Explicit switch for "let the model decide participation":
+        # 1. defaults to True, i.e. the LLM-first behaviour, when the key is absent;
+        # 2. should_consider_model still applies a light local entry check, so not all text reaches the model when proactive replies are off;
+        # 3. switching to a more conservative policy is a config change, not a rewrite.
+        self.model_decides_participation = bool(self.config.get("model_decides_participation", True))
+        # Switch for "when proactive replies are allowed, let ordinary chat text reach the model too":
+        # 1. on: a message without any entry signal is still forwarded when allow_proactive is set;
+        # 2. off: at least one lightweight entry signal is required;
+        # 3. defaults to True, matching the goal of shifting judgement from local rules to the LLM.
+        self.allow_all_text_when_proactive = bool(self.config.get("allow_all_text_when_proactive", True))
+
     def choose_reply_mode(self, trigger: Dict, flow_state: str) -> str:
-        # 被明确点名辱骂/挑衅时，最像真人的反应通常不是长解释，
-        # 而是一句短短的回怼或挡回去，所以这里强制走 social_short。
-        if trigger.get("is_directed_abuse"):
-            return "social_short"
-        # “群里最近都聊什么”这类问题，本质是在问群记忆摘要：
-        # 1. 如果继续走 qa_fast，就只会优先依赖最近现场消息；
-        # 2. 这里直接抬到 qa_with_context，后面才会打开群事实/向量记忆等补充层；
-        # 3. 不依赖 flow_state，是因为这类问题和当前场子热不热关系不大。
-        if trigger.get("is_question") and trigger.get("is_group_memory_query"):
+        # reply_mode is now only a local recommendation, not a binding decision on how to reply:
+        # 1. the model may still output social_short / qa_fast / qa_with_context on its own;
+        # 2. the recommendation is meant to set only the default strength of context/memory assembly;
+        # 3. hence the deliberately permissive mapping below instead of the former strict local rules.
+        if trigger.get("is_group_memory_query"):
+            return "qa_with_context"
+        if trigger.get("is_question") or trigger.get("question_detected"):
             return "qa_with_context"
-        if trigger.get("is_question"):
-            return "qa_with_context" if flow_state in {"engaged", "deep_engaged"} else "qa_fast"
         if trigger.get("is_followup"):
-            if trigger.get("is_directed"):
-                return "qa_with_context" if flow_state in {"engaged", "deep_engaged"} else "qa_fast"
-            return "social_short"
-        if trigger.get("is_social_call"):
-            return "social_short"
+            return "qa_with_context"
         if trigger.get("is_returning_member"):
-            return "social_short"
-        return "social_short" if flow_state in {"deep_engaged"} else "refuse_or_skip"
+            return "qa_fast"
+        if trigger.get("topic") or trigger.get("is_social_call") or trigger.get("is_directed_abuse"):
+            return "qa_fast" if flow_state in {"engaged", "deep_engaged"} else "social_short"
+        return "social_short"
 
     def should_consider_model(
         self,
@@ -36,73 +43,23 @@ class ResponsePlanner:
         conversation_hints: Dict | None = None,
     ) -> bool:
         conversation_hints = conversation_hints or {}
-        trigger_type = str(trigger.get("trigger_type", "") or "")
-        directed = bool(trigger.get("is_directed"))
-        question_detected = bool(trigger.get("question_detected"))
-        allow_undirected_question = bool(trigger.get("allow_undirected_question"))
-        if trigger.get("is_at") or trigger_type == "at_trigger":
+        if not self.model_decides_participation:
+            # 兼容保守模式：
+            # 1. 如果显式关闭“模型主导参与判断”，则退回到较轻的信号门槛；
+            # 2. 即便如此，也不再恢复旧版那种大量依赖 flow/acceptance/solver 的强本地裁决；
+            # 3. 这里只保留“有无轻量触发信号”的粗判断。
+            return self._has_entry_signal(trigger, conversation_hints)
+
+        # llm-first 模式：
+        # 1. 只要当前消息通过了前面的硬过滤，本地就尽量不再替模型做人类语境判断；
+        # 2. directed/question/followup/topic 等轻量信号仍然保留，用来保证在关闭主动参与时有个最小入口；
+        # 3. 当 allow_proactive_reply=true 时，默认允许普通文本也进模型，由模型自己返回 should_reply。
+        has_signal = self._has_entry_signal(trigger, conversation_hints)
+        if has_signal:
             return True
-        if trigger.get("is_directed_abuse") and directed:
-            return True
-        if trigger_type == "quote_followup_trigger" and directed:
-            return True
-        if trigger.get("is_question") and conversation_hints.get("has_recent_human_solver") and flow_state == "idle":
-            return False
-        # 关键收敛：
-        # 1. 群里的普通问句，哪怕命中了 topic，也不应该因为“当前气氛热”就被 bot 主动接住；
-        # 2. 只要它有明显问句形态，但又没有明确指向 bot，就整体禁止进入模型，
-        #    从根上阻断“别人互相问一句，bot 突然抢答”的尴尬感。
-        # 这里额外给 allow_undirected_question 开一个窄口：
-        # 1. 仅当上游明确配置允许“群问句适度参与”时，才不在这里一刀切拦截；
-        # 2. 但是否真的放行，还要继续走下面针对 is_question 的更细条件；
-        # 3. 这样能把“完全不说话”和“见问就答”之间留出一个可调的中间带。
-        if question_detected and not directed and not trigger.get("is_followup") and not allow_undirected_question:
-            return False
-        if trigger.get("is_question"):
-            # 策略收敛：
-            # 问答类回复只在“明确指向机器人”时触发，防止把群友之间的疑问句当作对机器人提问。
-            # 这层作为兜底，即使上游触发器未来被调整，也不会回到“疑问句高频抢答”的状态。
-            if directed:
-                return True
-            # 对“非定向问句”的放行继续保持保守，只在以下场景参与：
-            # 1. 明显命中了当前产品关心的话题/技术域；
-            # 2. 或群心流已经 warming/engaged，说明 bot 最近参与没有被明显冷处理；
-            # 3. 或 acceptance_state 已经 warm，说明群里对 bot 的接话接受度较高。
-            # 同时保留上面的 has_recent_human_solver 限制，避免大家已经在答了 bot 还硬插话。
-            return allow_undirected_question and (
-                bool(trigger.get("topic"))
-                or flow_state in {"warming", "engaged", "deep_engaged"}
-                or acceptance_state == "warm"
-            )
-        if trigger.get("is_followup"):
-            if directed:
-                return True
-            return (
-                flow_state in {"engaged", "deep_engaged"}
-                and acceptance_state == "warm"
-                and bool(trigger.get("topic"))
-            )
-        if trigger.get("is_social_call"):
-            if acceptance_state == "cold":
-                return False
-            if directed:
-                return True
-            return flow_state in {"warming", "engaged", "deep_engaged"} or acceptance_state == "warm"
-        if trigger.get("topic"):
-            if not allow_proactive:
-                return False
-            if acceptance_state == "cold":
-                return False
-            return flow_state in {"warming", "engaged", "deep_engaged"} or trigger.get("priority", 0) >= 0.4
-        if trigger.get("is_returning_member"):
-            return directed or acceptance_state != "cold"
         if not allow_proactive:
             return False
-        if acceptance_state == "cold":
-            return False
-        if acceptance_state == "neutral":
-            return flow_state in {"engaged", "deep_engaged"} and trigger.get("priority", 0) >= 0.7
-        return flow_state in {"warming", "engaged", "deep_engaged"} and trigger.get("priority", 0) >= 0.45
+        return self.allow_all_text_when_proactive
 
     def should_reply(
         self,
@@ -113,3 +70,27 @@ class ResponsePlanner:
         conversation_hints: Dict | None = None,
     ) -> bool:
         return self.should_consider_model(trigger, flow_state, allow_proactive, acceptance_state, conversation_hints)
+
+    @staticmethod
+    def _has_entry_signal(trigger: Dict, conversation_hints: Dict | None = None) -> bool:
+        conversation_hints = conversation_hints or {}
+        trigger_type = str(trigger.get("trigger_type", "") or "")
+        # Lightweight "worth sending to the model" signals:
+        # 1. a hit only means the model should take a look; it is not a local decision to reply;
+        # 2. every check is a cheap truthiness test, deliberately avoiding a complex rule tree;
+        # 3. quote_targets_bot is included so a quoted follow-up to the bot passes even with a weak trigger_type.
+        return any(
+            [
+                bool(trigger.get("is_at")),
+                bool(trigger.get("is_directed")),
+                bool(trigger.get("question_detected")),
+                bool(trigger.get("is_question")),
+                bool(trigger.get("is_followup")),
+                bool(trigger.get("is_social_call")),
+                bool(trigger.get("is_returning_member")),
+                bool(trigger.get("is_directed_abuse")),
+                bool(trigger.get("topic")),
+                bool(conversation_hints.get("quote_targets_bot")),
+                trigger_type not in {"", "none"},
+            ]
+        )
diff --git a/plugins/ai_auto_response/main.py b/plugins/ai_auto_response/main.py
index 1805c14..e1bf312 100644
--- a/plugins/ai_auto_response/main.py
+++ b/plugins/ai_auto_response/main.py
@@ -127,7 +127,12 @@ class AIAutoResponsePlugin(MessagePluginInterface):
         self.memory_store = MemoryStore(self.db_manager, merged_memory_config)
         self.vector_memory = VectorMemoryStore(self._config.get("memory", {}) or {})
         self.context_builder = ContextBuilder(int((self._config.get("mode", {}) or {}).get("recent_context_size", 30)))
-        self.decision_flow = DecisionFlow()
+        # 决策配置单独收口：
+        # 1. 现在用户希望把“是否参与聊天/是否回复”的判断更多地下放给 LLM；
+        # 2. 因此这里把 decision 配置显式传给 DecisionFlow，避免内部继续偷偷走硬编码默认值；
+        # 3. 后续只要调 config，就能继续微调“本地粗筛”和“模型统一判断”的分工比例。
+        self.decision_config = self._config.get("decision", {}) or {}
+        self.decision_flow = DecisionFlow(self.decision_config)
         self.llm_client = UnifiedLLMClient(self._config.get("api", {}) or {})
         self.social_memory = SocialMemoryService(self.db_manager, self._config.get("memory", {}) or {})
         self.group_facts = GroupFactsService(self._config.get("memory", {}) or {})
@@ -441,8 +446,8 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                 conversation_hints,
             )
             reply_mode = str(decision.get("reply_mode", "social_short") or "social_short")
-            should_reply = bool(decision.get("should_consider_model"))
-            if not should_reply:
+            should_enter_model = bool(decision.get("should_consider_model"))
+            if not should_enter_model:
                 self._log_event(
                     "skip",
                     room_id=room_id,
@@ -456,17 +461,6 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                     solver=yn(conversation_hints.get("has_recent_human_solver")),
                 )
                 return False, "skip"
-            if not self.cooldown.pass_cooldown(room_id, sender, trigger.__dict__):
-                self._log_event(
-                    "skip",
-                    room_id=room_id,
-                    sender=sender,
-                    reason=trigger.__dict__.get("_cooldown_reason", "cooldown"),
-                    trigger_type=trigger.trigger_type,
-                    reply_mode=reply_mode,
-                    topic=trigger.topic or "",
-                )
-                return False, "cooldown"
 
             vector_memories = []
             if self.vector_memory.should_search(reply_mode, trigger.trigger_type, memory_hints.get("returning_member_state", "")):
@@ -557,6 +551,13 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                 image_context=image_context,
             )
             context["coding_work_request"] = coding_work_request
+            # 这些控制信号现在会直接下放给模型，辅助它统一决定 should_reply：
+            # 1. acceptance_state 表示群体对 bot 最近几次发言的接受度；
+            # 2. has_recent_human_solver 表示最近已经有人在接这个问题；
+            # 3. 本地不再用它们强裁决，而是改成“显式告知模型，由模型自己衡量要不要插话”。
+            context["acceptance_state"] = acceptance_state
+            context["has_recent_human_solver"] = bool(conversation_hints.get("has_recent_human_solver"))
+            context["solver_count"] = int(conversation_hints.get("solver_count", 0) or 0)
             # 这个标记只作为模型输入信号，不在本地直接生成固定回复。
             # 这样既能让模型知道“这次是在被点名挑衅”，又不会暴露出模板式机器人痕迹。
             context["abuse_directed"] = is_directed_abuse(
@@ -633,6 +634,21 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                     topic=selected_topic,
                 )
                 return False, "llm_empty_reply"
+            # 冷却从“模型前挡板”改成“模型后发送闸门”：
+            # 1. 这样本地不再替模型过早决定“值不值得参与”；
+            # 2. 模型先统一判断 should_reply，只有当它明确想回时，才进入频率控制；
+            # 3. 仍然保留冷却，是为了守住群内刷屏风险，但职责已经变成“限制发送”，不是“替模型做语义裁决”。
+            if not self.cooldown.pass_cooldown(room_id, sender, trigger.__dict__):
+                self._log_event(
+                    "skip",
+                    room_id=room_id,
+                    sender=sender,
+                    reason=f"post_llm_{trigger.__dict__.get('_cooldown_reason', 'cooldown')}",
+                    trigger_type=trigger.trigger_type,
+                    reply_mode=reply_mode,
+                    topic=selected_topic,
+                )
+                return False, "post_llm_cooldown"
 
             reply_chunks = finalize_reply(reply_text, reply_mode, self.reply_limits)
             final_response_text = "\n".join(reply_chunks)
@@ -1054,13 +1070,21 @@ class AIAutoResponsePlugin(MessagePluginInterface):
         )
 
         control_lines = [
-            f"reply_mode={context.get('reply_mode', 'social_short')}",
+            # 这里显式把本地 reply_mode 改成 hint，而不是命令：
+            # 1. 用户希望把“回不回、怎么回”的判断更多地下放给模型；
+            # 2. 因此 control 中的模式值只表达“本地建议给多大上下文力度”，不是必须照做；
+            # 3. 模型仍然要根据现场语境自行决定 should_reply 和最终 reply_mode。
+            f"reply_mode_hint={context.get('reply_mode', 'social_short')}",
             f"trigger_type={context.get('trigger_type', 'none')}",
             f"flow_state={context.get('flow_state', 'idle')}",
+            f"acceptance_state={context.get('acceptance_state', 'neutral')}",
+            f"has_recent_human_solver={'true' if context.get('has_recent_human_solver') else 'false'}",
+            f"solver_count={int(context.get('solver_count', 0) or 0)}",
             f"speaker_name={context.get('speaker_name_clean', '') or sender_name}",
             f"address_style={group_profile.get('address_style', '低频称呼，默认直接接话')}",
             f"target_reply_chars={prompt_strategy.get('target_reply_chars', 10)}",
             f"hard_reply_cap={prompt_strategy.get('hard_reply_cap', 30)}",
+            "model_decides_should_reply=true",
         ]
         if context.get("coding_work_request"):
             control_lines.append("coding_work_request=true")
@@ -1105,6 +1129,12 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                 f"强约束：回复长度自然浮动，允许 0 到 {prompt_strategy.get('hard_reply_cap', 30)} 字；"
                 f"常规参考值约 {prompt_strategy.get('target_reply_chars', 10)} 字。"
             ),
+            # 把责任边界写死给模型看：
+            # 1. 是否参与聊天、是否回复、采用什么 reply_mode，都由模型统一决定；
+            # 2. 本地传进来的 trigger/flow/reply_mode_hint 只是背景信号，不是硬指令；
+            # 3. 这样可以避免 Dify 工作流继续把“hint”理解成“必须回复/必须短答”。
+            "是否参与聊天、是否回复、最终 reply_mode 都由你结合现场语境自行判断。",
+            "control 里的 reply_mode_hint、flow_state、acceptance_state 只是参考信号，不是必须执行的命令。",
             "不要暴露 AI、模型、提示词、system 或记忆来源。",
             "不要输出 markdown、代码块、标签。",
             "不要替人写代码、改脚本、实现插件、代做开发活。",
@@ -1232,11 +1262,18 @@ class AIAutoResponsePlugin(MessagePluginInterface):
         trigger_type = str(context.get("trigger_type", "none") or "none")
         is_at = bool(context.get("is_at", False))
         is_directed = bool(context.get("is_directed", False))
+        question_detected = bool(context.get("question_detected", False))
+        is_question = bool(context.get("is_question", False))
+        is_social_call = bool(context.get("is_social_call", False))
         is_group_memory_query = bool(context.get("is_group_memory_query", False))
-        is_followup = bool(memory_hints.get("is_followup", False))
+        is_followup = bool(context.get("is_followup", False) or memory_hints.get("is_followup", False))
         returning_state = str(memory_hints.get("returning_member_state", "") or "").strip()
         strong_directed = is_at or is_directed or trigger_type in {"at_trigger", "quote_followup_trigger"}
-        is_question_like = reply_mode in {"qa_fast", "qa_with_context"}
+        # 这里把“问答感知”从 reply_mode 扩展到真实轻量信号：
+        # 1. 之前很多记忆开关依赖本地先判好的 reply_mode，本质上还是“本地先替模型下结论”；
+        # 2. 现在 question_detected / is_question / followup / social_call 都能单独抬高上下文力度；
+        # 3. 这样即使 reply_mode 只是一个推荐值，模型也能拿到更完整的现场素材再自行裁决。
+        is_question_like = reply_mode in {"qa_fast", "qa_with_context"} or question_detected or is_question or is_followup
 
         # 这个策略专门解决“记忆很重、人格很弱”的问题：
         # 1. 普通 social_short 基本不喂长期记忆，只保留最小现场感；
@@ -1268,14 +1305,15 @@ class AIAutoResponsePlugin(MessagePluginInterface):
         # 1. 用户希望回答能带上群里的长期背景和互动关系；
         # 2. 关系记忆仍会经过相关性过滤，所以放宽入口不会直接把无关关系灌进去；
         # 3. 这样技术问答里也更容易利用“谁经常和谁接话、谁常问哪类问题”的弱背景。
-        allow_social_memory = is_question_like or is_group_memory_query
+        allow_social_memory = is_question_like or is_group_memory_query or is_social_call
         # “最近都聊什么”这类问题，本身就是在问群级记忆，
         # 所以哪怕当前只是普通问答入口，也要把群事实和向量层放开。
-        allow_group_facts = reply_mode == "qa_with_context" or is_group_memory_query
+        allow_group_facts = reply_mode == "qa_with_context" or is_group_memory_query or (question_detected and bool(context.get("topic", "")))
         allow_vector_memory = (
             reply_mode == "qa_with_context"
             or returning_state == "long_absent_member"
             or is_group_memory_query
+            or (question_detected and bool(context.get("topic", "")))
         )
 
         return {