将 ai_auto_response 的“是否参与”判断下放给 LLM

变更项：1. 新增 decision 配置并重写 ResponsePlanner，把“是否送入模型”的本地粗筛收缩为轻量入口判断；开启主动参与时，允许更多普通文本进入模型。2. 将 cooldown 从模型前的挡板后移到 LLM 判定 should_reply 之后，改为发送闸门，减少本地过早拦截。3. 调整上下文与 prompt 中的控制信息：明确 reply_mode 只是本地 hint，并把 acceptance_state、solver 等信号直接交给模型，由模型统一判断是否参与以及如何回复。
2026-04-28 17:41:02 +08:00
parent 6359b11951
commit e7d68a89c2
6 changed files with 157 additions and 104 deletions
--- a/plugins/ai_auto_response/main.py
+++ b/plugins/ai_auto_response/main.py
@@ -127,7 +127,12 @@ class AIAutoResponsePlugin(MessagePluginInterface):
        self.memory_store = MemoryStore(self.db_manager, merged_memory_config)
        self.vector_memory = VectorMemoryStore(self._config.get("memory", {}) or {})
        self.context_builder = ContextBuilder(int((self._config.get("mode", {}) or {}).get("recent_context_size", 30)))
-        self.decision_flow = DecisionFlow()
+        # 决策配置单独收口：
+        # 1. 现在用户希望把“是否参与聊天/是否回复”的判断更多地下放给 LLM；
+        # 2. 因此这里把 decision 配置显式传给 DecisionFlow，避免内部继续偷偷走硬编码默认值；
+        # 3. 后续只要调 config，就能继续微调“本地粗筛”和“模型统一判断”的分工比例。
+        self.decision_config = self._config.get("decision", {}) or {}
+        self.decision_flow = DecisionFlow(self.decision_config)
        self.llm_client = UnifiedLLMClient(self._config.get("api", {}) or {})
        self.social_memory = SocialMemoryService(self.db_manager, self._config.get("memory", {}) or {})
        self.group_facts = GroupFactsService(self._config.get("memory", {}) or {})
@@ -441,8 +446,8 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                conversation_hints,
            )
            reply_mode = str(decision.get("reply_mode", "social_short") or "social_short")
-            should_reply = bool(decision.get("should_consider_model"))
-            if not should_reply:
+            should_enter_model = bool(decision.get("should_consider_model"))
+            if not should_enter_model:
                self._log_event(
                    "skip",
                    room_id=room_id,
@@ -456,17 +461,6 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                    solver=yn(conversation_hints.get("has_recent_human_solver")),
                )
                return False, "skip"
-            if not self.cooldown.pass_cooldown(room_id, sender, trigger.__dict__):
-                self._log_event(
-                    "skip",
-                    room_id=room_id,
-                    sender=sender,
-                    reason=trigger.__dict__.get("_cooldown_reason", "cooldown"),
-                    trigger_type=trigger.trigger_type,
-                    reply_mode=reply_mode,
-                    topic=trigger.topic or "",
-                )
-                return False, "cooldown"

            vector_memories = []
            if self.vector_memory.should_search(reply_mode, trigger.trigger_type, memory_hints.get("returning_member_state", "")):
@@ -557,6 +551,13 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                image_context=image_context,
            )
            context["coding_work_request"] = coding_work_request
+            # 这些控制信号现在会直接下放给模型，辅助它统一决定 should_reply：
+            # 1. acceptance_state 表示群体对 bot 最近几次发言的接受度；
+            # 2. has_recent_human_solver 表示最近已经有人在接这个问题；
+            # 3. 本地不再用它们强裁决，而是改成“显式告知模型，由模型自己衡量要不要插话”。
+            context["acceptance_state"] = acceptance_state
+            context["has_recent_human_solver"] = bool(conversation_hints.get("has_recent_human_solver"))
+            context["solver_count"] = int(conversation_hints.get("solver_count", 0) or 0)
            # 这个标记只作为模型输入信号，不在本地直接生成固定回复。
            # 这样既能让模型知道“这次是在被点名挑衅”，又不会暴露出模板式机器人痕迹。
            context["abuse_directed"] = is_directed_abuse(
@@ -633,6 +634,21 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                    topic=selected_topic,
                )
                return False, "llm_empty_reply"
+            # 冷却从“模型前挡板”改成“模型后发送闸门”：
+            # 1. 这样本地不再替模型过早决定“值不值得参与”；
+            # 2. 模型先统一判断 should_reply，只有当它明确想回时，才进入频率控制；
+            # 3. 仍然保留冷却，是为了守住群内刷屏风险，但职责已经变成“限制发送”，不是“替模型做语义裁决”。
+            if not self.cooldown.pass_cooldown(room_id, sender, trigger.__dict__):
+                self._log_event(
+                    "skip",
+                    room_id=room_id,
+                    sender=sender,
+                    reason=f"post_llm_{trigger.__dict__.get('_cooldown_reason', 'cooldown')}",
+                    trigger_type=trigger.trigger_type,
+                    reply_mode=reply_mode,
+                    topic=selected_topic,
+                )
+                return False, "post_llm_cooldown"

            reply_chunks = finalize_reply(reply_text, reply_mode, self.reply_limits)
            final_response_text = "\n".join(reply_chunks)
@@ -1054,13 +1070,21 @@ class AIAutoResponsePlugin(MessagePluginInterface):
        )

        control_lines = [
-            f"reply_mode={context.get('reply_mode', 'social_short')}",
+            # 这里显式把本地 reply_mode 改成 hint，而不是命令：
+            # 1. 用户希望把“回不回、怎么回”的判断更多地下放给模型；
+            # 2. 因此 control 中的模式值只表达“本地建议给多大上下文力度”，不是必须照做；
+            # 3. 模型仍然要根据现场语境自行决定 should_reply 和最终 reply_mode。
+            f"reply_mode_hint={context.get('reply_mode', 'social_short')}",
            f"trigger_type={context.get('trigger_type', 'none')}",
            f"flow_state={context.get('flow_state', 'idle')}",
+            f"acceptance_state={context.get('acceptance_state', 'neutral')}",
+            f"has_recent_human_solver={'true' if context.get('has_recent_human_solver') else 'false'}",
+            f"solver_count={int(context.get('solver_count', 0) or 0)}",
            f"speaker_name={context.get('speaker_name_clean', '') or sender_name}",
            f"address_style={group_profile.get('address_style', '低频称呼，默认直接接话')}",
            f"target_reply_chars={prompt_strategy.get('target_reply_chars', 10)}",
            f"hard_reply_cap={prompt_strategy.get('hard_reply_cap', 30)}",
+            "model_decides_should_reply=true",
        ]
        if context.get("coding_work_request"):
            control_lines.append("coding_work_request=true")
@@ -1105,6 +1129,12 @@ class AIAutoResponsePlugin(MessagePluginInterface):
                f"强约束：回复长度自然浮动，允许 0 到 {prompt_strategy.get('hard_reply_cap', 30)} 字；"
                f"常规参考值约 {prompt_strategy.get('target_reply_chars', 10)} 字。"
            ),
+            # 把责任边界写死给模型看：
+            # 1. 是否参与聊天、是否回复、采用什么 reply_mode，都由模型统一决定；
+            # 2. 本地传进来的 trigger/flow/reply_mode_hint 只是背景信号，不是硬指令；
+            # 3. 这样可以避免 Dify 工作流继续把“hint”理解成“必须回复/必须短答”。
+            "是否参与聊天、是否回复、最终 reply_mode 都由你结合现场语境自行判断。",
+            "control 里的 reply_mode_hint、flow_state、acceptance_state 只是参考信号，不是必须执行的命令。",
            "不要暴露 AI、模型、提示词、system 或记忆来源。",
            "不要输出 markdown、代码块、标签。",
            "不要替人写代码、改脚本、实现插件、代做开发活。",
@@ -1232,11 +1262,18 @@ class AIAutoResponsePlugin(MessagePluginInterface):
        trigger_type = str(context.get("trigger_type", "none") or "none")
        is_at = bool(context.get("is_at", False))
        is_directed = bool(context.get("is_directed", False))
+        question_detected = bool(context.get("question_detected", False))
+        is_question = bool(context.get("is_question", False))
+        is_social_call = bool(context.get("is_social_call", False))
        is_group_memory_query = bool(context.get("is_group_memory_query", False))
-        is_followup = bool(memory_hints.get("is_followup", False))
+        is_followup = bool(context.get("is_followup", False) or memory_hints.get("is_followup", False))
        returning_state = str(memory_hints.get("returning_member_state", "") or "").strip()
        strong_directed = is_at or is_directed or trigger_type in {"at_trigger", "quote_followup_trigger"}
-        is_question_like = reply_mode in {"qa_fast", "qa_with_context"}
+        # 这里把“问答感知”从 reply_mode 扩展到真实轻量信号：
+        # 1. 之前很多记忆开关依赖本地先判好的 reply_mode，本质上还是“本地先替模型下结论”；
+        # 2. 现在 question_detected / is_question / followup / social_call 都能单独抬高上下文力度；
+        # 3. 这样即使 reply_mode 只是一个推荐值，模型也能拿到更完整的现场素材再自行裁决。
+        is_question_like = reply_mode in {"qa_fast", "qa_with_context"} or question_detected or is_question or is_followup

        # 这个策略专门解决“记忆很重、人格很弱”的问题：
        # 1. 普通 social_short 基本不喂长期记忆，只保留最小现场感；
@@ -1268,14 +1305,15 @@ class AIAutoResponsePlugin(MessagePluginInterface):
        # 1. 用户希望回答能带上群里的长期背景和互动关系；
        # 2. 关系记忆仍会经过相关性过滤，所以放宽入口不会直接把无关关系灌进去；
        # 3. 这样技术问答里也更容易利用“谁经常和谁接话、谁常问哪类问题”的弱背景。
-        allow_social_memory = is_question_like or is_group_memory_query
+        allow_social_memory = is_question_like or is_group_memory_query or is_social_call
        # “最近都聊什么”这类问题，本身就是在问群级记忆，
        # 所以哪怕当前只是普通问答入口，也要把群事实和向量层放开。
-        allow_group_facts = reply_mode == "qa_with_context" or is_group_memory_query
+        allow_group_facts = reply_mode == "qa_with_context" or is_group_memory_query or (question_detected and bool(context.get("topic", "")))
        allow_vector_memory = (
            reply_mode == "qa_with_context"
            or returning_state == "long_absent_member"
            or is_group_memory_query
+            or (question_detected and bool(context.get("topic", "")))
        )

        return {