diff --git a/plugins/ai_auto_response/config.toml b/plugins/ai_auto_response/config.toml
index c082605..8f5ae79 100644
--- a/plugins/ai_auto_response/config.toml
+++ b/plugins/ai_auto_response/config.toml
@@ -15,8 +15,8 @@ endpoint = "chat/completions"
 api_key = "sk-XTWwXIgo2QMyX8AwBg0NQrxaDkvQiCX8rfylfmnHID5zdjMt"
 model = "gpt-5.4"
 timeout_seconds = 45
-temperature = 0.7
-max_tokens = 500
+temperature = 0.35
+max_tokens = 120
 stream = true
 
 [mode]
@@ -39,24 +39,24 @@ casual_topic = 0.35
 
 [flow]
 enable_flow_state = true
-flow_decay_per_minute = 8
-idle_threshold = 20
-warming_threshold = 40
-engaged_threshold = 70
+flow_decay_per_minute = 12
+idle_threshold = 30
+warming_threshold = 55
+engaged_threshold = 90
 at_bot_boost = 40
 question_boost = 30
 followup_boost = 20
-topic_boost = 15
-returning_member_boost = 10
-response_accepted_boost = 15
+topic_boost = 8
+returning_member_boost = 6
+response_accepted_boost = 10
 ignored_reply_penalty = 20
-over_reply_penalty = 15
+over_reply_penalty = 22
 night_penalty = 30
-max_bot_reply_streak = 3
+max_bot_reply_streak = 2
 
 [cooldown]
-group_reply_cooldown_sec = 45
-same_user_followup_cooldown_sec = 10
+group_reply_cooldown_sec = 90
+same_user_followup_cooldown_sec = 18
 night_silent_hours = ["01:00-07:30"]
 
 [memory]
@@ -69,6 +69,7 @@ qdrant_collection = "abot_xiaoniu_memory"
 ollama_base_url = "http://192.168.2.50:11434"
 embedding_model = "bge-m3"
 vector_top_k = 5
+max_context_memories = 2
 vector_min_score = 0.65
 vector_trigger_modes = ["returning_member", "long_absent_member", "qa_with_context", "reactivated_topic"]
 
diff --git a/plugins/ai_auto_response/context_builder.py b/plugins/ai_auto_response/context_builder.py
index ae398ae..e44a9b6 100644
--- a/plugins/ai_auto_response/context_builder.py
+++ b/plugins/ai_auto_response/context_builder.py
@@ -19,7 +19,7 @@ class ContextBuilder:
         vector_memories: List[Dict],
     ) -> Dict:
         recent_lines = []
-        for item in recent_messages[-30:]:
+        for item in recent_messages[-8:]:
             msg_sender = item.get("sender_name") or item.get("sender") or "未知成员"
             msg_content = item.get("content") or item.get("message") or ""
             if msg_content:
@@ -63,7 +63,7 @@ class ContextBuilder:
         if not vector_memories:
             return ""
         lines = []
-        for item in vector_memories[:5]:
+        for item in vector_memories[:2]:
             summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
             memory_type = item.get("memory_type", "memory")
             if summary:
diff --git a/plugins/ai_auto_response/main.py b/plugins/ai_auto_response/main.py
index f670a6d..f78c204 100644
--- a/plugins/ai_auto_response/main.py
+++ b/plugins/ai_auto_response/main.py
@@ -248,6 +248,8 @@ class AIAutoResponsePlugin(MessagePluginInterface):
             )
             return False, "empty_response"
 
+        response = self._finalize_reply(response, reply_mode)
+
         await bot.send_text_message(room_id, response, sender)
         self.last_reply_at[room_id] = time.time()
         self.flow_manager.note_bot_reply(room_id)
@@ -314,7 +316,7 @@ class AIAutoResponsePlugin(MessagePluginInterface):
         return (current_ts - last_room_reply) >= room_cd
 
     def _build_user_prompt(self, context: Dict, memory_hints: Dict) -> str:
-        recent_text = "\n".join(context.get("recent_messages", [])[-20:]) or "暂无"
+        recent_text = "\n".join(context.get("recent_messages", [])[-8:]) or "暂无"
         reply_mode = context.get("reply_mode", "social_short")
         length_rule = self._build_length_rule(reply_mode)
         return (
@@ -333,6 +335,11 @@ class AIAutoResponsePlugin(MessagePluginInterface):
             f"4. 如果信息不足，不要硬编。\n"
             f"5. 输出最终可直接发到群里的内容，不要解释你的思路。\n"
             f"6. {length_rule}\n"
+            f"7. 优先直接回应“当前发言”本身，不要被较早上下文带跑。\n"
+            f"8. 成员记忆和向量召回只有在与当前问题直接相关时才允许使用，否则忽略。\n"
+            f"9. 如果你不确定自己是否理解对了，就宁可不展开，只回很短。\n"
+            f"10. 把这次回复当作真人聊天里的第一反应，先只给第一层结论，不要主动补第二层解释。\n"
+            f"11. 如果一句话已经够了，就立刻停，不要为了完整而补充。\n"
         )
 
     @staticmethod
@@ -343,16 +350,42 @@ class AIAutoResponsePlugin(MessagePluginInterface):
         response = re.sub(r"\n{3,}", "\n\n", response)
         return response[:500].strip()
 
+    def _finalize_reply(self, response: str, reply_mode: str) -> str:
+        """Flatten a model reply to one line and hard-cap it to a single sentence.
+
+        The length rules in the prompt are only a request to the model; this is
+        the enforced cap. Returns "" when ``response`` is empty, ``None`` or
+        whitespace-only.
+        """
+        # ``\s+`` already matches newlines, so one pass yields a single line.
+        text = re.sub(r"\s+", " ", response or "").strip()
+        if not text:
+            return ""
+
+        # Per-mode character budgets; any other reply mode falls back to 24.
+        limits = {"social_short": 12, "qa_fast": 28, "qa_with_context": 36}
+        limit = limits.get(reply_mode, 24)
+        return self._take_first_sentence(text, limit).strip()
+
     @staticmethod
     def _build_length_rule(reply_mode: str) -> str:
         if reply_mode == "social_short":
-            return "默认只回一句短话，最好控制在2到12个字，除非非常不自然。"
+            return "默认只回一句短话，最好控制在2到8个字，除非非常不自然。"
         if reply_mode == "qa_fast":
-            return "尽量只回1句话，必要时最多2句，先给结论，不要展开成长教程。"
+            return "尽量只回1句话，总长度优先控制在28字内，先给结论，不要主动补解释。"
         if reply_mode == "qa_with_context":
-            return "优先控制在1到2句，除非对方明显在等详细步骤。"
+            return "优先控制在1句话，总长度优先控制在36字内，只给第一层答案。"  # one sentence only: _finalize_reply drops everything after the first sentence
         return "尽量短，像群友临时接一句，不要长篇大论。"
 
+    @staticmethod
+    def _take_first_sentence(text: str, limit: int) -> str:
+        """Return the first sentence of ``text``, clipped to at most ``limit`` characters."""
+        # Keep the whole terminator run plus closing quotes/brackets ("？？？", “好。”) with the sentence.
+        match = re.match(r"\s*(.+?[。！？!?；;]+[”’\"'）)」』]*)", text, re.S)
+        first = match.group(1).strip() if match else text.strip()
+        # When clipping mid-sentence, drop a separator left dangling at the cut.
+        return first if len(first) <= limit else first[:limit].rstrip("，,、；;：:")
+
 
     def _sync_member_memory(self, room_id: str, sender: str, sender_name: str, member_context: Dict) -> None:
         if not member_context:
diff --git a/plugins/ai_auto_response/persona/xiaoniu.txt b/plugins/ai_auto_response/persona/xiaoniu.txt
index d9b2409..6d64128 100644
--- a/plugins/ai_auto_response/persona/xiaoniu.txt
+++ b/plugins/ai_auto_response/persona/xiaoniu.txt
@@ -11,10 +11,13 @@
 
 你的表达偏好：
 - 能一句说清就别说三句
+- 默认宁可短一点，也不要展开过头
+- 默认只给第一反应，不要一次把后续解释全说完
 - 避免客服腔、教程腔、模板腔
 - 除非很有必要，不要长篇大论
 - 允许少量语气词，但不要太油腻
 - 面对回归成员时，可以表现出轻微熟悉感，但不要直接暴露细粒度历史记录
+- 如果当前消息信息不足，就少说，不要自顾自发挥
 
 你的边界：
 - 不要假装知道不存在的上下文
diff --git a/plugins/ai_auto_response/response_planner.py b/plugins/ai_auto_response/response_planner.py
index 7b78ccc..4c9c41f 100644
--- a/plugins/ai_auto_response/response_planner.py
+++ b/plugins/ai_auto_response/response_planner.py
@@ -13,11 +13,11 @@ class ResponsePlanner:
             return "social_short"
         if trigger.get("is_returning_member"):
             return "social_short"
-        return "social_short" if flow_state in {"warming", "engaged"} else "refuse_or_skip"
+        return "social_short" if flow_state in {"deep_engaged"} else "refuse_or_skip"
 
     def should_reply(self, trigger: Dict, flow_state: str, allow_proactive: bool) -> bool:
         if trigger.get("should_respond"):
             return True
         if not allow_proactive:
             return False
-        return flow_state in {"warming", "engaged", "deep_engaged"} and trigger.get("priority", 0) >= 0.35
+        return flow_state in {"deep_engaged"} and trigger.get("priority", 0) >= 0.65