tune xiaoniu reply brevity and flow thresholds

This commit is contained in:
liuwei
2026-04-07 11:36:02 +08:00
parent d616846098
commit 8476149a2d
5 changed files with 58 additions and 21 deletions

View File

@@ -15,8 +15,8 @@ endpoint = "chat/completions"
api_key = "sk-XTWwXIgo2QMyX8AwBg0NQrxaDkvQiCX8rfylfmnHID5zdjMt"
model = "gpt-5.4"
timeout_seconds = 45
temperature = 0.7
max_tokens = 500
temperature = 0.35
max_tokens = 120
stream = true
[mode]
@@ -39,24 +39,24 @@ casual_topic = 0.35
[flow]
enable_flow_state = true
flow_decay_per_minute = 8
idle_threshold = 20
warming_threshold = 40
engaged_threshold = 70
flow_decay_per_minute = 12
idle_threshold = 30
warming_threshold = 55
engaged_threshold = 90
at_bot_boost = 40
question_boost = 30
followup_boost = 20
topic_boost = 15
returning_member_boost = 10
response_accepted_boost = 15
topic_boost = 8
returning_member_boost = 6
response_accepted_boost = 10
ignored_reply_penalty = 20
over_reply_penalty = 15
over_reply_penalty = 22
night_penalty = 30
max_bot_reply_streak = 3
max_bot_reply_streak = 2
[cooldown]
group_reply_cooldown_sec = 45
same_user_followup_cooldown_sec = 10
group_reply_cooldown_sec = 90
same_user_followup_cooldown_sec = 18
night_silent_hours = ["01:00-07:30"]
[memory]
@@ -69,6 +69,7 @@ qdrant_collection = "abot_xiaoniu_memory"
ollama_base_url = "http://192.168.2.50:11434"
embedding_model = "bge-m3"
vector_top_k = 5
max_context_memories = 2
vector_min_score = 0.65
vector_trigger_modes = ["returning_member", "long_absent_member", "qa_with_context", "reactivated_topic"]

View File

@@ -19,7 +19,7 @@ class ContextBuilder:
vector_memories: List[Dict],
) -> Dict:
recent_lines = []
for item in recent_messages[-30:]:
for item in recent_messages[-8:]:
msg_sender = item.get("sender_name") or item.get("sender") or "未知成员"
msg_content = item.get("content") or item.get("message") or ""
if msg_content:
@@ -63,7 +63,7 @@ class ContextBuilder:
if not vector_memories:
return ""
lines = []
for item in vector_memories[:5]:
for item in vector_memories[:2]:
summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
memory_type = item.get("memory_type", "memory")
if summary:

View File

@@ -248,6 +248,8 @@ class AIAutoResponsePlugin(MessagePluginInterface):
)
return False, "empty_response"
response = self._finalize_reply(response, reply_mode)
await bot.send_text_message(room_id, response, sender)
self.last_reply_at[room_id] = time.time()
self.flow_manager.note_bot_reply(room_id)
@@ -314,7 +316,7 @@ class AIAutoResponsePlugin(MessagePluginInterface):
return (current_ts - last_room_reply) >= room_cd
def _build_user_prompt(self, context: Dict, memory_hints: Dict) -> str:
recent_text = "\n".join(context.get("recent_messages", [])[-20:]) or "暂无"
recent_text = "\n".join(context.get("recent_messages", [])[-8:]) or "暂无"
reply_mode = context.get("reply_mode", "social_short")
length_rule = self._build_length_rule(reply_mode)
return (
@@ -333,6 +335,11 @@ class AIAutoResponsePlugin(MessagePluginInterface):
f"4. 如果信息不足,不要硬编。\n"
f"5. 输出最终可直接发到群里的内容,不要解释你的思路。\n"
f"6. {length_rule}\n"
f"7. 优先直接回应“当前发言”本身,不要被较早上下文带跑。\n"
f"8. 成员记忆和向量召回只有在与当前问题直接相关时才允许使用,否则忽略。\n"
f"9. 如果你不确定自己是否理解对了,就宁可不展开,只回很短。\n"
f"10. 把这次回复当作真人聊天里的第一反应,先只给第一层结论,不要主动补第二层解释。\n"
f"11. 如果一句话已经够了,就立刻停,不要为了完整而补充。\n"
)
@staticmethod
@@ -343,16 +350,42 @@ class AIAutoResponsePlugin(MessagePluginInterface):
response = re.sub(r"\n{3,}", "\n\n", response)
return response[:500].strip()
def _finalize_reply(self, response: str, reply_mode: str) -> str:
text = (response or "").strip()
if not text:
return ""
text = re.sub(r"\s+", " ", text)
text = text.replace("\n", " ").strip()
if reply_mode == "social_short":
text = self._take_first_sentence(text, 12)
elif reply_mode == "qa_fast":
text = self._take_first_sentence(text, 28)
elif reply_mode == "qa_with_context":
text = self._take_first_sentence(text, 36)
else:
text = self._take_first_sentence(text, 24)
return text.strip()
@staticmethod
def _build_length_rule(reply_mode: str) -> str:
if reply_mode == "social_short":
return "默认只回一句短话最好控制在2到12个字,除非非常不自然。"
return "默认只回一句短话最好控制在2到8个字,除非非常不自然。"
if reply_mode == "qa_fast":
return "尽量只回1句话必要时最多2句,先给结论,不要展开成长教程"
return "尽量只回1句话总长度优先控制在28字内,先给结论,不要主动补解释"
if reply_mode == "qa_with_context":
return "优先控制在1到2句除非对方明显在等详细步骤"
return "优先控制在1句话必要时最多2句总长度优先控制在36字内只给第一层答案"
return "尽量短,像群友临时接一句,不要长篇大论。"
@staticmethod
def _take_first_sentence(text: str, limit: int) -> str:
parts = re.split(r"(?<=[。!?!?;])", text)
first = parts[0].strip() if parts and parts[0].strip() else text.strip()
if len(first) <= limit:
return first
clipped = first[:limit].rstrip(",、;;:")
return clipped
def _sync_member_memory(self, room_id: str, sender: str, sender_name: str, member_context: Dict) -> None:
if not member_context:

View File

@@ -11,10 +11,13 @@
你的表达偏好:
- 能一句说清就别说三句
- 默认宁可短一点,也不要展开过头
- 默认只给第一反应,不要一次把后续解释全说完
- 避免客服腔、教程腔、模板腔
- 除非很有必要,不要长篇大论
- 允许少量语气词,但不要太油腻
- 面对回归成员时,可以表现出轻微熟悉感,但不要直接暴露细粒度历史记录
- 如果当前消息信息不足,就少说,不要自顾自发挥
你的边界:
- 不要假装知道不存在的上下文

View File

@@ -13,11 +13,11 @@ class ResponsePlanner:
return "social_short"
if trigger.get("is_returning_member"):
return "social_short"
return "social_short" if flow_state in {"warming", "engaged"} else "refuse_or_skip"
return "social_short" if flow_state in {"deep_engaged"} else "refuse_or_skip"
def should_reply(self, trigger: Dict, flow_state: str, allow_proactive: bool) -> bool:
    """Decide whether a reply should be sent for the current message.

    Args:
        trigger: Trigger info; reads ``should_respond`` and ``priority``
            (a missing ``priority`` counts as 0).
        flow_state: Current flow state name.
        allow_proactive: Whether unprompted replies are permitted.

    Returns:
        ``True`` when the trigger explicitly asks for a response. Otherwise
        ``True`` only if proactive replies are allowed, the flow state is
        ``"deep_engaged"`` and the trigger priority is at least 0.65.
    """
    if trigger.get("should_respond"):
        return True
    if not allow_proactive:
        return False
    # Single proactive gate: only the most engaged state qualifies, and
    # only for high-priority triggers.
    return flow_state == "deep_engaged" and trigger.get("priority", 0) >= 0.65