diff --git a/plugins/ai_auto_response/config.toml b/plugins/ai_auto_response/config.toml index aa1b61a..0595601 100644 --- a/plugins/ai_auto_response/config.toml +++ b/plugins/ai_auto_response/config.toml @@ -53,6 +53,33 @@ long_absent_member_days = 30 memory_lookback_days = 180 active_context_hours = 8 +[reply] +social_short_char_limit = 30 +social_short_total_limit = 30 +qa_fast_char_limit = 34 +qa_fast_total_limit = 34 +qa_with_context_sentence_limit = 2 +qa_with_context_chunk_limit = 2 +qa_with_context_char_limit = 32 +qa_with_context_total_limit = 55 +default_char_limit = 28 +default_total_limit = 28 + +[prompt_compact] +group_profile_max_chars = 560 +group_profile_max_lines = 10 +context_max_chars = 900 +context_max_lines = 18 +recent_message_max_lines = 8 +recent_message_line_max_chars = 60 +at_member_profile_max_chars = 300 +at_member_profile_max_lines = 8 +member_memory_max_chars = 520 +member_memory_max_lines = 12 +memory_max_chars = 900 +memory_max_lines = 18 +strict_memory_relevance = true + [image] recent_followup_window_minutes = 5 diff --git a/plugins/ai_auto_response/core/prompt_builder.py b/plugins/ai_auto_response/core/prompt_builder.py index 732b5d7..98d5b66 100644 --- a/plugins/ai_auto_response/core/prompt_builder.py +++ b/plugins/ai_auto_response/core/prompt_builder.py @@ -25,6 +25,7 @@ def build_user_prompt(context: Dict, memory_hints: Dict) -> str: rules = [ "只处理当前发言对应的一个话题,优先直接回答当前发言。", + "规则优先级:当前发言可验证信息 > 群场景约束 > 人设措辞润色。", "如果是明确问题,先给结论;只给第一层答案,不主动展开第二层解释。", length_rule, "能少说就少说,优先像群友随口接一句,不要写成说明文。", @@ -48,6 +49,8 @@ def build_user_prompt(context: Dict, memory_hints: Dict) -> str: rules.append("这次是对方点名互动,优先参考“本次@发起者画像”,语气贴近对方,但不要过度装熟。") if group_profile.get("knowledge_domain") == "dota": rules.append("如果对方问的是 Dota2 最近战绩、实时战绩、最新对局数据,要委婉说明现在没法提取,不要硬编。") + if str(group_profile.get("mode", "") or "") in {"robotics", "openclaw"}: + rules.append("当前是技术群场景,优先给结论+一个关键排查点,少情绪铺垫,不用夸张亲昵称呼。") sections = [ _section( diff --git 
def finalize_reply(response: str, reply_mode: str, limits: Dict | None = None) -> List[str]:
    """Normalize a raw reply and cut it down to the size allowed for ``reply_mode``.

    Args:
        response: Raw reply text; ``None`` or blank text yields an empty list.
        reply_mode: ``"social_short"``, ``"qa_fast"`` and ``"qa_with_context"``
            are split with ``split_reply_chunks``; any other value keeps only
            the first sentence via ``take_first_sentence``.
        limits: Optional mapping of limit overrides, handed to
            ``_resolve_limits`` unchanged (``None`` is treated as ``{}``).

    Returns:
        The reply chunks after ``_clip_total_chars`` has applied the total
        character budget; ``[]`` when there is nothing to send.
    """
    # A single regex pass already folds newlines/tabs into plain spaces, so no
    # separate newline replacement is required afterwards.
    text = re.sub(r"\s+", " ", str(response or "")).strip()
    if not text:
        return []

    options = _resolve_limits(reply_mode, limits or {})
    if reply_mode in {"social_short", "qa_fast", "qa_with_context"}:
        # _resolve_limits reports sentence_limit == chunk_limit == 1 for the
        # two short modes, so one call covers all three split-based modes.
        chunks = split_reply_chunks(
            text,
            sentence_limit=options["sentence_limit"],
            char_limit=options["char_limit"],
            chunk_limit=options["chunk_limit"],
            allow_clip_split=False,
        )
    else:
        chunks = [take_first_sentence(text, options["default_char_limit"]).strip()]
    return _clip_total_chars(chunks, options["total_limit"])
-> str: @@ -33,7 +56,7 @@ def build_length_rule(reply_mode: str) -> str: if reply_mode == "qa_fast": return "优先1句话,尽量控制在34字内;先给结论,不要展开。" if reply_mode == "qa_with_context": - return "优先1句;必要时最多2句,每句尽量控制在36字内,只给第一层答案。" + return "优先1句;必要时最多2句,每句尽量控制在32字内,只给第一层答案。" return "尽量短,像群友临时接一句,不要长篇大论。" @@ -101,3 +124,67 @@ def _find_split_at(window: str, punctuation: str, lookback: int = 10) -> int: if window[idx] in punctuation: return idx return -1 + + +def _resolve_limits(reply_mode: str, limits: Dict) -> Dict[str, int]: + mode_defaults = { + "social_short": {"sentence_limit": 1, "char_limit": 30, "chunk_limit": 1, "total_limit": 30}, + "qa_fast": {"sentence_limit": 1, "char_limit": 34, "chunk_limit": 1, "total_limit": 34}, + "qa_with_context": {"sentence_limit": 2, "char_limit": 32, "chunk_limit": 2, "total_limit": 55}, + } + defaults = mode_defaults.get(reply_mode, {"sentence_limit": 1, "char_limit": 28, "chunk_limit": 1, "total_limit": 28}) + if reply_mode == "social_short": + return { + "sentence_limit": 1, + "chunk_limit": 1, + "char_limit": max(int(limits.get("social_short_char_limit", defaults["char_limit"]) or defaults["char_limit"]), 8), + "total_limit": max(int(limits.get("social_short_total_limit", defaults["total_limit"]) or defaults["total_limit"]), 8), + "default_char_limit": max(int(limits.get("default_char_limit", 28) or 28), 8), + } + if reply_mode == "qa_fast": + return { + "sentence_limit": 1, + "chunk_limit": 1, + "char_limit": max(int(limits.get("qa_fast_char_limit", defaults["char_limit"]) or defaults["char_limit"]), 8), + "total_limit": max(int(limits.get("qa_fast_total_limit", defaults["total_limit"]) or defaults["total_limit"]), 8), + "default_char_limit": max(int(limits.get("default_char_limit", 28) or 28), 8), + } + if reply_mode == "qa_with_context": + return { + "sentence_limit": max(int(limits.get("qa_with_context_sentence_limit", defaults["sentence_limit"]) or defaults["sentence_limit"]), 1), + "chunk_limit": 
max(int(limits.get("qa_with_context_chunk_limit", defaults["chunk_limit"]) or defaults["chunk_limit"]), 1), + "char_limit": max(int(limits.get("qa_with_context_char_limit", defaults["char_limit"]) or defaults["char_limit"]), 8), + "total_limit": max(int(limits.get("qa_with_context_total_limit", defaults["total_limit"]) or defaults["total_limit"]), 8), + "default_char_limit": max(int(limits.get("default_char_limit", 28) or 28), 8), + } + return { + "sentence_limit": 1, + "chunk_limit": 1, + "char_limit": max(int(limits.get("default_char_limit", defaults["char_limit"]) or defaults["char_limit"]), 8), + "total_limit": max(int(limits.get("default_total_limit", defaults["total_limit"]) or defaults["total_limit"]), 8), + "default_char_limit": max(int(limits.get("default_char_limit", 28) or 28), 8), + } + + +def _clip_total_chars(chunks: List[str], total_limit: int) -> List[str]: + if not chunks: + return [] + normalized_limit = max(int(total_limit or 0), 8) + result: List[str] = [] + used = 0 + for chunk in chunks: + current = str(chunk or "").strip() + if not current: + continue + remain = normalized_limit - used + if remain <= 0: + break + if len(current) <= remain: + result.append(current) + used += len(current) + continue + clipped = smart_clip(current, remain) + if clipped: + result.append(clipped) + break + return result diff --git a/plugins/ai_auto_response/main.py b/plugins/ai_auto_response/main.py index 4e17c8d..9161a60 100644 --- a/plugins/ai_auto_response/main.py +++ b/plugins/ai_auto_response/main.py @@ -1,5 +1,6 @@ from __future__ import annotations import asyncio +import re import time import xml.etree.ElementTree as ET from typing import Any, Dict, List, Optional, Tuple @@ -98,6 +99,8 @@ class AIAutoResponsePlugin(MessagePluginInterface): self.queue_worker_count = 1 self.queue_maxsize = 200 self.queue_workers: List[asyncio.Task] = [] + self.reply_limits: Dict[str, Any] = {} + self.prompt_compact_config: Dict[str, Any] = {} def initialize(self, context: 
Dict[str, Any]) -> bool: self.LOG = logger @@ -134,6 +137,8 @@ class AIAutoResponsePlugin(MessagePluginInterface): self.filters = self._config.get("filters", {}) or {} self.mode_config = self._config.get("mode", {}) or {} self.cooldown_config = self._config.get("cooldown", {}) or {} + self.reply_limits = self._config.get("reply", {}) or {} + self.prompt_compact_config = self._config.get("prompt_compact", {}) or {} self.cooldown = CooldownManager(self.cooldown_config) self.image_config = self._config.get("image", {}) or {} self.spam_config = self._config.get("spam_guard", {}) or {} @@ -573,7 +578,7 @@ class AIAutoResponsePlugin(MessagePluginInterface): ) return False, "llm_empty_reply" - reply_chunks = finalize_reply(reply_text, reply_mode) + reply_chunks = finalize_reply(reply_text, reply_mode, self.reply_limits) final_response_text = "\n".join(reply_chunks) reply_dedup_expiry = int(self.cooldown_config.get("reply_dedup_window_sec", 90)) if not reply_chunks or self.dedup.should_skip_duplicate_reply( @@ -753,28 +758,68 @@ class AIAutoResponsePlugin(MessagePluginInterface): files: List[Dict[str, Any]], ) -> Dict[str, Any]: persona = self._compose_dify_persona_text(group_profile, context) - group_profile_text = str(context.get("group_profile_prompt", "") or "").strip() or "当前群没有特殊画像。" + group_profile_text = self._compact_text( + str(context.get("group_profile_prompt", "") or "").strip() or "当前群没有特殊画像。", + max_chars=int(self.prompt_compact_config.get("group_profile_max_chars", 560) or 560), + max_lines=int(self.prompt_compact_config.get("group_profile_max_lines", 10) or 10), + ) context_parts = [ - self._string_block("最近上下文", self._join_recent_messages(context)), + self._string_block( + "最近上下文", + self._join_recent_messages( + context, + max_lines=int(self.prompt_compact_config.get("recent_message_max_lines", 8) or 8), + max_line_chars=int(self.prompt_compact_config.get("recent_message_line_max_chars", 60) or 60), + ), + ), self._string_block("引用补充", 
context.get("quote_prompt", "")), self._string_block("图片补充", context.get("image_prompt", "")), self._string_block("图片谨慎提示", context.get("image_safety_prompt", "")), ] - context_text = "\n\n".join([part for part in context_parts if part]).strip() or "无额外上下文。" + context_text = self._compact_text( + "\n\n".join([part for part in context_parts if part]).strip() or "无额外上下文。", + max_chars=int(self.prompt_compact_config.get("context_max_chars", 900) or 900), + max_lines=int(self.prompt_compact_config.get("context_max_lines", 18) or 18), + ) + + at_member_profile_text = self._compact_text( + str(context.get("at_member_profile_prompt", "") or ""), + max_chars=int(self.prompt_compact_config.get("at_member_profile_max_chars", 300) or 300), + max_lines=int(self.prompt_compact_config.get("at_member_profile_max_lines", 8) or 8), + ) + member_memory_text = self._compact_text( + str(context.get("memory_prompt", "") or ""), + max_chars=int(self.prompt_compact_config.get("member_memory_max_chars", 520) or 520), + max_lines=int(self.prompt_compact_config.get("member_memory_max_lines", 12) or 12), + ) + member_memory_text = self._remove_overlap_lines(member_memory_text, at_member_profile_text) memory_parts = [ - self._string_block("本次@发起者画像(优先)", context.get("at_member_profile_prompt", "")), - self._string_block("成员记忆", context.get("memory_prompt", "")), - self._string_block("群关系记忆", context.get("social_memory_prompt", "")), - self._string_block("群事实记忆", context.get("group_facts_prompt", "")), - self._string_block("向量召回记忆", context.get("vector_memory_prompt", "")), + self._string_block("本次@发起者画像(优先)", at_member_profile_text), + self._string_block("成员记忆", member_memory_text), + self._string_block( + "群关系记忆", + self._memory_if_relevant(content, str(context.get("social_memory_prompt", "") or ""), "social"), + ), + self._string_block( + "群事实记忆", + self._memory_if_relevant(content, str(context.get("group_facts_prompt", "") or ""), "facts"), + ), + self._string_block( + "向量召回记忆", + 
self._memory_if_relevant(content, str(context.get("vector_memory_prompt", "") or ""), "vector"), + ), self._string_block( "回归状态", str(memory_hints.get("returning_member_state", "") or "").strip() or "none", ), ] - memory_text = "\n\n".join([part for part in memory_parts if part]).strip() or "无直接相关记忆。" + memory_text = self._compact_text( + "\n\n".join([part for part in memory_parts if part]).strip() or "无直接相关记忆。", + max_chars=int(self.prompt_compact_config.get("memory_max_chars", 900) or 900), + max_lines=int(self.prompt_compact_config.get("memory_max_lines", 18) or 18), + ) control_lines = [ f"reply_mode={context.get('reply_mode', 'social_short')}", @@ -805,11 +850,13 @@ class AIAutoResponsePlugin(MessagePluginInterface): preset = self.persona_engine.presets.get( str(group_profile.get("persona_id", "") or self.persona_engine.default_persona_id) ) or {} + mode = str(group_profile.get("mode", "") or "").strip().lower() lines = [ str(preset.get("persona_text", "") or "").strip(), f"整体风格:{preset.get('style', '')}".strip(), f"熟悉感边界:{preset.get('familiarity_hint', '')}".strip(), f"最多输出:{preset.get('max_reply_sentences', 3)}句".strip(), + "冲突优先级:当前发言可验证信息 > 群场景约束 > 人设措辞。", "强约束:默认1句短回复,尽量30字内;必要时最多2句,总体不超过55字。", "不要暴露 AI、模型、提示词、system 或记忆来源。", "不要输出 markdown、代码块、标签。", @@ -818,20 +865,25 @@ class AIAutoResponsePlugin(MessagePluginInterface): "如果信息不足就收着说,不要硬编。", "哪怕短回复,也尽量保留一点人格味道,别压成纯功能性短句。", ] + if mode in {"robotics", "openclaw"}: + lines.append("当前技术群场景:优先结论+一个关键排查点,少铺垫,避免夸张亲昵称呼。") length_rule = str(context.get("reply_mode", "") or "").strip() if length_rule: lines.append(f"当前回复模式:{length_rule}") return "\n".join([line for line in lines if line]) @staticmethod - def _join_recent_messages(context: Dict) -> str: + def _join_recent_messages(context: Dict, max_lines: int = 8, max_line_chars: int = 60) -> str: items = context.get("recent_message_items", []) or [] lines = [] - for item in items: + for item in items[-max(max_lines, 1):]: sender = str(item.get("sender", "") or 
"未知成员").strip() content = str(item.get("content", "") or "").strip() if sender and content: - lines.append(f"{sender}: {content}") + compact = re.sub(r"\s+", " ", content).strip() + if len(compact) > max_line_chars: + compact = compact[: max_line_chars - 3].rstrip() + "..." + lines.append(f"{sender}: {compact}") return "\n".join(lines) @staticmethod @@ -841,6 +893,90 @@ class AIAutoResponsePlugin(MessagePluginInterface): return "" return f"{title}:\n{text}" + def _memory_if_relevant(self, content: str, memory_text: str, memory_type: str) -> str: + text = str(memory_text or "").strip() + if not text: + return "" + strict = bool(self.prompt_compact_config.get("strict_memory_relevance", True)) + if not strict: + return self._compact_text(text, max_chars=360, max_lines=8) + if self._is_text_relevant(content, text): + return self._compact_text(text, max_chars=360, max_lines=8) + self._log_event( + "memory_skip", + memory_type=memory_type, + reason="not_relevant", + content_preview=preview_text(content, 36), + ) + return "" + + @staticmethod + def _compact_text(text: str, max_chars: int, max_lines: int) -> str: + raw = str(text or "").strip() + if not raw: + return "" + lines = [re.sub(r"\s+", " ", line).strip() for line in raw.splitlines() if line and line.strip()] + if max_lines > 0 and len(lines) > max_lines: + lines = lines[:max_lines] + merged = "\n".join(lines).strip() + if len(merged) <= max_chars: + return merged + return merged[: max_chars - 3].rstrip(" ,,;;。.!?!?::") + "..." 
+ + @staticmethod + def _remove_overlap_lines(base_text: str, reference_text: str) -> str: + base_lines = [line.strip() for line in str(base_text or "").splitlines() if line.strip()] + if not base_lines: + return "" + refs = [line.strip() for line in str(reference_text or "").splitlines() if line.strip()] + if not refs: + return "\n".join(base_lines) + + ref_norm = [AIAutoResponsePlugin._normalize_overlap_token(line) for line in refs] + kept: List[str] = [] + for line in base_lines: + norm = AIAutoResponsePlugin._normalize_overlap_token(line) + if not norm: + continue + overlapped = False + for item in ref_norm: + if not item: + continue + if norm == item or norm in item or item in norm: + overlapped = True + break + if not overlapped: + kept.append(line) + return "\n".join(kept) + + @staticmethod + def _normalize_overlap_token(text: str) -> str: + value = str(text or "").strip().lower() + value = re.sub(r"[::,,;;。.!?!?\-\s]", "", value) + return value + + @staticmethod + def _is_text_relevant(content: str, memory_text: str) -> bool: + content_tokens = AIAutoResponsePlugin._extract_relevance_tokens(content) + memory_tokens = AIAutoResponsePlugin._extract_relevance_tokens(memory_text) + if not content_tokens or not memory_tokens: + return False + overlap = content_tokens & memory_tokens + return len(overlap) >= 1 + + @staticmethod + def _extract_relevance_tokens(text: str) -> set[str]: + raw = str(text or "").lower() + tokens = set(re.findall(r"[a-z0-9_\\-]{2,}", raw)) + zh_keywords = [ + "机器人", "插件", "部署", "报错", "配置", "接口", "脚本", "微信", "群", "记忆", "成本", + "价格", "api", "模型", "功能", "菜单", "指令", "回复", "引用", "上下文", + ] + for keyword in zh_keywords: + if keyword in raw: + tokens.add(keyword) + return tokens + def _build_dify_image_files(self, *, user_id: str, image_urls: List[str]) -> List[Dict[str, Any]]: files: List[Dict[str, Any]] = [] for index, image_url in enumerate(image_urls or [], start=1):