优化ai_auto_response提示词与短回复策略:场景优先级、防冲突压缩、记忆相关性筛选、可配置长度限制
- 增加场景优先级规则,技术群优先结论与排查点,降低人设冲突
- Dify 入参新增上下文压缩、画像与记忆去重、低相关记忆过滤
- 回复后处理支持配置化长度阈值,并增加总字数上限裁剪
- 新增 prompt_compact/reply 配置项,便于后续按群微调
This commit is contained in:
@@ -53,6 +53,33 @@ long_absent_member_days = 30
|
||||
memory_lookback_days = 180
|
||||
active_context_hours = 8
|
||||
|
||||
[reply]
|
||||
social_short_char_limit = 30
|
||||
social_short_total_limit = 30
|
||||
qa_fast_char_limit = 34
|
||||
qa_fast_total_limit = 34
|
||||
qa_with_context_sentence_limit = 2
|
||||
qa_with_context_chunk_limit = 2
|
||||
qa_with_context_char_limit = 32
|
||||
qa_with_context_total_limit = 55
|
||||
default_char_limit = 28
|
||||
default_total_limit = 28
|
||||
|
||||
[prompt_compact]
|
||||
group_profile_max_chars = 560
|
||||
group_profile_max_lines = 10
|
||||
context_max_chars = 900
|
||||
context_max_lines = 18
|
||||
recent_message_max_lines = 8
|
||||
recent_message_line_max_chars = 60
|
||||
at_member_profile_max_chars = 300
|
||||
at_member_profile_max_lines = 8
|
||||
member_memory_max_chars = 520
|
||||
member_memory_max_lines = 12
|
||||
memory_max_chars = 900
|
||||
memory_max_lines = 18
|
||||
strict_memory_relevance = true
|
||||
|
||||
[image]
|
||||
recent_followup_window_minutes = 5
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ def build_user_prompt(context: Dict, memory_hints: Dict) -> str:
|
||||
|
||||
rules = [
|
||||
"只处理当前发言对应的一个话题,优先直接回答当前发言。",
|
||||
"规则优先级:当前发言可验证信息 > 群场景约束 > 人设措辞润色。",
|
||||
"如果是明确问题,先给结论;只给第一层答案,不主动展开第二层解释。",
|
||||
length_rule,
|
||||
"能少说就少说,优先像群友随口接一句,不要写成说明文。",
|
||||
@@ -48,6 +49,8 @@ def build_user_prompt(context: Dict, memory_hints: Dict) -> str:
|
||||
rules.append("这次是对方点名互动,优先参考“本次@发起者画像”,语气贴近对方,但不要过度装熟。")
|
||||
if group_profile.get("knowledge_domain") == "dota":
|
||||
rules.append("如果对方问的是 Dota2 最近战绩、实时战绩、最新对局数据,要委婉说明现在没法提取,不要硬编。")
|
||||
if str(group_profile.get("mode", "") or "") in {"robotics", "openclaw"}:
|
||||
rules.append("当前是技术群场景,优先给结论+一个关键排查点,少情绪铺垫,不用夸张亲昵称呼。")
|
||||
|
||||
sections = [
|
||||
_section(
|
||||
|
||||
@@ -1,23 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import List
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
def finalize_reply(response: str, reply_mode: str, limits: Dict | None = None) -> List[str]:
    """Normalize a raw model reply and trim it to the limits of ``reply_mode``.

    Args:
        response: Raw reply text; ``None`` or blank input yields an empty list.
        reply_mode: ``"social_short"``, ``"qa_fast"`` or ``"qa_with_context"``;
            any other value takes the single-sentence fallback path.
        limits: Optional overrides forwarded to ``_resolve_limits``. Omitting
            it keeps the call compatible with the two-argument form.

    Returns:
        The reply chunks after per-chunk splitting and total-length clipping.
    """
    text = str(response or "").strip()
    if not text:
        return []
    # Collapsing every whitespace run already removes newlines, so no separate
    # newline replacement is needed afterwards.
    text = re.sub(r"\s+", " ", text).strip()

    options = _resolve_limits(reply_mode, limits or {})
    if reply_mode in {"social_short", "qa_fast", "qa_with_context"}:
        # _resolve_limits pins sentence_limit/chunk_limit to 1 for the two
        # short modes, so one call covers all three known modes.
        chunks = split_reply_chunks(
            text,
            sentence_limit=options["sentence_limit"],
            char_limit=options["char_limit"],
            chunk_limit=options["chunk_limit"],
            allow_clip_split=False,
        )
    else:
        chunks = [take_first_sentence(text, options["default_char_limit"]).strip()]
    return _clip_total_chars(chunks, options["total_limit"])
|
||||
|
||||
|
||||
def preview_text(text: str, limit: int = 80) -> str:
|
||||
@@ -33,7 +56,7 @@ def build_length_rule(reply_mode: str) -> str:
|
||||
if reply_mode == "qa_fast":
|
||||
return "优先1句话,尽量控制在34字内;先给结论,不要展开。"
|
||||
if reply_mode == "qa_with_context":
|
||||
return "优先1句;必要时最多2句,每句尽量控制在36字内,只给第一层答案。"
|
||||
return "优先1句;必要时最多2句,每句尽量控制在32字内,只给第一层答案。"
|
||||
return "尽量短,像群友临时接一句,不要长篇大论。"
|
||||
|
||||
|
||||
@@ -101,3 +124,67 @@ def _find_split_at(window: str, punctuation: str, lookback: int = 10) -> int:
|
||||
if window[idx] in punctuation:
|
||||
return idx
|
||||
return -1
|
||||
|
||||
|
||||
def _resolve_limits(reply_mode: str, limits: Dict) -> Dict[str, int]:
    """Resolve the effective length limits for ``reply_mode``.

    Args:
        reply_mode: ``"social_short"``, ``"qa_fast"`` or ``"qa_with_context"``.
            Any other value reads the ``default_*`` keys.
        limits: Mapping of override values (the ``[reply]`` config section).
            Missing, falsy or non-numeric entries fall back to the built-in
            default for that key instead of raising.

    Returns:
        A dict with ``sentence_limit``, ``chunk_limit``, ``char_limit``,
        ``total_limit`` and ``default_char_limit``. Character limits are
        floored at 8; sentence and chunk limits are floored at 1 and are only
        configurable for ``"qa_with_context"``.
    """
    mode_defaults = {
        "social_short": {"sentence_limit": 1, "char_limit": 30, "chunk_limit": 1, "total_limit": 30},
        "qa_fast": {"sentence_limit": 1, "char_limit": 34, "chunk_limit": 1, "total_limit": 34},
        "qa_with_context": {"sentence_limit": 2, "char_limit": 32, "chunk_limit": 2, "total_limit": 55},
    }
    fallback = {"sentence_limit": 1, "char_limit": 28, "chunk_limit": 1, "total_limit": 28}
    defaults = mode_defaults.get(reply_mode, fallback)
    # Known modes read "<mode>_*" keys; everything else reads "default_*".
    prefix = reply_mode if reply_mode in mode_defaults else "default"
    config = limits or {}

    def pick(key: str, default: int, floor: int) -> int:
        # Falsy values (0, "", None) mean "not configured".
        raw = config.get(key, default) or default
        try:
            value = int(raw)
        except (TypeError, ValueError, OverflowError):
            # A malformed config entry must not abort reply finalization.
            value = default
        return max(value, floor)

    resolved = {
        "sentence_limit": 1,
        "chunk_limit": 1,
        "char_limit": pick(f"{prefix}_char_limit", defaults["char_limit"], 8),
        "total_limit": pick(f"{prefix}_total_limit", defaults["total_limit"], 8),
        "default_char_limit": pick("default_char_limit", 28, 8),
    }
    if reply_mode == "qa_with_context":
        resolved["sentence_limit"] = pick(
            "qa_with_context_sentence_limit", defaults["sentence_limit"], 1
        )
        resolved["chunk_limit"] = pick(
            "qa_with_context_chunk_limit", defaults["chunk_limit"], 1
        )
    return resolved
|
||||
|
||||
|
||||
def _clip_total_chars(chunks: List[str], total_limit: int) -> List[str]:
    """Keep chunks in order until their combined length reaches ``total_limit``.

    Blank chunks are dropped. The first chunk that does not fit the remaining
    budget is shortened with ``smart_clip`` and ends the result; everything
    after it is discarded. The limit is floored at 8 characters.
    """
    if not chunks:
        return []
    budget = max(int(total_limit or 0), 8)
    kept: List[str] = []
    for raw in chunks:
        piece = str(raw or "").strip()
        if not piece:
            continue
        if budget <= 0:
            break
        if len(piece) > budget:
            # Does not fit: keep whatever smart_clip can salvage, then stop.
            tail = smart_clip(piece, budget)
            if tail:
                kept.append(tail)
            break
        kept.append(piece)
        budget -= len(piece)
    return kept
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
import asyncio
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
@@ -98,6 +99,8 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
||||
self.queue_worker_count = 1
|
||||
self.queue_maxsize = 200
|
||||
self.queue_workers: List[asyncio.Task] = []
|
||||
self.reply_limits: Dict[str, Any] = {}
|
||||
self.prompt_compact_config: Dict[str, Any] = {}
|
||||
|
||||
def initialize(self, context: Dict[str, Any]) -> bool:
|
||||
self.LOG = logger
|
||||
@@ -134,6 +137,8 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
||||
self.filters = self._config.get("filters", {}) or {}
|
||||
self.mode_config = self._config.get("mode", {}) or {}
|
||||
self.cooldown_config = self._config.get("cooldown", {}) or {}
|
||||
self.reply_limits = self._config.get("reply", {}) or {}
|
||||
self.prompt_compact_config = self._config.get("prompt_compact", {}) or {}
|
||||
self.cooldown = CooldownManager(self.cooldown_config)
|
||||
self.image_config = self._config.get("image", {}) or {}
|
||||
self.spam_config = self._config.get("spam_guard", {}) or {}
|
||||
@@ -573,7 +578,7 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
||||
)
|
||||
return False, "llm_empty_reply"
|
||||
|
||||
reply_chunks = finalize_reply(reply_text, reply_mode)
|
||||
reply_chunks = finalize_reply(reply_text, reply_mode, self.reply_limits)
|
||||
final_response_text = "\n".join(reply_chunks)
|
||||
reply_dedup_expiry = int(self.cooldown_config.get("reply_dedup_window_sec", 90))
|
||||
if not reply_chunks or self.dedup.should_skip_duplicate_reply(
|
||||
@@ -753,28 +758,68 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
||||
files: List[Dict[str, Any]],
|
||||
) -> Dict[str, Any]:
|
||||
persona = self._compose_dify_persona_text(group_profile, context)
|
||||
group_profile_text = str(context.get("group_profile_prompt", "") or "").strip() or "当前群没有特殊画像。"
|
||||
group_profile_text = self._compact_text(
|
||||
str(context.get("group_profile_prompt", "") or "").strip() or "当前群没有特殊画像。",
|
||||
max_chars=int(self.prompt_compact_config.get("group_profile_max_chars", 560) or 560),
|
||||
max_lines=int(self.prompt_compact_config.get("group_profile_max_lines", 10) or 10),
|
||||
)
|
||||
|
||||
context_parts = [
|
||||
self._string_block("最近上下文", self._join_recent_messages(context)),
|
||||
self._string_block(
|
||||
"最近上下文",
|
||||
self._join_recent_messages(
|
||||
context,
|
||||
max_lines=int(self.prompt_compact_config.get("recent_message_max_lines", 8) or 8),
|
||||
max_line_chars=int(self.prompt_compact_config.get("recent_message_line_max_chars", 60) or 60),
|
||||
),
|
||||
),
|
||||
self._string_block("引用补充", context.get("quote_prompt", "")),
|
||||
self._string_block("图片补充", context.get("image_prompt", "")),
|
||||
self._string_block("图片谨慎提示", context.get("image_safety_prompt", "")),
|
||||
]
|
||||
context_text = "\n\n".join([part for part in context_parts if part]).strip() or "无额外上下文。"
|
||||
context_text = self._compact_text(
|
||||
"\n\n".join([part for part in context_parts if part]).strip() or "无额外上下文。",
|
||||
max_chars=int(self.prompt_compact_config.get("context_max_chars", 900) or 900),
|
||||
max_lines=int(self.prompt_compact_config.get("context_max_lines", 18) or 18),
|
||||
)
|
||||
|
||||
at_member_profile_text = self._compact_text(
|
||||
str(context.get("at_member_profile_prompt", "") or ""),
|
||||
max_chars=int(self.prompt_compact_config.get("at_member_profile_max_chars", 300) or 300),
|
||||
max_lines=int(self.prompt_compact_config.get("at_member_profile_max_lines", 8) or 8),
|
||||
)
|
||||
member_memory_text = self._compact_text(
|
||||
str(context.get("memory_prompt", "") or ""),
|
||||
max_chars=int(self.prompt_compact_config.get("member_memory_max_chars", 520) or 520),
|
||||
max_lines=int(self.prompt_compact_config.get("member_memory_max_lines", 12) or 12),
|
||||
)
|
||||
member_memory_text = self._remove_overlap_lines(member_memory_text, at_member_profile_text)
|
||||
|
||||
memory_parts = [
|
||||
self._string_block("本次@发起者画像(优先)", context.get("at_member_profile_prompt", "")),
|
||||
self._string_block("成员记忆", context.get("memory_prompt", "")),
|
||||
self._string_block("群关系记忆", context.get("social_memory_prompt", "")),
|
||||
self._string_block("群事实记忆", context.get("group_facts_prompt", "")),
|
||||
self._string_block("向量召回记忆", context.get("vector_memory_prompt", "")),
|
||||
self._string_block("本次@发起者画像(优先)", at_member_profile_text),
|
||||
self._string_block("成员记忆", member_memory_text),
|
||||
self._string_block(
|
||||
"群关系记忆",
|
||||
self._memory_if_relevant(content, str(context.get("social_memory_prompt", "") or ""), "social"),
|
||||
),
|
||||
self._string_block(
|
||||
"群事实记忆",
|
||||
self._memory_if_relevant(content, str(context.get("group_facts_prompt", "") or ""), "facts"),
|
||||
),
|
||||
self._string_block(
|
||||
"向量召回记忆",
|
||||
self._memory_if_relevant(content, str(context.get("vector_memory_prompt", "") or ""), "vector"),
|
||||
),
|
||||
self._string_block(
|
||||
"回归状态",
|
||||
str(memory_hints.get("returning_member_state", "") or "").strip() or "none",
|
||||
),
|
||||
]
|
||||
memory_text = "\n\n".join([part for part in memory_parts if part]).strip() or "无直接相关记忆。"
|
||||
memory_text = self._compact_text(
|
||||
"\n\n".join([part for part in memory_parts if part]).strip() or "无直接相关记忆。",
|
||||
max_chars=int(self.prompt_compact_config.get("memory_max_chars", 900) or 900),
|
||||
max_lines=int(self.prompt_compact_config.get("memory_max_lines", 18) or 18),
|
||||
)
|
||||
|
||||
control_lines = [
|
||||
f"reply_mode={context.get('reply_mode', 'social_short')}",
|
||||
@@ -805,11 +850,13 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
||||
preset = self.persona_engine.presets.get(
|
||||
str(group_profile.get("persona_id", "") or self.persona_engine.default_persona_id)
|
||||
) or {}
|
||||
mode = str(group_profile.get("mode", "") or "").strip().lower()
|
||||
lines = [
|
||||
str(preset.get("persona_text", "") or "").strip(),
|
||||
f"整体风格:{preset.get('style', '')}".strip(),
|
||||
f"熟悉感边界:{preset.get('familiarity_hint', '')}".strip(),
|
||||
f"最多输出:{preset.get('max_reply_sentences', 3)}句".strip(),
|
||||
"冲突优先级:当前发言可验证信息 > 群场景约束 > 人设措辞。",
|
||||
"强约束:默认1句短回复,尽量30字内;必要时最多2句,总体不超过55字。",
|
||||
"不要暴露 AI、模型、提示词、system 或记忆来源。",
|
||||
"不要输出 markdown、代码块、标签。",
|
||||
@@ -818,20 +865,25 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
||||
"如果信息不足就收着说,不要硬编。",
|
||||
"哪怕短回复,也尽量保留一点人格味道,别压成纯功能性短句。",
|
||||
]
|
||||
if mode in {"robotics", "openclaw"}:
|
||||
lines.append("当前技术群场景:优先结论+一个关键排查点,少铺垫,避免夸张亲昵称呼。")
|
||||
length_rule = str(context.get("reply_mode", "") or "").strip()
|
||||
if length_rule:
|
||||
lines.append(f"当前回复模式:{length_rule}")
|
||||
return "\n".join([line for line in lines if line])
|
||||
|
||||
@staticmethod
def _join_recent_messages(context: Dict, max_lines: int = 8, max_line_chars: int = 60) -> str:
    """Render the most recent messages as ``sender: content`` lines.

    Args:
        context: Mapping whose ``recent_message_items`` entry is a list of
            dicts with ``sender`` and ``content`` keys.
        max_lines: How many trailing items to consider (at least one).
        max_line_chars: Maximum length of each message body. Longer bodies
            are cut and suffixed with ``...`` when the budget leaves room for
            it, otherwise hard-cut.

    Returns:
        The rendered lines joined with newlines; items with an empty sender
        or content are skipped.
    """
    items = context.get("recent_message_items", []) or []
    keep = max(max_lines, 1)
    budget = max(max_line_chars, 1)
    lines = []
    for item in items[-keep:]:
        sender = str(item.get("sender", "") or "未知成员").strip()
        content = str(item.get("content", "") or "").strip()
        if not (sender and content):
            continue
        compact = re.sub(r"\s+", " ", content).strip()
        if len(compact) > budget:
            if budget > 3:
                compact = compact[: budget - 3].rstrip() + "..."
            else:
                # No room for an ellipsis; a negative slice bound here would
                # make the "truncated" text longer than the budget.
                compact = compact[:budget]
        lines.append(f"{sender}: {compact}")
    return "\n".join(lines)
|
||||
|
||||
@staticmethod
|
||||
@@ -841,6 +893,90 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
||||
return ""
|
||||
return f"{title}:\n{text}"
|
||||
|
||||
def _memory_if_relevant(self, content: str, memory_text: str, memory_type: str) -> str:
    """Return a compacted memory block, or ``""`` when it should be omitted.

    With ``strict_memory_relevance`` enabled (the default) the memory is kept
    only if ``_is_text_relevant`` finds an overlap with ``content``; a skipped
    memory is reported through a ``memory_skip`` log event. With the flag
    disabled the memory is always kept.
    """
    body = str(memory_text or "").strip()
    if not body:
        return ""
    strict_mode = bool(self.prompt_compact_config.get("strict_memory_relevance", True))
    if strict_mode and not self._is_text_relevant(content, body):
        self._log_event(
            "memory_skip",
            memory_type=memory_type,
            reason="not_relevant",
            content_preview=preview_text(content, 36),
        )
        return ""
    return self._compact_text(body, max_chars=360, max_lines=8)
|
||||
|
||||
@staticmethod
def _compact_text(text: str, max_chars: int, max_lines: int) -> str:
    """Squeeze ``text`` into at most ``max_lines`` lines and ``max_chars`` characters.

    Blank lines are dropped and whitespace runs inside each line collapse to
    one space. When ``max_lines`` is positive, only the first ``max_lines``
    lines are kept. If the joined text is still too long it is cut, trailing
    punctuation is stripped and ``...`` is appended; budgets too small to hold
    an ellipsis get a plain hard cut so the result never exceeds
    ``max_chars``.
    """
    raw = str(text or "").strip()
    if not raw:
        return ""
    lines = [re.sub(r"\s+", " ", line).strip() for line in raw.splitlines() if line and line.strip()]
    if max_lines > 0 and len(lines) > max_lines:
        lines = lines[:max_lines]
    merged = "\n".join(lines).strip()
    if len(merged) <= max_chars:
        return merged
    budget = max(max_chars, 0)
    if budget <= 3:
        # ``budget - 3`` would be zero or negative here; a negative slice
        # bound counts from the end and would overshoot the limit.
        return merged[:budget]
    return merged[: budget - 3].rstrip(" ,,;;。.!?!?::") + "..."
|
||||
|
||||
@staticmethod
def _remove_overlap_lines(base_text: str, reference_text: str) -> str:
    """Drop lines of ``base_text`` that duplicate a line of ``reference_text``.

    Lines are compared after ``_normalize_overlap_token``. A base line is
    removed when its normalized form contains, or is contained in, any
    normalized reference line. Base lines that normalize to an empty string
    are removed as well. With no reference lines, the stripped non-blank base
    lines are returned unchanged.
    """
    candidates = [s for s in (ln.strip() for ln in str(base_text or "").splitlines()) if s]
    if not candidates:
        return ""
    ref_lines = [s for s in (ln.strip() for ln in str(reference_text or "").splitlines()) if s]
    if not ref_lines:
        return "\n".join(candidates)

    normalize = AIAutoResponsePlugin._normalize_overlap_token
    ref_keys = [key for key in map(normalize, ref_lines) if key]
    survivors: List[str] = []
    for line in candidates:
        key = normalize(line)
        if not key:
            continue
        # Substring containment in either direction also covers equality.
        if any(key in ref or ref in key for ref in ref_keys):
            continue
        survivors.append(line)
    return "\n".join(survivors)
|
||||
|
||||
@staticmethod
def _normalize_overlap_token(text: str) -> str:
    """Lower-case ``text`` and delete the listed punctuation, hyphens and whitespace."""
    lowered = str(text or "").strip().lower()
    return re.sub(r"[::,,;;。.!?!?\-\s]", "", lowered)
|
||||
|
||||
@staticmethod
def _is_text_relevant(content: str, memory_text: str) -> bool:
    """Report whether ``content`` and ``memory_text`` share at least one relevance token."""
    extract = AIAutoResponsePlugin._extract_relevance_tokens
    content_tokens = extract(content)
    memory_tokens = extract(memory_text)
    # An empty token set on either side gives an empty intersection, so one
    # truthiness check covers both the guard and the overlap threshold.
    return bool(content_tokens & memory_tokens)
|
||||
|
||||
@staticmethod
def _extract_relevance_tokens(text: str) -> set[str]:
    """Extract the token set used for the memory relevance check.

    Tokens are (a) runs of two or more ASCII letters, digits, underscores or
    hyphens taken from the lower-cased text and (b) every entry of a fixed
    keyword list that occurs anywhere in the text as a substring.
    """
    raw = str(text or "").lower()
    # A trailing "-" in a character class is a literal hyphen. The previous
    # "\\-" in a raw string also admitted a literal backslash, so a path such
    # as ``C:\tmp`` produced the token ``\tmp`` instead of ``tmp``.
    tokens = set(re.findall(r"[a-z0-9_-]{2,}", raw))
    zh_keywords = (
        "机器人", "插件", "部署", "报错", "配置", "接口", "脚本", "微信", "群", "记忆", "成本",
        "价格", "api", "模型", "功能", "菜单", "指令", "回复", "引用", "上下文",
    )
    tokens.update(keyword for keyword in zh_keywords if keyword in raw)
    return tokens
|
||||
|
||||
def _build_dify_image_files(self, *, user_id: str, image_urls: List[str]) -> List[Dict[str, Any]]:
|
||||
files: List[Dict[str, Any]] = []
|
||||
for index, image_url in enumerate(image_urls or [], start=1):
|
||||
|
||||
Reference in New Issue
Block a user