拆分群昵称与正文避免话题识别被昵称污染

This commit is contained in:
liuwei
2026-04-24 15:19:14 +08:00
parent 5dc72bf7d2
commit 66b0fe16da
5 changed files with 104 additions and 10 deletions

View File

@@ -958,7 +958,8 @@ class AIAutoResponsePlugin(MessagePluginInterface):
"group_profile": group_profile_text,
"context": context_text,
"memory": memory_text,
"current_message": f"{sender_name}: {content}",
# 当前消息不再用“昵称: 正文”的混合写法,避免模型把昵称词汇当成当前话题的一部分。
"current_message": self._format_current_message_block(sender_name, content),
"control": "\n".join(control_lines),
"images": files,
}
@@ -1005,12 +1006,54 @@ class AIAutoResponsePlugin(MessagePluginInterface):
sender = str(item.get("sender", "") or "未知成员").strip()
content = str(item.get("content", "") or "").strip()
if sender and content:
compact = re.sub(r"\s+", " ", content).strip()
if len(compact) > max_line_chars:
compact = compact[: max_line_chars - 3].rstrip() + "..."
lines.append(f"{sender}: {compact}")
# 最近消息统一改成“发言人字段 + 正文字段”的单行结构化格式:
# 1. 保留 30 条上下文时,仍然是一条消息一行,不会因为多行格式把上下文窗口挤爆;
# 2. 模型可以继续感知是谁说的,但更不容易把昵称里的词误当成话题正文;
# 3. 如果消息里本身带 @ 标记,也显式单列出来,减少对正文理解的污染。
lines.append(
AIAutoResponsePlugin._format_recent_message_line(
idx=int(item.get("idx", 0) or 0),
sender_name=sender,
content=content,
max_line_chars=max_line_chars,
is_at=bool(item.get("is_at")),
)
)
return "\n".join(lines)
@staticmethod
def _sanitize_inline_message_field(value: str, max_chars: int) -> str:
# 这里专门给传模型的“单行结构化消息”做字段清洗,避免换行和分隔符把结构打散。
text = re.sub(r"\s+", " ", str(value or "")).strip()
text = text.replace("|", "")
if len(text) > max_chars:
return text[: max_chars - 3].rstrip() + "..."
return text
@classmethod
def _format_recent_message_line(
cls,
*,
idx: int,
sender_name: str,
content: str,
max_line_chars: int,
is_at: bool = False,
) -> str:
sender = cls._sanitize_inline_message_field(sender_name, max_chars=24) or "未知成员"
body = cls._sanitize_inline_message_field(content, max_chars=max(max_line_chars, 20))
parts = [f"[{max(idx, 1):02d}]", f"发言人={sender}", f"正文={body}"]
if is_at:
parts.append("@bot=Y")
return " | ".join(parts)
@classmethod
def _format_current_message_block(cls, sender_name: str, content: str) -> str:
# 当前消息使用两行结构化文本,让工作流里的模型更容易区分“谁说的”和“说了什么”。
sender = cls._sanitize_inline_message_field(sender_name, max_chars=24) or "未知成员"
body = cls._sanitize_inline_message_field(content, max_chars=500)
return f"发言人={sender}\n正文={body}"
@staticmethod
def _string_block(title: str, value: Any) -> str:
text = str(value or "").strip()