拆分群昵称与正文避免话题识别被昵称污染
This commit is contained in:
@@ -74,7 +74,7 @@ group_profile_max_lines = 6
|
||||
# 1. recent_message_max_lines 提到 30,避免“窗口明明有 30,提示词里只留下 4 条”;
|
||||
# 2. context_max_lines/context_max_chars 一起抬高,避免最近消息刚拼进去又被整体截断;
|
||||
# 3. recent_message_line_max_chars 稍微放宽,让模型能看到每条消息更多细节,但仍避免单条刷屏。
|
||||
context_max_chars = 4200
|
||||
context_max_chars = 5600
|
||||
context_max_lines = 40
|
||||
recent_message_max_lines = 30
|
||||
recent_message_line_max_chars = 100
|
||||
|
||||
@@ -30,11 +30,19 @@ class ContextBuilder:
|
||||
) -> Dict:
|
||||
selected_messages = self._select_recent_messages(recent_messages, sender, content, quote_context or {})
|
||||
recent_lines = []
|
||||
for item in selected_messages:
|
||||
for idx, item in enumerate(selected_messages, start=1):
|
||||
msg_sender = item.get("sender_name") or item.get("sender") or "未知成员"
|
||||
msg_content = item.get("content") or item.get("message") or ""
|
||||
if msg_content:
|
||||
recent_lines.append(f"{msg_sender}: {msg_content}")
|
||||
# 这里把“发言人”和“正文”拆开保存,避免后续模型把昵称词汇误当成讨论主题。
|
||||
recent_lines.append(
|
||||
self._format_recent_message_line(
|
||||
idx=idx,
|
||||
sender_name=str(msg_sender),
|
||||
content=str(msg_content),
|
||||
is_at=bool(item.get("is_at")),
|
||||
)
|
||||
)
|
||||
return {
|
||||
"group_profile": group_profile or {"room_id": room_id},
|
||||
"speaker_profile": {
|
||||
@@ -67,7 +75,7 @@ class ContextBuilder:
|
||||
"image_safety_prompt": self._build_image_safety_prompt(
|
||||
(quote_context or {}).get("image_safety") or {}
|
||||
),
|
||||
"current_message": f"{sender_name}: {content}",
|
||||
"current_message": self._format_current_message_block(sender_name, content),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -85,6 +93,31 @@ class ContextBuilder:
|
||||
})
|
||||
return items
|
||||
|
||||
@staticmethod
def _sanitize_inline_field(value: str, max_chars: int = 120) -> str:
    """Flatten ``value`` into a single-line field that is safe to embed.

    Whitespace runs (including newlines) collapse to one space and every
    ``|`` becomes ``/`` so the text cannot break the " | "-separated layout
    used for structured single-line message records.

    Args:
        value: Raw field text; falsy values are treated as empty.
        max_chars: Upper bound on the length of the returned text.

    Returns:
        The cleaned text, never longer than ``max(max_chars, 0)``. Over-long
        text is cut and suffixed with ``...`` when the budget has room for
        the ellipsis; otherwise it is hard-cut to the budget.
    """
    text = re.sub(r"\s+", " ", str(value or "")).strip()
    text = text.replace("|", "/")
    if len(text) <= max_chars:
        return text
    if max_chars < 3:
        # No room for the ellipsis. A negative slice bound would keep almost
        # all of the text and overshoot the limit, so hard-cut instead.
        return text[: max(max_chars, 0)]
    return text[: max_chars - 3].rstrip() + "..."
|
||||
|
||||
@classmethod
def _format_recent_message_line(
    cls,
    idx: int,
    sender_name: str,
    content: str,
    is_at: bool = False,
    max_line_chars: int = 120,
) -> str:
    """Render one recent message as a single structured line.

    The speaker and the message body are emitted as separate fields so the
    speaker's nickname is not mistaken for part of the discussion topic.

    Args:
        idx: Ordinal of the message, rendered zero-padded to two digits.
        sender_name: Display name of the speaker; limited to 24 characters,
            with a placeholder used when it is empty after cleaning.
        content: Message body.
        is_at: When true, an ``@bot=Y`` field is appended.
        max_line_chars: Character budget for the body field. Defaults to
            120 (the previously hard-coded value); values below 20 are
            raised to 20.

    Returns:
        The fields joined with " | ", e.g. ``[01] | 发言人=... | 正文=...``.
    """
    sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员"
    # Floor the budget so a tiny or misconfigured limit cannot wipe out the body.
    body = cls._sanitize_inline_field(content, max_chars=max(max_line_chars, 20))
    parts = [f"[{idx:02d}]", f"发言人={sender}", f"正文={body}"]
    if is_at:
        parts.append("@bot=Y")
    return " | ".join(parts)
|
||||
|
||||
@classmethod
def _format_current_message_block(cls, sender_name: str, content: str) -> str:
    """Render the current message as a two-line "metadata + body" block.

    The first line carries the speaker and the second carries the message
    text, so that only the body is read as the source of topic semantics.

    Args:
        sender_name: Display name of the speaker; cleaned and limited to 24
            characters, with a placeholder used when nothing remains.
        content: Message text; cleaned and limited to 500 characters.

    Returns:
        Two lines: the speaker field followed by the body field.
    """
    clean = cls._sanitize_inline_field
    speaker = clean(sender_name, max_chars=24)
    if not speaker:
        speaker = "未知成员"
    text = clean(content, max_chars=500)
    return "\n".join((f"发言人={speaker}", f"正文={text}"))
|
||||
|
||||
def _select_recent_messages(
|
||||
self,
|
||||
recent_messages: List[Dict],
|
||||
|
||||
@@ -17,7 +17,14 @@
|
||||
成员记忆、群关系记忆、群事实记忆、向量召回记忆的合并摘要。
|
||||
|
||||
`current_message`
|
||||
当前消息,格式类似:`张三: 你还活着吗`
|
||||
当前消息,格式类似:
|
||||
|
||||
```text
|
||||
发言人=张三
|
||||
正文=你还活着吗
|
||||
```
|
||||
|
||||
其中 `发言人` 是元信息,`正文` 才是当前消息内容本身,不要把昵称里的词当成话题关键词。
|
||||
|
||||
`control`
|
||||
控制信息,格式类似:
|
||||
@@ -59,6 +66,10 @@ address_style=低频称呼,默认直接接话
|
||||
6. 信息不足就收着说,不要硬编。
|
||||
7. 回复尽量短,但要保留人格味道。
|
||||
8. 只输出一个 JSON 对象,不要输出解释。
|
||||
9. 如果上下文或当前消息里出现 `发言人=...`、`正文=...`:
|
||||
- `发言人` 只是识别谁在说话
|
||||
- `正文` 才是话题内容
|
||||
- 不要把昵称、群名片、外号中的词汇误判成正在讨论的话题
|
||||
|
||||
输出格式:
|
||||
{
|
||||
|
||||
@@ -242,6 +242,10 @@ workflow:
|
||||
- 优先使用 social_short
|
||||
- 用符合人格的一句短回怼挡回去
|
||||
- 不要长篇说教,不要爆粗,不要升级成真正对骂
|
||||
9. 上下文和当前消息里如果出现 `发言人=...`、`正文=...` 这样的结构:
|
||||
- `发言人` 只是说话人元信息,用来判断对象、关系、是否在点名
|
||||
- `正文` 才是话题和语义内容
|
||||
- 不要把昵称、群名片、外号里的词当成当前讨论主题
|
||||
|
||||
输出格式:
|
||||
{
|
||||
@@ -359,6 +363,9 @@ workflow:
|
||||
- 这里优先短回一句,不要空掉
|
||||
- 用 social_short
|
||||
- 回得短、稳、带人格,但不要说教,不要骂脏话
|
||||
7. 上下文和当前消息里如果出现 `发言人=...`、`正文=...`:
|
||||
- 只把 `正文` 当作话题内容
|
||||
- `发言人` 只用于识别是谁在说话,不要把昵称里的词汇当成讨论主题
|
||||
|
||||
输出格式:
|
||||
{
|
||||
|
||||
@@ -958,7 +958,8 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
||||
"group_profile": group_profile_text,
|
||||
"context": context_text,
|
||||
"memory": memory_text,
|
||||
"current_message": f"{sender_name}: {content}",
|
||||
# 当前消息不再用“昵称: 正文”的混合写法,避免模型把昵称词汇当成当前话题的一部分。
|
||||
"current_message": self._format_current_message_block(sender_name, content),
|
||||
"control": "\n".join(control_lines),
|
||||
"images": files,
|
||||
}
|
||||
@@ -1005,12 +1006,54 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
||||
sender = str(item.get("sender", "") or "未知成员").strip()
|
||||
content = str(item.get("content", "") or "").strip()
|
||||
if sender and content:
|
||||
compact = re.sub(r"\s+", " ", content).strip()
|
||||
if len(compact) > max_line_chars:
|
||||
compact = compact[: max_line_chars - 3].rstrip() + "..."
|
||||
lines.append(f"{sender}: {compact}")
|
||||
# 最近消息统一改成“发言人字段 + 正文字段”的单行结构化格式:
|
||||
# 1. 保留 30 条上下文时,仍然是一条消息一行,不会因为多行格式把上下文窗口挤爆;
|
||||
# 2. 模型可以继续感知是谁说的,但更不容易把昵称里的词误当成话题正文;
|
||||
# 3. 如果消息里本身带 @ 标记,也显式单列出来,减少对正文理解的污染。
|
||||
lines.append(
|
||||
AIAutoResponsePlugin._format_recent_message_line(
|
||||
idx=int(item.get("idx", 0) or 0),
|
||||
sender_name=sender,
|
||||
content=content,
|
||||
max_line_chars=max_line_chars,
|
||||
is_at=bool(item.get("is_at")),
|
||||
)
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
@staticmethod
def _sanitize_inline_message_field(value: str, max_chars: int) -> str:
    """Clean one field of a single-line structured message sent to the model.

    Whitespace runs (including newlines) collapse to one space and every
    ``|`` becomes ``/`` so neither can break up the " | "-separated record.

    Args:
        value: Raw field text; falsy values are treated as empty.
        max_chars: Upper bound on the length of the returned text.

    Returns:
        The cleaned text, never longer than ``max(max_chars, 0)``. Over-long
        text is cut and suffixed with ``...`` when the budget has room for
        the ellipsis; otherwise it is hard-cut to the budget.
    """
    text = re.sub(r"\s+", " ", str(value or "")).strip()
    text = text.replace("|", "/")
    if len(text) <= max_chars:
        return text
    if max_chars < 3:
        # No room for the ellipsis. A negative slice bound would keep almost
        # all of the text and overshoot the limit, so hard-cut instead.
        return text[: max(max_chars, 0)]
    return text[: max_chars - 3].rstrip() + "..."
|
||||
|
||||
@classmethod
def _format_recent_message_line(
    cls,
    *,
    idx: int,
    sender_name: str,
    content: str,
    max_line_chars: int,
    is_at: bool = False,
) -> str:
    """Render one recent message as a single " | "-separated record.

    Args:
        idx: Ordinal of the message; values below 1 are rendered as 1, and
            the number is zero-padded to two digits.
        sender_name: Display name of the speaker; cleaned and limited to 24
            characters, with a placeholder used when nothing remains.
        content: Message body; cleaned and limited to ``max_line_chars``.
        max_line_chars: Character budget for the body; never less than 20.
        is_at: When true, a trailing ``@bot=Y`` field is appended.

    Returns:
        The structured line: index, speaker field, body field and the
        optional ``@bot=Y`` marker.
    """
    clean = cls._sanitize_inline_message_field
    speaker = clean(sender_name, max_chars=24)
    if not speaker:
        speaker = "未知成员"
    # Keep a minimum body budget so a tiny limit cannot wipe out the text.
    body_budget = max_line_chars if max_line_chars > 20 else 20
    text = clean(content, max_chars=body_budget)
    position = idx if idx > 1 else 1
    line = f"[{position:02d}] | 发言人={speaker} | 正文={text}"
    if is_at:
        line += " | @bot=Y"
    return line
|
||||
|
||||
@classmethod
def _format_current_message_block(cls, sender_name: str, content: str) -> str:
    """Render the current message as two structured lines.

    Keeping the speaker and the body on separate labelled lines makes it
    easier for the model in the workflow to tell "who said it" apart from
    "what was said".

    Args:
        sender_name: Display name of the speaker; cleaned and limited to 24
            characters, with a placeholder used when nothing remains.
        content: Message text; cleaned and limited to 500 characters.

    Returns:
        Two lines: the speaker field followed by the body field.
    """
    clean = cls._sanitize_inline_message_field
    speaker = clean(sender_name, max_chars=24)
    if not speaker:
        speaker = "未知成员"
    text = clean(content, max_chars=500)
    return "\n".join((f"发言人={speaker}", f"正文={text}"))
|
||||
|
||||
@staticmethod
|
||||
def _string_block(title: str, value: Any) -> str:
|
||||
text = str(value or "").strip()
|
||||
|
||||
Reference in New Issue
Block a user