拆分群昵称与正文避免话题识别被昵称污染
This commit is contained in:
@@ -30,11 +30,19 @@ class ContextBuilder:
|
||||
) -> Dict:
|
||||
selected_messages = self._select_recent_messages(recent_messages, sender, content, quote_context or {})
|
||||
recent_lines = []
|
||||
for item in selected_messages:
|
||||
for idx, item in enumerate(selected_messages, start=1):
|
||||
msg_sender = item.get("sender_name") or item.get("sender") or "未知成员"
|
||||
msg_content = item.get("content") or item.get("message") or ""
|
||||
if msg_content:
|
||||
recent_lines.append(f"{msg_sender}: {msg_content}")
|
||||
# 这里把“发言人”和“正文”拆开保存,避免后续模型把昵称词汇误当成讨论主题。
|
||||
recent_lines.append(
|
||||
self._format_recent_message_line(
|
||||
idx=idx,
|
||||
sender_name=str(msg_sender),
|
||||
content=str(msg_content),
|
||||
is_at=bool(item.get("is_at")),
|
||||
)
|
||||
)
|
||||
return {
|
||||
"group_profile": group_profile or {"room_id": room_id},
|
||||
"speaker_profile": {
|
||||
@@ -67,7 +75,7 @@ class ContextBuilder:
|
||||
"image_safety_prompt": self._build_image_safety_prompt(
|
||||
(quote_context or {}).get("image_safety") or {}
|
||||
),
|
||||
"current_message": f"{sender_name}: {content}",
|
||||
"current_message": self._format_current_message_block(sender_name, content),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -85,6 +93,31 @@ class ContextBuilder:
|
||||
})
|
||||
return items
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_inline_field(value: str, max_chars: int = 120) -> str:
|
||||
# 统一把换行和分隔符清掉,避免后续在单行结构化文本里把字段边界冲散。
|
||||
text = re.sub(r"\s+", " ", str(value or "")).strip()
|
||||
text = text.replace("|", "/")
|
||||
if len(text) > max_chars:
|
||||
return text[: max_chars - 3].rstrip() + "..."
|
||||
return text
|
||||
|
||||
@classmethod
|
||||
def _format_recent_message_line(cls, idx: int, sender_name: str, content: str, is_at: bool = False) -> str:
|
||||
sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员"
|
||||
body = cls._sanitize_inline_field(content, max_chars=120)
|
||||
parts = [f"[{idx:02d}]", f"发言人={sender}", f"正文={body}"]
|
||||
if is_at:
|
||||
parts.append("@bot=Y")
|
||||
return " | ".join(parts)
|
||||
|
||||
@classmethod
|
||||
def _format_current_message_block(cls, sender_name: str, content: str) -> str:
|
||||
# 当前消息改成“元信息 + 正文”两段式,方便模型只把正文视为话题语义来源。
|
||||
sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员"
|
||||
body = cls._sanitize_inline_field(content, max_chars=500)
|
||||
return f"发言人={sender}\n正文={body}"
|
||||
|
||||
def _select_recent_messages(
|
||||
self,
|
||||
recent_messages: List[Dict],
|
||||
|
||||
Reference in New Issue
Block a user