优化引用上下文质量并修复无效引用噪声
变更项: 1. 扩展引用发送者解析字段,新增 fromusr/fromnickname/sourceusername/sourcedisplayname 等兼容项。 2. 增加引用质量门控:发送者、标题、正文均缺失时直接丢弃 quote_context,避免污染 LLM。 3. 构建引用补充时不再输出“被引用发送者:未知成员”等低价值字段。 4. 增加兜底策略:仅剩引用类型且无正文标题时不输出引用补充。
This commit is contained in:
@@ -401,18 +401,22 @@ class ContextBuilder:
|
||||
if not quote_context:
|
||||
return ""
|
||||
quote_type = quote_context.get("quote_type_label", "引用消息")
|
||||
quote_sender = quote_context.get("quote_sender_name", "") or "未知成员"
|
||||
quote_sender = (quote_context.get("quote_sender_name", "") or "").strip()
|
||||
quote_body = quote_context.get("quote_body", "") or ""
|
||||
title = quote_context.get("title", "") or ""
|
||||
lines = [
|
||||
f"用户这次是在引用消息后发言。",
|
||||
f"引用类型:{quote_type}",
|
||||
f"被引用发送者:{quote_sender}",
|
||||
f"被引用发送者:{quote_sender}" if quote_sender and quote_sender != "未知成员" else "",
|
||||
f"图片附件:已附带原图" if quote_context.get("has_image_attachment") else "",
|
||||
f"引用标题:{title}" if title else "",
|
||||
f"被引用内容:{quote_body}" if quote_body else "",
|
||||
]
|
||||
return "\n".join([line for line in lines if line])
|
||||
payload = [line for line in lines if line]
|
||||
# 兜底:如果最终只剩“引用类型”,没有可用内容,就不输出引用补充
|
||||
if len(payload) <= 2 and not quote_body and not title:
|
||||
return ""
|
||||
return "\n".join(payload)
|
||||
|
||||
@staticmethod
|
||||
def _build_image_prompt(image_context: Dict) -> str:
|
||||
|
||||
@@ -27,14 +27,16 @@ def parse_quote_context(full_msg: Any, room_id: str, get_sender_name: Callable[[
|
||||
return {}
|
||||
|
||||
title = html.unescape(appmsg.findtext("title", "") or "").strip()
|
||||
quote_sender_name = html.unescape(refer.findtext("displayname", "") or "").strip()
|
||||
if not quote_sender_name:
|
||||
quote_sender = html.unescape(refer.findtext("chatusr", "") or "").strip()
|
||||
quote_sender_name = get_sender_name(room_id, quote_sender) if quote_sender else "未知成员"
|
||||
quote_sender_name = _extract_quote_sender_name(refer, room_id, get_sender_name)
|
||||
ref_type = int(refer.findtext("type", "0") or 0)
|
||||
ref_content = html.unescape(refer.findtext("content", "") or "").strip()
|
||||
quote_type_label = quote_type_label_for(ref_type)
|
||||
quote_body = build_quote_body(ref_type, ref_content, title)
|
||||
|
||||
# 降噪:引用信息没有有效载荷时,不喂给下游上下文,避免污染LLM判断
|
||||
if _is_low_signal_quote(quote_sender_name, quote_body, title, quote_type_label):
|
||||
return {}
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"quote_sender_name": quote_sender_name,
|
||||
@@ -68,3 +70,57 @@ def build_quote_body(ref_type: int, ref_content: str, title: str) -> str:
|
||||
if title:
|
||||
return title[:220].strip()
|
||||
return ref_content[:220].strip()
|
||||
|
||||
|
||||
def _extract_quote_sender_name(
    refer: ET.Element,
    room_id: str,
    get_sender_name: Callable[[str, str], str],
) -> str:
    """Work out a display name for the sender of a quoted message.

    Resolution order:

    1. The first non-blank name-like child of ``refer``
       (``displayname``, ``fromnickname``, ``sourcedisplayname``,
       ``source_displayname``) is returned as-is.
    2. Otherwise the first non-blank identifier-like child (``chatusr``,
       ``fromusr``, ``sourceusername``, ``source_username``) is passed to
       ``get_sender_name(room_id, identifier)``; the stripped result is
       returned, or the raw identifier when the lookup yields nothing.
    3. If neither kind of field is present, the placeholder ``"未知成员"``
       is returned.

    Args:
        refer: XML element describing the quoted message.
        room_id: Forwarded unchanged as the first argument of
            ``get_sender_name``.
        get_sender_name: Callback mapping ``(room_id, identifier)`` to a
            display name; a falsy or blank result counts as "not found".

    Returns:
        The resolved name, the raw identifier, or ``"未知成员"``.
    """
    # Common fields are displayname/chatusr; some clients may use
    # fromusr/fromnickname/source* instead.
    name_tags = (
        "displayname",
        "fromnickname",
        "sourcedisplayname",
        "source_displayname",
    )
    identifier_tags = (
        "chatusr",
        "fromusr",
        "sourceusername",
        "source_username",
    )

    shown_name = _first_non_empty(refer, *name_tags)
    if shown_name:
        return shown_name

    identifier = _first_non_empty(refer, *identifier_tags)
    if not identifier:
        return "未知成员"

    # Fall back to the raw identifier when the lookup returns nothing usable.
    looked_up = (get_sender_name(room_id, identifier) or "").strip()
    return looked_up if looked_up else identifier
|
||||
|
||||
|
||||
def _first_non_empty(root: ET.Element, *tags: str) -> str:
    """Return the first non-blank child text among ``tags``, else ``""``.

    Tags are tried in the order given. For each one the text obtained via
    ``root.findtext`` is HTML-unescaped and stripped of surrounding
    whitespace; the first result that is not empty is returned.

    Args:
        root: Element whose children are searched.
        *tags: Candidate tag names, highest priority first.

    Returns:
        The cleaned text of the first tag with content, or an empty string
        when no tag yields any.
    """
    # Lazy generator: later tags are not looked up once a match is found.
    cleaned_texts = (
        html.unescape(root.findtext(tag_name, "") or "").strip()
        for tag_name in tags
    )
    return next((text for text in cleaned_texts if text), "")
|
||||
|
||||
|
||||
def _is_low_signal_quote(quote_sender_name: str, quote_body: str, title: str, quote_type_label: str) -> bool:
    """Tell whether a parsed quote carries nothing worth passing downstream.

    A quote is low-signal when, after stripping whitespace, all of the
    following hold:

    * the sender is empty or equals the placeholder ``"未知成员"``;
    * the body is empty;
    * the title is empty.

    Args:
        quote_sender_name: Resolved sender name (may be blank or ``None``).
        quote_body: Quoted message body (may be blank or ``None``).
        title: Quote title (may be blank or ``None``).
        quote_type_label: Kept for interface compatibility only. The former
            special case for labels starting with ``"引用消息["`` checked
            exactly the same three conditions as the general rule, so the
            label never changed the outcome and is no longer inspected.

    Returns:
        ``True`` when the quote should be discarded, ``False`` otherwise.
    """
    sender = (quote_sender_name or "").strip()

    # Explicit booleans: the placeholder name counts as "no sender".
    has_sender = bool(sender) and sender != "未知成员"
    has_body = bool((quote_body or "").strip())
    has_title = bool((title or "").strip())

    # Discard only when sender, body and title are all missing.
    return not (has_sender or has_body or has_title)
|
||||
|
||||
Reference in New Issue
Block a user