优化引用上下文质量并修复无效引用噪声
变更项: 1. 扩展引用发送者解析字段,新增 fromusr/fromnickname/sourceusername/sourcedisplayname 等兼容项。 2. 增加引用质量门控:发送者、标题、正文均缺失时直接丢弃 quote_context,避免污染 LLM。 3. 构建引用补充时不再输出“被引用发送者:未知成员”等低价值字段。 4. 增加兜底策略:仅剩引用类型且无正文标题时不输出引用补充。
This commit is contained in:
@@ -401,18 +401,22 @@ class ContextBuilder:
|
|||||||
if not quote_context:
|
if not quote_context:
|
||||||
return ""
|
return ""
|
||||||
quote_type = quote_context.get("quote_type_label", "引用消息")
|
quote_type = quote_context.get("quote_type_label", "引用消息")
|
||||||
quote_sender = quote_context.get("quote_sender_name", "") or "未知成员"
|
quote_sender = (quote_context.get("quote_sender_name", "") or "").strip()
|
||||||
quote_body = quote_context.get("quote_body", "") or ""
|
quote_body = quote_context.get("quote_body", "") or ""
|
||||||
title = quote_context.get("title", "") or ""
|
title = quote_context.get("title", "") or ""
|
||||||
lines = [
|
lines = [
|
||||||
f"用户这次是在引用消息后发言。",
|
f"用户这次是在引用消息后发言。",
|
||||||
f"引用类型:{quote_type}",
|
f"引用类型:{quote_type}",
|
||||||
f"被引用发送者:{quote_sender}",
|
f"被引用发送者:{quote_sender}" if quote_sender and quote_sender != "未知成员" else "",
|
||||||
f"图片附件:已附带原图" if quote_context.get("has_image_attachment") else "",
|
f"图片附件:已附带原图" if quote_context.get("has_image_attachment") else "",
|
||||||
f"引用标题:{title}" if title else "",
|
f"引用标题:{title}" if title else "",
|
||||||
f"被引用内容:{quote_body}" if quote_body else "",
|
f"被引用内容:{quote_body}" if quote_body else "",
|
||||||
]
|
]
|
||||||
return "\n".join([line for line in lines if line])
|
payload = [line for line in lines if line]
|
||||||
|
# 兜底:如果最终只剩“引用类型”,没有可用内容,就不输出引用补充
|
||||||
|
if len(payload) <= 2 and not quote_body and not title:
|
||||||
|
return ""
|
||||||
|
return "\n".join(payload)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_image_prompt(image_context: Dict) -> str:
|
def _build_image_prompt(image_context: Dict) -> str:
|
||||||
|
|||||||
@@ -27,14 +27,16 @@ def parse_quote_context(full_msg: Any, room_id: str, get_sender_name: Callable[[
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
title = html.unescape(appmsg.findtext("title", "") or "").strip()
|
title = html.unescape(appmsg.findtext("title", "") or "").strip()
|
||||||
quote_sender_name = html.unescape(refer.findtext("displayname", "") or "").strip()
|
quote_sender_name = _extract_quote_sender_name(refer, room_id, get_sender_name)
|
||||||
if not quote_sender_name:
|
|
||||||
quote_sender = html.unescape(refer.findtext("chatusr", "") or "").strip()
|
|
||||||
quote_sender_name = get_sender_name(room_id, quote_sender) if quote_sender else "未知成员"
|
|
||||||
ref_type = int(refer.findtext("type", "0") or 0)
|
ref_type = int(refer.findtext("type", "0") or 0)
|
||||||
ref_content = html.unescape(refer.findtext("content", "") or "").strip()
|
ref_content = html.unescape(refer.findtext("content", "") or "").strip()
|
||||||
quote_type_label = quote_type_label_for(ref_type)
|
quote_type_label = quote_type_label_for(ref_type)
|
||||||
quote_body = build_quote_body(ref_type, ref_content, title)
|
quote_body = build_quote_body(ref_type, ref_content, title)
|
||||||
|
|
||||||
|
# 降噪:引用信息没有有效载荷时,不喂给下游上下文,避免污染LLM判断
|
||||||
|
if _is_low_signal_quote(quote_sender_name, quote_body, title, quote_type_label):
|
||||||
|
return {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"title": title,
|
"title": title,
|
||||||
"quote_sender_name": quote_sender_name,
|
"quote_sender_name": quote_sender_name,
|
||||||
@@ -68,3 +70,57 @@ def build_quote_body(ref_type: int, ref_content: str, title: str) -> str:
|
|||||||
if title:
|
if title:
|
||||||
return title[:220].strip()
|
return title[:220].strip()
|
||||||
return ref_content[:220].strip()
|
return ref_content[:220].strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_quote_sender_name(
    refer: ET.Element,
    room_id: str,
    get_sender_name: Callable[[str, str], str],
) -> str:
    """Work out a display name for the sender of a quoted message.

    Resolution order:

    1. The first non-blank name-like child of ``refer`` (``displayname``,
       ``fromnickname``, ``sourcedisplayname``, ``source_displayname``) is
       returned unchanged.
    2. Otherwise the first non-blank id-like child (``chatusr``, ``fromusr``,
       ``sourceusername``, ``source_username``) is handed to
       ``get_sender_name(room_id, sender_id)``. The stripped result is
       returned, or the raw id when the lookup yields nothing usable.
    3. With neither kind of field present, the placeholder "未知成员" is
       returned.

    Args:
        refer: XML element describing the quoted message.
        room_id: Passed through as the first argument of ``get_sender_name``.
        get_sender_name: Callback mapping ``(room_id, sender_id)`` to a name.

    Returns:
        The resolved name, the raw sender id, or the placeholder string.
    """
    # Different clients appear to use different tag names for the same data,
    # so several spellings are probed in priority order.
    name_tags = (
        "displayname",
        "fromnickname",
        "sourcedisplayname",
        "source_displayname",
    )
    id_tags = (
        "chatusr",
        "fromusr",
        "sourceusername",
        "source_username",
    )

    display_name = _first_non_empty(refer, *name_tags)
    if display_name:
        return display_name

    sender_id = _first_non_empty(refer, *id_tags)
    if not sender_id:
        return "未知成员"

    # Fall back to the raw id when the callback returns None or blank text.
    looked_up = (get_sender_name(room_id, sender_id) or "").strip()
    return looked_up if looked_up else sender_id
|
||||||
|
|
||||||
|
|
||||||
|
def _first_non_empty(root: ET.Element, *tags: str) -> str:
    """Return the text of the first child tag that is non-blank.

    Each tag in ``tags`` is looked up on ``root`` in the given order; its
    text is HTML-unescaped and stripped of surrounding whitespace. The first
    value that is still non-empty after that is returned.

    Args:
        root: Element whose children are searched.
        *tags: Child tag names (or paths accepted by ``findtext``) to try.

    Returns:
        The first cleaned, non-empty text, or ``""`` when no tag qualifies.
    """
    # Generator keeps the lookup lazy: later tags are not read once a match
    # has been found.
    cleaned_texts = (
        html.unescape(root.findtext(tag, "") or "").strip() for tag in tags
    )
    return next((text for text in cleaned_texts if text), "")
|
||||||
|
|
||||||
|
|
||||||
|
def _is_low_signal_quote(quote_sender_name: str, quote_body: str, title: str, quote_type_label: str) -> bool:
    """Tell whether a parsed quote carries no usable information.

    A quote is considered low-signal when all three of the following are
    missing after stripping whitespace:

    * a real sender name (blank or the placeholder "未知成员" does not count),
    * a quote body,
    * a title.

    Args:
        quote_sender_name: Resolved sender name; ``None`` is treated as blank.
        quote_body: Quoted text; ``None`` is treated as blank.
        title: Quote title; ``None`` is treated as blank.
        quote_type_label: Kept for interface compatibility. It does not
            influence the result: the earlier special case for labels
            starting with "引用消息[" tested exactly the same condition as
            the general rule, so it has been folded into it.

    Returns:
        ``True`` when the quote should be dropped, ``False`` otherwise.
    """
    sender = (quote_sender_name or "").strip()

    # Explicit bool() so the flag is never a bare string.
    has_sender = bool(sender) and sender != "未知成员"
    has_body = bool((quote_body or "").strip())
    has_title = bool((title or "").strip())

    return not (has_sender or has_body or has_title)
|
||||||
|
|||||||
Reference in New Issue
Block a user