Files
abot/plugins/ai_auto_response/context/quote_context.py
liuwei a68d6d5e6c 优化引用上下文质量并修复无效引用噪声
变更项:

1. 扩展引用发送者解析字段,新增 fromusr/fromnickname/sourceusername/sourcedisplayname 等兼容项。

2. 增加引用质量门控:发送者、标题、正文均缺失时直接丢弃 quote_context,避免污染 LLM。

3. 构建引用补充时不再输出“被引用发送者:未知成员”等低价值字段。

4. 增加兜底策略:仅剩引用类型且无正文标题时不输出引用补充。
2026-04-16 11:12:16 +08:00

127 lines
4.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import html
import xml.etree.ElementTree as ET
from typing import Any, Callable, Dict
from wechat_ipad.models.message import MessageType
def parse_quote_context(full_msg: Any, room_id: str, get_sender_name: Callable[[str, str], str]) -> Dict[str, str]:
    """Extract quote details from the appmsg XML attached to a message.

    Args:
        full_msg: Message object; only ``full_msg.content.xml_content`` is read.
        room_id: Forwarded to ``get_sender_name`` when the quoted sender has
            to be resolved from an account id.
        get_sender_name: Callback ``(room_id, user_id) -> display name``.

    Returns:
        A dict with the keys ``title``, ``quote_sender_name``,
        ``quote_type_label``, ``quote_body`` and ``raw_ref_content``.
        An empty dict is returned when the message carries no XML, the XML
        cannot be parsed, it has no ``appmsg`` whose ``type`` is ``57`` with a
        ``refermsg`` child, or the quote is judged low-signal.
    """
    if not full_msg or not getattr(full_msg, "content", None):
        return {}

    xml_content = getattr(full_msg.content, "xml_content", "") or ""
    if not xml_content:
        return {}

    # NOTE(review): xml_content presumably originates from a remote chat
    # message; ElementTree is not hardened against maliciously constructed
    # XML. Confirm whether a hardened parser is needed here.
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError:
        return {}

    appmsg = root.find(".//appmsg")
    if appmsg is None or appmsg.findtext("type", "").strip() != "57":
        return {}

    refer = appmsg.find("refermsg")
    if refer is None:
        return {}

    title = html.unescape(appmsg.findtext("title", "") or "").strip()
    quote_sender_name = _extract_quote_sender_name(refer, room_id, get_sender_name)

    # A non-numeric or whitespace-only <type> must not crash the caller;
    # treat it like a missing type (0).
    try:
        ref_type = int(refer.findtext("type", "0") or 0)
    except ValueError:
        ref_type = 0

    ref_content = html.unescape(refer.findtext("content", "") or "").strip()
    quote_type_label = quote_type_label_for(ref_type)
    quote_body = build_quote_body(ref_type, ref_content, title)

    # Noise reduction: a quote without any useful payload is not handed to the
    # downstream context, so it cannot skew the LLM's judgement.
    if _is_low_signal_quote(quote_sender_name, quote_body, title, quote_type_label):
        return {}

    return {
        "title": title,
        "quote_sender_name": quote_sender_name,
        "quote_type_label": quote_type_label,
        "quote_body": quote_body,
        "raw_ref_content": ref_content,
    }
def quote_type_label_for(ref_type: int) -> str:
    """Return the display label for a quoted-message type code.

    The text, image, video, app-message and emoticon codes each have a fixed
    label; every other code gets a generic label that embeds the raw code.
    """
    known_labels = dict(
        (
            (MessageType.TEXT.value, "引用文本"),
            (MessageType.IMAGE.value, "引用图片"),
            (MessageType.VIDEO.value, "引用视频"),
            (MessageType.APP.value, "引用应用消息"),
            (MessageType.EMOTICON.value, "引用表情"),
        )
    )
    if ref_type in known_labels:
        return known_labels[ref_type]
    return f"引用消息[{ref_type}]"
def build_quote_body(ref_type: int, ref_content: str, title: str) -> str:
    """Build the short text that describes the quoted message.

    Args:
        ref_type: Type code of the quoted message.
        ref_content: Content of the quoted message.
        title: Title of the quoting message.

    Returns:
        * Text quote: the quoted content, capped at 220 characters.
        * Image quote: the capped title (when present) and a fixed image
          note (when there is quoted content), separated by ";"; the image
          note alone when neither is present.
        * Any other type: the capped title if there is one, otherwise the
          capped quoted content.
    """
    if ref_type == MessageType.TEXT.value:
        return ref_content[:220].strip()

    if ref_type == MessageType.IMAGE.value:
        details = []
        if title:
            # Cap the title here as well; every other path limits it to 220
            # characters.
            details.append(f"当前追问文案:{title[:220].strip()}")
        if ref_content:
            details.append("被引用的是一张图片")
        # Join with a visible separator so the two fragments do not run
        # together into one unreadable phrase.
        return ";".join(details) or "被引用的是一张图片"

    if title:
        return title[:220].strip()
    return ref_content[:220].strip()
def _extract_quote_sender_name(
    refer: ET.Element,
    room_id: str,
    get_sender_name: Callable[[str, str], str],
) -> str:
    """Work out a display name for the sender of the quoted message.

    A display-name style child of ``refer`` wins when one is non-empty.
    Otherwise an account-id style child is resolved through
    ``get_sender_name``; if that yields nothing usable, the raw id is
    returned. With neither kind of field present the placeholder "未知成员"
    is returned.
    """
    # Usual fields are displayname/chatusr; some clients may send
    # fromusr/fromnickname/source* instead.
    name_tags = (
        "displayname",
        "fromnickname",
        "sourcedisplayname",
        "source_displayname",
    )
    id_tags = (
        "chatusr",
        "fromusr",
        "sourceusername",
        "source_username",
    )

    shown_name = _first_non_empty(refer, *name_tags)
    if shown_name:
        return shown_name

    account_id = _first_non_empty(refer, *id_tags)
    if not account_id:
        return "未知成员"

    looked_up = get_sender_name(room_id, account_id)
    cleaned = (looked_up or "").strip()
    return cleaned if cleaned else account_id
def _first_non_empty(root: ET.Element, *tags: str) -> str:
    """Return the first non-blank child text among ``tags``, in order.

    Each candidate is HTML-unescaped and stripped before it is checked. An
    empty string is returned when no tag yields any text.
    """
    cleaned_texts = (
        html.unescape(root.findtext(tag_name, "") or "").strip()
        for tag_name in tags
    )
    return next((text for text in cleaned_texts if text), "")
def _is_low_signal_quote(quote_sender_name: str, quote_body: str, title: str, quote_type_label: str) -> bool:
    """Return True when a quote carries nothing worth passing downstream.

    A quote is low-signal when, after stripping whitespace, it has no usable
    sender (empty or the "未知成员" placeholder), no body and no title.

    Args:
        quote_sender_name: Resolved sender name, possibly the placeholder.
        quote_body: Body text built for the quote.
        title: Title of the quoting message.
        quote_type_label: Kept for call compatibility; it does not influence
            the result, because a type label alone is not treated as payload
            whether or not the type is known.

    Returns:
        ``True`` if sender, body and title are all missing, else ``False``.
    """
    sender = (quote_sender_name or "").strip()
    has_sender = bool(sender) and sender != "未知成员"
    has_body = bool((quote_body or "").strip())
    has_title = bool((title or "").strip())
    return not (has_sender or has_body or has_title)