from __future__ import annotations
import html
import xml.etree.ElementTree as ET
from typing import Any, Callable, Dict
from wechat_ipad.models.message import MessageType
def parse_quote_context(full_msg: Any, room_id: str, get_sender_name: Callable[[str, str], str]) -> Dict[str, str]:
    """Extract quote (reply-to) context from a message's XML payload.

    The XML is read from ``full_msg.content.xml_content``. Only payloads that
    contain an ``<appmsg>`` element whose ``<type>`` is ``57`` and that has a
    ``<refermsg>`` child are processed (type 57 is presumably the quote/reply
    app-message type -- confirm against the protocol definition).

    Args:
        full_msg: Message object; only ``content.xml_content`` is accessed.
        room_id: Passed through as the first argument of ``get_sender_name``.
        get_sender_name: Callback invoked as ``get_sender_name(room_id, user_id)``
            to resolve a display name when the XML carries only a user id.

    Returns:
        A dict with the keys ``title``, ``quote_sender_name``,
        ``quote_type_label``, ``quote_body`` and ``raw_ref_content``, or an
        empty dict when the message has no usable quote information (missing
        or unparsable XML, not a type-57 app message, no ``<refermsg>``, or a
        quote that carries no sender, body or title).
    """
    if not full_msg or not getattr(full_msg, "content", None):
        return {}
    xml_content = getattr(full_msg.content, "xml_content", "") or ""
    if not xml_content:
        return {}

    # NOTE(review): xml_content presumably originates from remote peers; the
    # stdlib parser is not hardened against entity-expansion attacks -- confirm
    # whether a hardened parser is warranted here.
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError:
        return {}

    appmsg = root.find(".//appmsg")
    if appmsg is None or appmsg.findtext("type", "").strip() != "57":
        return {}
    refer = appmsg.find("refermsg")
    if refer is None:
        return {}

    title = html.unescape(appmsg.findtext("title", "") or "").strip()
    quote_sender_name = _extract_quote_sender_name(refer, room_id, get_sender_name)

    # A malformed (non-numeric) <type> must not crash parsing: treat it like a
    # missing type (0), which maps to the generic "unknown type" label.
    raw_ref_type = (refer.findtext("type", "") or "").strip()
    try:
        ref_type = int(raw_ref_type) if raw_ref_type else 0
    except ValueError:
        ref_type = 0

    ref_content = html.unescape(refer.findtext("content", "") or "").strip()
    quote_type_label = quote_type_label_for(ref_type)
    quote_body = build_quote_body(ref_type, ref_content, title)

    # Noise reduction: when the quote carries no useful payload, do not feed it
    # into the downstream context, so it cannot pollute the LLM's judgement.
    if _is_low_signal_quote(quote_sender_name, quote_body, title, quote_type_label):
        return {}

    return {
        "title": title,
        "quote_sender_name": quote_sender_name,
        "quote_type_label": quote_type_label,
        "quote_body": quote_body,
        "raw_ref_content": ref_content,
    }
def quote_type_label_for(ref_type: int) -> str:
    """Return the display label for the type of a quoted message.

    Known ``MessageType`` values map to fixed labels; any other value yields
    a generic label that embeds the numeric type.
    """
    labelled_types = (
        (MessageType.TEXT, "引用文本"),
        (MessageType.IMAGE, "引用图片"),
        (MessageType.VIDEO, "引用视频"),
        (MessageType.APP, "引用应用消息"),
        (MessageType.EMOTICON, "引用表情"),
    )
    known_labels = {member.value: label for member, label in labelled_types}
    fallback_label = f"引用消息[{ref_type}]"
    return known_labels.get(ref_type, fallback_label)
def build_quote_body(ref_type: int, ref_content: str, title: str) -> str:
    """Build the short body text that describes a quoted message.

    * Text quotes: the quoted content, cut to 220 characters and stripped.
    * Image quotes: the current message's title (if any) followed by a fixed
      "an image was quoted" marker when quoted content is present; the marker
      alone when neither piece is available.
    * Any other type: the title if non-empty, otherwise the quoted content,
      cut to 220 characters and stripped.
    """
    max_len = 220

    if ref_type == MessageType.TEXT.value:
        return ref_content[:max_len].strip()

    if ref_type != MessageType.IMAGE.value:
        preferred = title if title else ref_content
        return preferred[:max_len].strip()

    image_marker = "被引用的是一张图片"
    pieces = [f"当前追问文案:{title}"] if title else []
    if ref_content:
        pieces.append(image_marker)
    return ";".join(pieces) if pieces else image_marker
def _extract_quote_sender_name(
    refer: ET.Element,
    room_id: str,
    get_sender_name: Callable[[str, str], str],
) -> str:
    """Work out a display name for the sender of the quoted message.

    A display name found directly in ``refer`` wins. Otherwise the first
    non-empty user-id tag is resolved via ``get_sender_name(room_id, user_id)``,
    falling back to the raw id when the callback returns nothing usable. If
    neither kind of tag is present, a fixed "unknown member" placeholder is
    returned.
    """
    # Common tags are displayname/chatusr; some clients may use
    # fromusr/fromnickname/source* instead.
    name_tags = ("displayname", "fromnickname", "sourcedisplayname", "source_displayname")
    id_tags = ("chatusr", "fromusr", "sourceusername", "source_username")

    shown_name = _first_non_empty(refer, *name_tags)
    if shown_name:
        return shown_name

    sender_id = _first_non_empty(refer, *id_tags)
    if not sender_id:
        return "未知成员"

    looked_up = (get_sender_name(room_id, sender_id) or "").strip()
    return looked_up if looked_up else sender_id
def _first_non_empty(root: ET.Element, *tags: str) -> str:
    """Return the first non-blank child text among ``tags``, in order.

    Each candidate is HTML-unescaped and stripped before being tested. An
    empty string is returned when no tag yields text (or no tags are given).
    """
    cleaned_texts = (
        html.unescape(root.findtext(tag_name, "") or "").strip()
        for tag_name in tags
    )
    # The generator is lazy, so lookup stops at the first usable value.
    return next((text for text in cleaned_texts if text), "")
def _is_low_signal_quote(quote_sender_name: str, quote_body: str, title: str, quote_type_label: str) -> bool:
    """Tell whether a parsed quote carries no usable information.

    A quote is low-signal when it has no real sender (empty, or the
    "unknown member" placeholder), no body and no title, after stripping
    whitespace. ``None`` arguments are treated as empty strings.

    ``quote_type_label`` is accepted for interface compatibility only. The
    earlier special case for unknown-type labels tested exactly the same
    condition as the general rule, so the label never influenced the result.

    Returns:
        ``True`` if the quote should be discarded, ``False`` otherwise.
    """
    sender = (quote_sender_name or "").strip()
    has_sender = bool(sender) and sender != "未知成员"
    has_body = bool((quote_body or "").strip())
    has_title = bool((title or "").strip())
    # Discard only when sender, body and title are all missing.
    return not (has_sender or has_body or has_title)