# Helpers that turn a quote-style app message (an <appmsg> carrying a
# <refermsg>) into a small dictionary of plain strings for downstream context.
from __future__ import annotations

import html
import xml.etree.ElementTree as ET
from typing import Any, Callable, Dict

from wechat_ipad.models.message import MessageType

# The only <appmsg><type> value this parser accepts as a quote message.
_QUOTE_APPMSG_TYPE = "57"
# Upper bound (in characters) applied to text copied into the quote body.
_QUOTE_BODY_MAX_CHARS = 220
# Placeholder returned when no sender can be determined; runtime string.
_UNKNOWN_SENDER_NAME = "未知成员"


def parse_quote_context(
    full_msg: Any,
    room_id: str,
    get_sender_name: Callable[[str, str], str],
) -> Dict[str, str]:
    """Extract quote (reply-with-reference) details from a message.

    Args:
        full_msg: Message object; ``full_msg.content.xml_content`` is read
            via ``getattr`` and is expected to hold the raw XML payload.
        room_id: Passed through to ``get_sender_name`` as its first argument.
        get_sender_name: Callback ``(room_id, sender_id) -> display name``
            used when the XML only carries a sender identifier.

    Returns:
        A dict with the keys ``title``, ``quote_sender_name``,
        ``quote_type_label``, ``quote_body`` and ``raw_ref_content``, or an
        empty dict when the message has no XML, the XML cannot be parsed, it
        is not a type-57 ``<appmsg>`` with a ``<refermsg>``, or the quote
        carries no usable sender, body or title.
    """
    if not full_msg or not getattr(full_msg, "content", None):
        return {}
    xml_content = getattr(full_msg.content, "xml_content", "") or ""
    if not xml_content:
        return {}

    # NOTE(review): xml.etree.ElementTree is not hardened against maliciously
    # constructed XML (see the stdlib security notes). If this payload can
    # come from untrusted peers, consider a hardened parser.
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError:
        return {}

    appmsg = root.find(".//appmsg")
    if appmsg is None:
        return {}
    if (appmsg.findtext("type", "") or "").strip() != _QUOTE_APPMSG_TYPE:
        return {}
    refer = appmsg.find("refermsg")
    if refer is None:
        return {}

    title = html.unescape(appmsg.findtext("title", "") or "").strip()
    quote_sender_name = _extract_quote_sender_name(refer, room_id, get_sender_name)
    ref_type = _parse_ref_type(refer)
    ref_content = html.unescape(refer.findtext("content", "") or "").strip()
    quote_type_label = quote_type_label_for(ref_type)
    quote_body = build_quote_body(ref_type, ref_content, title)

    # Noise reduction: a quote with no meaningful payload is not forwarded to
    # the downstream context, so it cannot skew the LLM's judgement.
    if _is_low_signal_quote(quote_sender_name, quote_body, title, quote_type_label):
        return {}

    return {
        "title": title,
        "quote_sender_name": quote_sender_name,
        "quote_type_label": quote_type_label,
        "quote_body": quote_body,
        "raw_ref_content": ref_content,
    }


def _parse_ref_type(refer: ET.Element) -> int:
    """Return the integer in ``<refermsg><type>``, or 0 when absent/invalid.

    A non-numeric or whitespace-only value falls back to 0 instead of raising
    ``ValueError``, matching the default already used for a missing element.
    """
    raw = (refer.findtext("type", "") or "").strip()
    if not raw:
        return 0
    try:
        return int(raw)
    except ValueError:
        return 0


def quote_type_label_for(ref_type: int) -> str:
    """Return the display label for a referenced message type.

    Types without a dedicated label map to ``引用消息[<ref_type>]``.
    """
    mapping = {
        MessageType.TEXT.value: "引用文本",
        MessageType.IMAGE.value: "引用图片",
        MessageType.VIDEO.value: "引用视频",
        MessageType.APP.value: "引用应用消息",
        MessageType.EMOTICON.value: "引用表情",
    }
    return mapping.get(ref_type, f"引用消息[{ref_type}]")


def build_quote_body(ref_type: int, ref_content: str, title: str) -> str:
    """Build the human-readable body describing the quoted message.

    Args:
        ref_type: Numeric type of the referenced message.
        ref_content: Unescaped content of the referenced message.
        title: Unescaped ``<appmsg><title>`` text.

    Returns:
        For text: the referenced content, truncated. For images: a
        description assembled from the title and a fixed image notice. For
        anything else: the truncated title, or the truncated referenced
        content when the title is empty.
    """
    if ref_type == MessageType.TEXT.value:
        return ref_content[:_QUOTE_BODY_MAX_CHARS].strip()

    if ref_type == MessageType.IMAGE.value:
        details = []
        if title:
            details.append(f"当前追问文案:{title}")
        if ref_content:
            details.append("被引用的是一张图片")
        # With neither title nor content, still say that an image was quoted.
        return ";".join(details) or "被引用的是一张图片"

    if title:
        return title[:_QUOTE_BODY_MAX_CHARS].strip()
    return ref_content[:_QUOTE_BODY_MAX_CHARS].strip()


def _extract_quote_sender_name(
    refer: ET.Element,
    room_id: str,
    get_sender_name: Callable[[str, str], str],
) -> str:
    """Resolve the display name of the quoted message's sender.

    A display name embedded in the XML wins. Otherwise a sender identifier is
    resolved through ``get_sender_name``, falling back to the identifier
    itself when the callback returns nothing usable. If neither is present
    the placeholder ``未知成员`` is returned.
    """
    # Common fields are displayname/chatusr; some clients may use
    # fromusr/fromnickname/source* instead.
    direct_name = _first_non_empty(
        refer,
        "displayname",
        "fromnickname",
        "sourcedisplayname",
        "source_displayname",
    )
    if direct_name:
        return direct_name

    quote_sender = _first_non_empty(
        refer,
        "chatusr",
        "fromusr",
        "sourceusername",
        "source_username",
    )
    if quote_sender:
        resolved = get_sender_name(room_id, quote_sender)
        return (resolved or "").strip() or quote_sender

    return _UNKNOWN_SENDER_NAME


def _first_non_empty(root: ET.Element, *tags: str) -> str:
    """Return the first non-blank, HTML-unescaped child text among ``tags``.

    Tags are tried in the given order; an empty string is returned when none
    of them yields text.
    """
    for tag in tags:
        value = html.unescape(root.findtext(tag, "") or "").strip()
        if value:
            return value
    return ""


def _is_low_signal_quote(
    quote_sender_name: str,
    quote_body: str,
    title: str,
    quote_type_label: str,
) -> bool:
    """Tell whether a parsed quote carries nothing worth forwarding.

    A quote is low-signal when it has no known sender, no body and no title.
    ``quote_type_label`` is accepted for interface compatibility; the
    previous unknown-type branch tested exactly the same condition, so the
    label does not influence the result.
    """
    sender = (quote_sender_name or "").strip()
    has_sender = bool(sender) and sender != _UNKNOWN_SENDER_NAME
    has_body = bool((quote_body or "").strip())
    has_title = bool((title or "").strip())
    return not (has_sender or has_body or has_title)