# abot/plugins/ai_auto_response/context/quote_context.py

from __future__ import annotations

import html
import xml.etree.ElementTree as ET
from typing import Any, Callable, Dict

from wechat_ipad.models.message import MessageType


def parse_quote_context(full_msg: Any, room_id: str, get_sender_name: Callable[[str, str], str]) -> Dict[str, str]:
    """Extract quote (reply-to) context from a message's XML payload.

    The payload is read from ``full_msg.content.xml_content``. Only an
    ``<appmsg>`` whose ``<type>`` is ``"57"`` and that contains a
    ``<refermsg>`` child is treated as a quote.

    Args:
        full_msg: Message object; must expose ``content.xml_content``.
        room_id: Passed through to ``get_sender_name`` when the quoted
            sender has to be resolved from an id.
        get_sender_name: Callback ``(room_id, sender_id) -> display name``.

    Returns:
        A dict with the keys ``title``, ``quote_sender_name``,
        ``quote_type_label``, ``quote_body`` and ``raw_ref_content``, or an
        empty dict when the message carries no usable quote (missing
        content, unparsable XML, not a type-57 appmsg, no ``<refermsg>``,
        or a quote judged to be low-signal).
    """
    if not full_msg or not getattr(full_msg, "content", None):
        return {}
    xml_content = getattr(full_msg.content, "xml_content", "") or ""
    if not xml_content:
        return {}
    # NOTE(review): this XML originates from a chat message. ElementTree is
    # not hardened against maliciously constructed documents (e.g. entity
    # expansion); consider a hardened parser if the input is untrusted.
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError:
        return {}

    appmsg = root.find(".//appmsg")
    if appmsg is None or appmsg.findtext("type", "").strip() != "57":
        return {}

    refer = appmsg.find("refermsg")
    if refer is None:
        return {}

    title = html.unescape(appmsg.findtext("title", "") or "").strip()
    quote_sender_name = _extract_quote_sender_name(refer, room_id, get_sender_name)
    # A non-numeric <type> must not crash the parser: every other malformed
    # input path here degrades quietly, so fall back to 0 (the same value
    # used when the tag is missing or empty).
    try:
        ref_type = int(refer.findtext("type", "0") or 0)
    except ValueError:
        ref_type = 0
    ref_content = html.unescape(refer.findtext("content", "") or "").strip()
    quote_type_label = quote_type_label_for(ref_type)
    quote_body = build_quote_body(ref_type, ref_content, title)

    # Noise reduction: when the quote carries no useful payload, do not feed
    # it into the downstream context, so it cannot skew the LLM's judgement.
    if _is_low_signal_quote(quote_sender_name, quote_body, title, quote_type_label):
        return {}

    return {
        "title": title,
        "quote_sender_name": quote_sender_name,
        "quote_type_label": quote_type_label,
        "quote_body": quote_body,
        "raw_ref_content": ref_content,
    }


def quote_type_label_for(ref_type: int) -> str:
    """Return the display label for a quoted message's numeric type code.

    Text, image, video, app and emoticon types have dedicated labels; any
    other code yields a generic label that embeds the code itself.
    """
    known_labels = dict(
        (
            (MessageType.TEXT.value, "引用文本"),
            (MessageType.IMAGE.value, "引用图片"),
            (MessageType.VIDEO.value, "引用视频"),
            (MessageType.APP.value, "引用应用消息"),
            (MessageType.EMOTICON.value, "引用表情"),
        )
    )
    if ref_type in known_labels:
        return known_labels[ref_type]
    return f"引用消息[{ref_type}]"


def build_quote_body(ref_type: int, ref_content: str, title: str) -> str:
    """Build the short body text that describes a quoted message.

    * Text quotes: the quoted content, cut to 220 characters and stripped.
    * Image quotes: a description assembled from the title (when present)
      and a fixed "an image was quoted" note (when the reference has
      content); the fixed note alone when neither is available.
    * Any other type: the title if there is one, otherwise the quoted
      content, cut to 220 characters and stripped.
    """
    is_text = ref_type == MessageType.TEXT.value
    if not is_text and ref_type == MessageType.IMAGE.value:
        image_note = "被引用的是一张图片"
        parts = [f"当前追问文案：{title}"] if title else []
        if ref_content:
            parts.append(image_note)
        return "；".join(parts) if parts else image_note

    # Text always uses the quoted content; other types prefer the title.
    source = ref_content if (is_text or not title) else title
    return source[:220].strip()


def _extract_quote_sender_name(
    refer: ET.Element,
    room_id: str,
    get_sender_name: Callable[[str, str], str],
) -> str:
    """Work out a display name for the author of the quoted message.

    A name embedded directly in ``refer`` wins. Otherwise the first sender
    id found is resolved through ``get_sender_name``; if that yields nothing
    usable the raw id is returned. With neither a name nor an id available,
    a fixed "unknown member" placeholder is returned.
    """
    # Common fields are displayname/chatusr; some clients may instead send
    # fromusr/fromnickname/source*.
    name_tags = ("displayname", "fromnickname", "sourcedisplayname", "source_displayname")
    id_tags = ("chatusr", "fromusr", "sourceusername", "source_username")

    shown_name = _first_non_empty(refer, *name_tags)
    if shown_name:
        return shown_name

    sender_id = _first_non_empty(refer, *id_tags)
    if not sender_id:
        return "未知成员"

    looked_up = (get_sender_name(room_id, sender_id) or "").strip()
    return looked_up if looked_up else sender_id


def _first_non_empty(root: ET.Element, *tags: str) -> str:
    for tag in tags:
        value = html.unescape(root.findtext(tag, "") or "").strip()
        if value:
            return value
    return ""


def _is_low_signal_quote(quote_sender_name: str, quote_body: str, title: str, quote_type_label: str) -> bool:
    sender = (quote_sender_name or "").strip()
    body = (quote_body or "").strip()
    title_text = (title or "").strip()
    type_label = (quote_type_label or "").strip()

    has_sender = sender and sender != "未知成员"
    has_body = bool(body)
    has_title = bool(title_text)
    # 引用消息[数字] 代表类型未知，若同时没有发送者/正文/标题，则直接丢弃
    unknown_type = type_label.startswith("引用消息[")
    if unknown_type and (not has_sender) and (not has_body) and (not has_title):
        return True
    # 普通场景：三者都缺失也丢弃
    return (not has_sender) and (not has_body) and (not has_title)