优化引用上下文质量并修复无效引用噪声

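变更项：
1. 扩展引用发送者解析字段：昵称类在 displayname 之外新增 fromnickname/sourcedisplayname/source_displayname，账号类在 chatusr 之外新增 fromusr/sourceusername/source_username 等兼容项；账号解析不出昵称时回退为账号本身。
2. 增加引用质量门控：发送者（含“未知成员”）、标题、正文均缺失时，parse_quote_context 直接返回空结果，丢弃 quote_context，避免污染 LLM。
3. 构建引用补充时，发送者为空或为“未知成员”则不再输出“被引用发送者：未知成员”等低价值字段。
4. 增加兜底策略：引用补充中只剩提示语和引用类型（无发送者、图片附件、标题、正文）时，不输出引用补充。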
2026-04-16 11:12:16 +08:00
parent b4b3fa92e0
commit a68d6d5e6c
2 changed files with 67 additions and 7 deletions
--- a/plugins/ai_auto_response/context/context_builder.py
+++ b/plugins/ai_auto_response/context/context_builder.py
@@ -401,18 +401,22 @@ class ContextBuilder:
        if not quote_context:
            return ""
        quote_type = quote_context.get("quote_type_label", "引用消息")
-        quote_sender = quote_context.get("quote_sender_name", "") or "未知成员"
+        quote_sender = (quote_context.get("quote_sender_name", "") or "").strip()
        quote_body = quote_context.get("quote_body", "") or ""
        title = quote_context.get("title", "") or ""
        lines = [
            f"用户这次是在引用消息后发言。",
            f"引用类型：{quote_type}",
-            f"被引用发送者：{quote_sender}",
+            f"被引用发送者：{quote_sender}" if quote_sender and quote_sender != "未知成员" else "",
            f"图片附件：已附带原图" if quote_context.get("has_image_attachment") else "",
            f"引用标题：{title}" if title else "",
            f"被引用内容：{quote_body}" if quote_body else "",
        ]
-        return "\n".join([line for line in lines if line])
+        payload = [line for line in lines if line]
+        # 兜底：如果最终只剩“引用类型”，没有可用内容，就不输出引用补充
+        if len(payload) <= 2 and not quote_body and not title:
+            return ""
+        return "\n".join(payload)

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
--- a/plugins/ai_auto_response/context/quote_context.py
+++ b/plugins/ai_auto_response/context/quote_context.py
@@ -27,14 +27,16 @@ def parse_quote_context(full_msg: Any, room_id: str, get_sender_name: Callable[[
        return {}

    title = html.unescape(appmsg.findtext("title", "") or "").strip()
-    quote_sender_name = html.unescape(refer.findtext("displayname", "") or "").strip()
-    if not quote_sender_name:
-        quote_sender = html.unescape(refer.findtext("chatusr", "") or "").strip()
-        quote_sender_name = get_sender_name(room_id, quote_sender) if quote_sender else "未知成员"
+    quote_sender_name = _extract_quote_sender_name(refer, room_id, get_sender_name)
    ref_type = int(refer.findtext("type", "0") or 0)
    ref_content = html.unescape(refer.findtext("content", "") or "").strip()
    quote_type_label = quote_type_label_for(ref_type)
    quote_body = build_quote_body(ref_type, ref_content, title)
+
+    # 降噪：引用信息没有有效载荷时，不喂给下游上下文，避免污染LLM判断
+    if _is_low_signal_quote(quote_sender_name, quote_body, title, quote_type_label):
+        return {}
+
    return {
        "title": title,
        "quote_sender_name": quote_sender_name,
@@ -68,3 +70,57 @@ def build_quote_body(ref_type: int, ref_content: str, title: str) -> str:
    if title:
        return title[:220].strip()
    return ref_content[:220].strip()
+
+
+def _extract_quote_sender_name(
+    refer: ET.Element,
+    room_id: str,
+    get_sender_name: Callable[[str, str], str],
+) -> str:
+    # 常见字段：displayname/chatusr；部分端可能是fromusr/fromnickname/source*
+    direct_name = _first_non_empty(
+        refer,
+        "displayname",
+        "fromnickname",
+        "sourcedisplayname",
+        "source_displayname",
+    )
+    if direct_name:
+        return direct_name
+
+    quote_sender = _first_non_empty(
+        refer,
+        "chatusr",
+        "fromusr",
+        "sourceusername",
+        "source_username",
+    )
+    if quote_sender:
+        resolved = get_sender_name(room_id, quote_sender)
+        return (resolved or "").strip() or quote_sender
+    return "未知成员"
+
+
+def _first_non_empty(root: ET.Element, *tags: str) -> str:
+    for tag in tags:
+        value = html.unescape(root.findtext(tag, "") or "").strip()
+        if value:
+            return value
+    return ""
+
+
+def _is_low_signal_quote(quote_sender_name: str, quote_body: str, title: str, quote_type_label: str) -> bool:
+    sender = (quote_sender_name or "").strip()
+    body = (quote_body or "").strip()
+    title_text = (title or "").strip()
+    type_label = (quote_type_label or "").strip()
+
+    has_sender = sender and sender != "未知成员"
+    has_body = bool(body)
+    has_title = bool(title_text)
+    # 引用消息[数字] 代表类型未知，若同时没有发送者/正文/标题，则直接丢弃
+    unknown_type = type_label.startswith("引用消息[")
+    if unknown_type and (not has_sender) and (not has_body) and (not has_title):
+        return True
+    # 普通场景：三者都缺失也丢弃
+    return (not has_sender) and (not has_body) and (not has_title)