From a68d6d5e6cd95c12e6b772b2d1e82fef90247296 Mon Sep 17 00:00:00 2001 From: liuwei Date: Thu, 16 Apr 2026 11:12:16 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=BC=95=E7=94=A8=E4=B8=8A?= =?UTF-8?q?=E4=B8=8B=E6=96=87=E8=B4=A8=E9=87=8F=E5=B9=B6=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E6=97=A0=E6=95=88=E5=BC=95=E7=94=A8=E5=99=AA=E5=A3=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 变更项: 1. 扩展引用发送者解析字段,新增 fromusr/fromnickname/sourceusername/sourcedisplayname 等兼容项。 2. 增加引用质量门控:发送者、标题、正文均缺失时直接丢弃 quote_context,避免污染 LLM。 3. 构建引用补充时不再输出“被引用发送者:未知成员”等低价值字段。 4. 增加兜底策略:仅剩引用类型且无正文标题时不输出引用补充。 --- .../context/context_builder.py | 10 ++- .../ai_auto_response/context/quote_context.py | 64 +++++++++++++++++-- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/plugins/ai_auto_response/context/context_builder.py b/plugins/ai_auto_response/context/context_builder.py index 5631fed..3084201 100644 --- a/plugins/ai_auto_response/context/context_builder.py +++ b/plugins/ai_auto_response/context/context_builder.py @@ -401,18 +401,22 @@ class ContextBuilder: if not quote_context: return "" quote_type = quote_context.get("quote_type_label", "引用消息") - quote_sender = quote_context.get("quote_sender_name", "") or "未知成员" + quote_sender = (quote_context.get("quote_sender_name", "") or "").strip() quote_body = quote_context.get("quote_body", "") or "" title = quote_context.get("title", "") or "" lines = [ f"用户这次是在引用消息后发言。", f"引用类型:{quote_type}", - f"被引用发送者:{quote_sender}", + f"被引用发送者:{quote_sender}" if quote_sender and quote_sender != "未知成员" else "", f"图片附件:已附带原图" if quote_context.get("has_image_attachment") else "", f"引用标题:{title}" if title else "", f"被引用内容:{quote_body}" if quote_body else "", ] - return "\n".join([line for line in lines if line]) + payload = [line for line in lines if line] + # 兜底:如果最终只剩“引用类型”,没有可用内容,就不输出引用补充 + if len(payload) <= 2 and not quote_body and not title: + return "" + return "\n".join(payload) @staticmethod def _build_image_prompt(image_context: Dict) -> str: diff --git a/plugins/ai_auto_response/context/quote_context.py b/plugins/ai_auto_response/context/quote_context.py index c24f9b2..dc256ae 100644 --- a/plugins/ai_auto_response/context/quote_context.py +++ b/plugins/ai_auto_response/context/quote_context.py @@ -27,14 +27,16 @@ def parse_quote_context(full_msg: Any, room_id: str, get_sender_name: Callable[[ return {} title = html.unescape(appmsg.findtext("title", "") or "").strip() - quote_sender_name = html.unescape(refer.findtext("displayname", "") or "").strip() - if not quote_sender_name: - quote_sender = html.unescape(refer.findtext("chatusr", "") or "").strip() - quote_sender_name = get_sender_name(room_id, quote_sender) if quote_sender else "未知成员" + quote_sender_name = _extract_quote_sender_name(refer, room_id, get_sender_name) ref_type = int(refer.findtext("type", "0") or 0) ref_content = html.unescape(refer.findtext("content", "") or "").strip() quote_type_label = quote_type_label_for(ref_type) quote_body = build_quote_body(ref_type, ref_content, title) + + # 降噪:引用信息没有有效载荷时,不喂给下游上下文,避免污染LLM判断 + if _is_low_signal_quote(quote_sender_name, quote_body, title, quote_type_label): + return {} + return { "title": title, "quote_sender_name": quote_sender_name, @@ -68,3 +70,57 @@ def build_quote_body(ref_type: int, ref_content: str, title: str) -> str: if title: return title[:220].strip() return ref_content[:220].strip() + + +def _extract_quote_sender_name( + refer: ET.Element, + room_id: str, + get_sender_name: Callable[[str, str], str], +) -> str: + # 常见字段:displayname/chatusr;部分端可能是fromusr/fromnickname/source* + direct_name = _first_non_empty( + refer, + "displayname", + "fromnickname", + "sourcedisplayname", + "source_displayname", + ) + if direct_name: + return direct_name + + quote_sender = _first_non_empty( + refer, + "chatusr", + "fromusr", + "sourceusername", + "source_username", + ) + if quote_sender: + resolved = get_sender_name(room_id, quote_sender) + return (resolved or "").strip() or quote_sender + return "未知成员" + + +def _first_non_empty(root: ET.Element, *tags: str) -> str: + for tag in tags: + value = html.unescape(root.findtext(tag, "") or "").strip() + if value: + return value + return "" + + +def _is_low_signal_quote(quote_sender_name: str, quote_body: str, title: str, quote_type_label: str) -> bool: + sender = (quote_sender_name or "").strip() + body = (quote_body or "").strip() + title_text = (title or "").strip() + type_label = (quote_type_label or "").strip() + + has_sender = sender and sender != "未知成员" + has_body = bool(body) + has_title = bool(title_text) + # 引用消息[数字] 代表类型未知,若同时没有发送者/正文/标题,则直接丢弃 + unknown_type = type_label.startswith("引用消息[") + if unknown_type and (not has_sender) and (not has_body) and (not has_title): + return True + # 普通场景:三者都缺失也丢弃 + return (not has_sender) and (not has_body) and (not has_title)