chore: sync current WechatHookBot workspace

This commit is contained in:
2026-03-09 15:48:45 +08:00
parent 4016c1e6eb
commit 9119e2307d
195 changed files with 24438 additions and 17498 deletions

View File

@@ -0,0 +1,75 @@
# TavilySearch 联网搜索插件配置
# 本插件仅作为 LLM Tool 供 AIChat 插件调用
# 搜索结果会返回给 AIChat 的 AI 进行处理和回复
[tavily]
# Tavily API 密钥列表,支持多 key 轮询
# 获取地址: https://tavily.com/
# 兼容:也可使用 api_key = "xxx"
# 安全提示:请勿将真实 API Key 提交到版本库;已泄露的 key 应立即在 Tavily 控制台撤销,
# 建议改用环境变量或不入库的本地配置文件保存密钥
api_keys = [
"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",
"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",
"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",
"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", # 可添加多个 key
]
#
# 可选:每次请求最多尝试几个 key(默认=全部)
# max_key_attempts = 3
# 搜索深度: "basic" 或 "advanced"
# basic: 快速搜索,适合简单查询
# advanced: 深度搜索,结果更全面但较慢
search_depth = "advanced"
# 每次搜索返回的结果数量 (1-10)
max_results = 5
# 是否包含原始内容(会增加返回数据量)
include_raw_content = false
# 是否在返回给 AI 的结果中带上原文摘录(独立开关)
# 打开后会自动请求 raw_content,并按 raw_content_max_chars 截断
use_raw_content_in_result = false
# 原文摘录最大字符数(防止上下文过长)
raw_content_max_chars = 1800
# 是否包含图片
include_images = true
# 当 include_images = true 时,最多发送几张图片
max_images = 3
image_download_concurrency = 3
image_download_retries = 1
image_download_timeout = 30
# 是否自动拆分多子问题并分别检索
multi_query_split = true
# 单次最多拆分并检索的子问题数
max_sub_queries = 4
# 子问题最小长度(字符)
split_min_chars = 6
# 拆分后是否自动补充上下文前缀(提升“第二问”检索准确度)
prepend_context_for_sub_query = true
# 是否输出“子问题拆分”调试日志(清洗结果、拆分片段、最终子查询)
split_debug_log = false
[behavior]
# 是否启用插件
enabled = true
[proxy]
# 代理配置(可选,用于访问 Tavily API)
enabled = false
type = "http"
host = "38.55.107.103"
port = 53054
[ssl]
# SSL 配置
# 如果遇到 SSL 证书验证失败,可以设置为 false 跳过验证
verify = false

View File

@@ -7,6 +7,9 @@ TavilySearch 联网搜索插件
import tomllib
import aiohttp
import uuid
import asyncio
import re
from pathlib import Path
from typing import List, Optional
from loguru import logger
@@ -25,6 +28,7 @@ class TavilySearch(PluginBase):
self.config = None
self.api_keys = []
self.current_key_index = 0
self.temp_dir: Optional[Path] = None
async def async_init(self):
"""异步初始化"""
@@ -37,7 +41,10 @@ class TavilySearch(PluginBase):
with open(config_path, "rb") as f:
self.config = tomllib.load(f)
self.api_keys = [k for k in self.config["tavily"]["api_keys"] if k and not k.startswith("#")]
self.temp_dir = Path(__file__).parent / "temp"
self.temp_dir.mkdir(exist_ok=True)
self.api_keys = self._load_api_keys()
if not self.api_keys:
logger.warning("TavilySearch: 未配置有效的 API Key")
else:
@@ -47,6 +54,36 @@ class TavilySearch(PluginBase):
logger.error(f"TavilySearch 初始化失败: {e}")
self.config = None
def _load_api_keys(self) -> List[str]:
"""从配置加载 API Keys兼容 api_key / api_keys"""
if not self.config:
return []
tavily_config = self.config.get("tavily", {})
keys: List[str] = []
raw_keys = tavily_config.get("api_keys", [])
if isinstance(raw_keys, str):
keys.extend([k.strip() for k in raw_keys.replace("\n", ",").split(",")])
elif isinstance(raw_keys, list):
keys.extend([str(k).strip() for k in raw_keys])
single_key = str(tavily_config.get("api_key", "")).strip()
if single_key:
keys.append(single_key)
cleaned = []
seen = set()
for k in keys:
if not k or k.startswith("#"):
continue
if k in seen:
continue
seen.add(k)
cleaned.append(k)
return cleaned
def _get_next_api_key(self) -> str:
"""轮询获取下一个 API Key"""
if not self.api_keys:
@@ -55,25 +92,150 @@ class TavilySearch(PluginBase):
self.current_key_index = (self.current_key_index + 1) % len(self.api_keys)
return key
def _clean_query_text(self, text: str) -> str:
"""清洗查询文本"""
cleaned = str(text or "").strip()
if not cleaned:
return ""
cleaned = cleaned.replace("【当前消息】", "").strip()
cleaned = re.sub(r"^(?:@\S+\s*)+", "", cleaned)
cleaned = re.sub(
r"^(?:请|帮我|麻烦|请帮我)?(?:搜索|搜|查|查询|检索|搜一下|查一下|搜索下|搜下)\s*",
"",
cleaned,
)
return cleaned.strip()
def _extract_topic_hint(self, query: str) -> str:
"""提取主题前缀,用于补全后续子问题上下文"""
text = self._clean_query_text(query)
if not text:
return ""
first_part = text
for sep in ("", "以及", "并且", "还有", "同时", "", ",", "", ";", ""):
idx = first_part.find(sep)
if idx > 0:
first_part = first_part[:idx].strip()
break
match = re.match(r"^(.{2,40}?)(?:的|是|有哪些|包括|改动|更新|介绍|详情|内容|情况)", first_part)
topic_hint = match.group(1).strip() if match else ""
if not topic_hint and len(first_part) <= 40:
topic_hint = first_part
topic_hint = re.sub(r"(是什么|有哪些|有啥|是什么样).*$", "", topic_hint).strip()
return topic_hint
def _split_multi_queries(self, query: str, tavily_config: dict) -> List[str]:
"""将复合问题拆分为多个子查询"""
split_debug_log = bool(tavily_config.get("split_debug_log", False))
raw = self._clean_query_text(query)
if not raw:
return []
if split_debug_log:
logger.info(f"[TavilySplit] 原始查询: {query}")
logger.info(f"[TavilySplit] 清洗后查询: {raw}")
max_sub_queries = int(tavily_config.get("max_sub_queries", 4) or 4)
split_min_chars = int(tavily_config.get("split_min_chars", 6) or 6)
prepend_context = bool(tavily_config.get("prepend_context_for_sub_query", True))
normalized = raw
normalized = re.sub(r"(另外|此外|同时|并且|还有|以及|然后|再者|顺便)", "", normalized)
normalized = re.sub(r"[;。!?!?\n\r]+", "", normalized)
parts = [
p.strip(" ,、|")
for p in normalized.split("")
if p.strip(" ,、|")
]
if split_debug_log:
logger.info(f"[TavilySplit] 初步拆分片段: {parts}")
if len(parts) == 1:
single = parts[0]
if "" in single and len(single) >= split_min_chars * 2:
candidate = re.split(r"\s*和\s*", single, maxsplit=1)
if len(candidate) == 2:
left = candidate[0].strip()
right = candidate[1].strip()
if len(left) >= split_min_chars and len(right) >= split_min_chars:
parts = [left, right]
if split_debug_log:
logger.info(f"[TavilySplit] 通过“和”二次拆分: {parts}")
# 语义拆分兜底:即使没有明显连接词,也尽量把“版本改动 + 英雄技能介绍”拆开
if len(parts) == 1:
single = parts[0].strip()
change_keywords = ("改动", "更新", "变更", "调整", "改版", "平衡")
hero_keywords = ("新英雄", "英雄", "技能", "机制", "天赋", "介绍", "详解")
change_pos = min([single.find(k) for k in change_keywords if k in single] or [-1])
hero_pos = min([single.find(k) for k in hero_keywords if k in single] or [-1])
if change_pos >= 0 and hero_pos >= 0 and hero_pos > change_pos:
left = single[:hero_pos].strip(" ,、")
right = single[hero_pos:].strip(" ,、")
if len(left) >= split_min_chars and len(right) >= split_min_chars:
topic_hint = self._extract_topic_hint(left or single)
if topic_hint and topic_hint not in right:
right = f"{topic_hint} {right}".strip()
parts = [left, right]
if split_debug_log:
logger.info(f"[TavilySplit] 语义兜底拆分: {parts}")
deduped: List[str] = []
seen = set()
for p in parts:
if len(p) < split_min_chars:
continue
if p in seen:
continue
seen.add(p)
deduped.append(p)
parts = deduped[:max_sub_queries] if deduped else [raw]
if split_debug_log:
logger.info(f"[TavilySplit] 去重截断后: {parts}")
if prepend_context and len(parts) > 1:
topic_hint = self._extract_topic_hint(parts[0] or raw)
if topic_hint:
with_context: List[str] = []
for idx, p in enumerate(parts):
item = p
if idx > 0 and topic_hint not in item:
item = f"{topic_hint} {item}".strip()
with_context.append(item)
parts = with_context
if split_debug_log:
logger.info(f"[TavilySplit] 主题前缀: {topic_hint}")
logger.info(f"[TavilySplit] 前缀补全后: {parts}")
if split_debug_log:
logger.info(f"[TavilySplit] 最终子查询({len(parts)}): {parts}")
return parts
def _truncate_text(self, text: str, max_chars: int) -> str:
"""按字符数截断文本"""
content = str(text or "").strip()
if max_chars <= 0 or len(content) <= max_chars:
return content
return content[:max_chars].rstrip() + "..."
async def _search_tavily(self, query: str) -> Optional[dict]:
"""调用 Tavily API 进行搜索"""
api_key = self._get_next_api_key()
if not api_key:
logger.error("没有可用的 Tavily API Key")
return None
tavily_config = self.config["tavily"]
proxy_config = self.config.get("proxy", {})
payload = {
"api_key": api_key,
"query": query,
"search_depth": tavily_config.get("search_depth", "basic"),
"max_results": tavily_config.get("max_results", 5),
"include_raw_content": tavily_config.get("include_raw_content", False),
"include_images": tavily_config.get("include_images", False),
}
proxy = None
if proxy_config.get("enabled", False):
proxy_type = proxy_config.get("type", "http")
@@ -97,37 +259,179 @@ class TavilySearch(PluginBase):
ssl_context.verify_mode = ssl.CERT_NONE
connector = aiohttp.TCPConnector(ssl=ssl_context)
if not self.api_keys:
logger.error("没有可用的 Tavily API Key")
return None
max_attempts = min(len(self.api_keys), tavily_config.get("max_key_attempts", len(self.api_keys)))
async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
async with session.post(
"https://api.tavily.com/search",
json=payload,
proxy=proxy
) as resp:
if resp.status == 200:
result = await resp.json()
logger.info(f"Tavily 搜索成功: {query[:30]}...")
logger.info(f"Tavily 原始返回: {result}")
return result
else:
for attempt in range(max_attempts):
api_key = self._get_next_api_key()
if not api_key:
logger.error("没有可用的 Tavily API Key")
return None
payload = {
"api_key": api_key,
"query": query,
"search_depth": tavily_config.get("search_depth", "basic"),
"max_results": tavily_config.get("max_results", 5),
"include_raw_content": (
tavily_config.get("include_raw_content", False)
or tavily_config.get("use_raw_content_in_result", False)
),
"include_images": tavily_config.get("include_images", False),
}
async with session.post(
"https://api.tavily.com/search",
json=payload,
proxy=proxy
) as resp:
if resp.status == 200:
result = await resp.json()
logger.info(f"Tavily 搜索成功: {query[:30]}...")
logger.info(f"Tavily 原始返回: {result}")
return result
error_text = await resp.text()
logger.error(f"Tavily API 错误: {resp.status}, {error_text}")
logger.warning(
f"Tavily API 错误: {resp.status}, 尝试 key {attempt + 1}/{max_attempts}, "
f"body={error_text[:200]}"
)
if resp.status in {401, 403, 429}:
continue
return None
except Exception as e:
logger.error(f"Tavily 搜索失败: {e}")
return None
def _format_search_results(self, results: dict) -> str:
def _extract_image_urls(self, results: dict) -> List[str]:
"""从搜索结果中提取图片 URL"""
if not results:
return []
images = results.get("images", [])
urls: List[str] = []
for item in images:
if isinstance(item, str):
url = item.strip()
elif isinstance(item, dict):
url = (item.get("url") or item.get("image") or item.get("src") or "").strip()
else:
url = ""
if url:
urls.append(url)
return urls
async def _download_image_with_session(
self,
session: aiohttp.ClientSession,
url: str,
proxy: Optional[str],
max_retries: int = 1
) -> Optional[str]:
"""下载图片到本地临时目录(复用 session"""
if not self.temp_dir:
return None
for attempt in range(max_retries + 1):
try:
async with session.get(url, proxy=proxy) as resp:
if resp.status != 200:
if attempt >= max_retries:
return None
await asyncio.sleep(0.5 * (attempt + 1))
continue
content = await resp.read()
ext = Path(url).suffix.lower()
if ext not in {".jpg", ".jpeg", ".png", ".webp"}:
ext = ".jpg"
filename = f"tavily_{uuid.uuid4().hex}{ext}"
save_path = self.temp_dir / filename
with open(save_path, "wb") as f:
f.write(content)
return str(save_path)
except Exception as e:
if attempt < max_retries:
await asyncio.sleep(0.5 * (attempt + 1))
continue
logger.warning(f"下载图片失败: {url} -> {e}")
return None
async def _download_image(self, url: str) -> Optional[str]:
"""下载图片到本地临时目录(兼容旧调用)"""
if not self.temp_dir:
return None
try:
import ssl
timeout = aiohttp.ClientTimeout(total=30)
proxy_config = self.config.get("proxy", {}) if self.config else {}
proxy = None
if proxy_config.get("enabled", False):
proxy_type = proxy_config.get("type", "http")
proxy_host = proxy_config.get("host", "127.0.0.1")
proxy_port = proxy_config.get("port", 7890)
proxy = f"{proxy_type}://{proxy_host}:{proxy_port}"
ssl_config = self.config.get("ssl", {}) if self.config else {}
ssl_verify = ssl_config.get("verify", True)
ssl_context = None
if not ssl_verify:
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
connector = aiohttp.TCPConnector(ssl=ssl_context) if ssl_context else None
async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
return await self._download_image_with_session(session, url, proxy, max_retries=1)
except Exception as e:
logger.warning(f"下载图片失败: {url} -> {e}")
return None
def _format_search_results(
self,
results: dict,
*,
include_raw_content: bool = False,
raw_content_max_chars: int = 1800,
section_title: Optional[str] = None,
) -> str:
"""格式化搜索结果供 AI 处理"""
if not results or "results" not in results:
if section_title:
return f"{section_title}\n未找到相关搜索结果"
return "未找到相关搜索结果"
formatted = []
if section_title:
formatted.append(section_title)
for i, item in enumerate(results["results"], 1):
title = item.get("title", "无标题")
content = item.get("content", "")
url = item.get("url", "")
formatted.append(f"【结果 {i}\n标题: {title}\n内容: {content}\n来源: {url}\n")
block = [
f"【结果 {i}",
f"标题: {title}",
f"内容: {content}",
f"来源: {url}",
]
if include_raw_content:
raw_content = self._truncate_text(item.get("raw_content", ""), raw_content_max_chars)
if raw_content:
block.append(f"原文摘录: {raw_content}")
formatted.append("\n".join(block) + "\n")
return "\n".join(formatted)
@@ -141,16 +445,21 @@ class TavilySearch(PluginBase):
"type": "function",
"function": {
"name": "tavily_web_search",
"description": "仅当用户明确要求“联网搜索/查资料/最新信息/来源/权威说法”或需要事实核实时调用;不要在闲聊中触发。",
"description": (
"执行联网检索并返回可引用的信息来源。"
"仅在用户明确要求查资料、最新信息、权威来源或需要事实核实时调用;"
"可直接回答的问题不要触发该工具。"
),
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "搜索关键词或问题,建议使用简洁明确的搜索词"
"description": "检索问题或关键词。应简洁明确,避免口语噪声。"
}
},
"required": ["query"]
"required": ["query"],
"additionalProperties": False
}
}
}
@@ -175,16 +484,124 @@ class TavilySearch(PluginBase):
if not query:
return {"success": False, "message": "搜索关键词不能为空"}
tavily_config = self.config.get("tavily", {})
multi_query_split = bool(tavily_config.get("multi_query_split", True))
use_raw_content_in_result = bool(tavily_config.get("use_raw_content_in_result", False))
raw_content_max_chars = int(tavily_config.get("raw_content_max_chars", 1800) or 1800)
try:
logger.info(f"开始 Tavily 搜索: {query}")
# 调用 Tavily 搜索
search_results = await self._search_tavily(query)
if not search_results:
split_debug_log = bool(tavily_config.get("split_debug_log", False))
if multi_query_split:
sub_queries = self._split_multi_queries(query, tavily_config)
else:
cleaned_query = self._clean_query_text(query)
sub_queries = [cleaned_query] if cleaned_query else [str(query).strip()]
if not sub_queries:
return {"success": False, "message": "搜索关键词不能为空"}
if split_debug_log:
logger.info(f"Tavily 子问题拆分完成,共 {len(sub_queries)} 个: {sub_queries}")
else:
logger.info(f"Tavily 子问题拆分完成,共 {len(sub_queries)}")
search_batches = []
failed_queries = []
for sub_query in sub_queries:
result = await self._search_tavily(sub_query)
if result:
search_batches.append((sub_query, result))
else:
failed_queries.append(sub_query)
if not search_batches:
return {"success": False, "message": "搜索失败,请稍后重试"}
# 发送搜索图片(若开启 include_images
if tavily_config.get("include_images", False):
image_urls = []
for _sub_query, sub_result in search_batches:
image_urls.extend(self._extract_image_urls(sub_result))
if image_urls:
image_urls = list(dict.fromkeys(image_urls))
max_images = int(tavily_config.get("max_images", 3) or 3)
download_concurrency = int(tavily_config.get("image_download_concurrency", 3) or 3)
download_retries = int(tavily_config.get("image_download_retries", 1) or 1)
download_timeout = int(tavily_config.get("image_download_timeout", 30) or 30)
import ssl
timeout = aiohttp.ClientTimeout(total=download_timeout)
proxy_config = self.config.get("proxy", {}) if self.config else {}
proxy = None
if proxy_config.get("enabled", False):
proxy_type = proxy_config.get("type", "http")
proxy_host = proxy_config.get("host", "127.0.0.1")
proxy_port = proxy_config.get("port", 7890)
proxy = f"{proxy_type}://{proxy_host}:{proxy_port}"
ssl_config = self.config.get("ssl", {}) if self.config else {}
ssl_verify = ssl_config.get("verify", True)
ssl_context = None
if not ssl_verify:
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
connector = aiohttp.TCPConnector(ssl=ssl_context) if ssl_context else None
semaphore = asyncio.Semaphore(max(1, download_concurrency))
async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
async def fetch_image(url: str) -> Optional[str]:
async with semaphore:
return await self._download_image_with_session(
session,
url,
proxy,
max_retries=download_retries
)
tasks = [fetch_image(url) for url in image_urls[:max_images]]
results = await asyncio.gather(*tasks, return_exceptions=True)
sent = 0
for result in results:
if sent >= max_images:
break
if isinstance(result, str) and result:
await bot.send_image(from_wxid, result)
sent += 1
# 格式化搜索结果
formatted_results = self._format_search_results(search_results)
if len(search_batches) == 1:
formatted_results = self._format_search_results(
search_batches[0][1],
include_raw_content=use_raw_content_in_result,
raw_content_max_chars=raw_content_max_chars,
)
else:
sections = []
for idx, (sub_query, sub_result) in enumerate(search_batches, 1):
sections.append(
self._format_search_results(
sub_result,
include_raw_content=use_raw_content_in_result,
raw_content_max_chars=raw_content_max_chars,
section_title=f"【子问题 {idx}{sub_query}",
)
)
formatted_results = "\n\n".join(sections)
if failed_queries:
failed_text = "\n".join([f"- {q}" for q in failed_queries])
formatted_results = (
f"{formatted_results}\n\n"
f"【未检索成功的子问题】\n{failed_text}"
)
logger.success(f"Tavily 搜索完成: {query[:30]}...")