chore: sync current WechatHookBot workspace

This commit is contained in:
2026-03-09 15:48:45 +08:00
parent 4016c1e6eb
commit 9119e2307d
195 changed files with 24438 additions and 17498 deletions

View File

@@ -0,0 +1,75 @@
# TavilySearch 联网搜索插件配置
# 本插件仅作为 LLM Tool 供 AIChat 插件调用
# 搜索结果会返回给 AIChat 的 AI 进行处理和回复
[tavily]
# Tavily API 密钥列表,支持多 key 轮询
# 获取地址: https://tavily.com/
# 兼容:也可使用 api_key = "xxx"
# 安全提示:请勿将真实 API Key 提交到版本库;已泄露的 key 应立即在 Tavily 控制台撤销,
# 建议改用环境变量或不入库的本地配置文件保存密钥
api_keys = [
"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",
"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",
"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",
"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", # 可添加多个 key
]
#
# 可选:每次请求最多尝试几个 key(默认=全部)
# max_key_attempts = 3
# 搜索深度: "basic" 或 "advanced"
# basic: 快速搜索,适合简单查询
# advanced: 深度搜索,结果更全面但较慢
search_depth = "advanced"
# 每次搜索返回的结果数量 (1-10)
max_results = 5
# 是否包含原始内容(会增加返回数据量)
include_raw_content = false
# 是否在返回给 AI 的结果中带上原文摘录(独立开关)
# 打开后会自动请求 raw_content,并按 raw_content_max_chars 截断
use_raw_content_in_result = false
# 原文摘录最大字符数(防止上下文过长)
raw_content_max_chars = 1800
# 是否包含图片
include_images = true
# 当 include_images = true 时,最多发送几张图片
max_images = 3
image_download_concurrency = 3
image_download_retries = 1
image_download_timeout = 30
# 是否自动拆分多子问题并分别检索
multi_query_split = true
# 单次最多拆分并检索的子问题数
max_sub_queries = 4
# 子问题最小长度(字符)
split_min_chars = 6
# 拆分后是否自动补充上下文前缀(提升“第二问”检索准确度)
prepend_context_for_sub_query = true
# 是否输出“子问题拆分”调试日志(清洗结果、拆分片段、最终子查询)
split_debug_log = false
[behavior]
# 是否启用插件
enabled = true
[proxy]
# 代理配置(可选,用于访问 Tavily API)
enabled = false
type = "http"
host = "38.55.107.103"
port = 53054
[ssl]
# SSL 配置
# 如果遇到 SSL 证书验证失败,可以设置为 false 跳过验证
verify = false

View File

@@ -7,6 +7,9 @@ TavilySearch 联网搜索插件
import tomllib
import aiohttp
import uuid
import asyncio
import re
from pathlib import Path
from typing import List, Optional
from loguru import logger
@@ -25,6 +28,7 @@ class TavilySearch(PluginBase):
self.config = None
self.api_keys = []
self.current_key_index = 0
self.temp_dir: Optional[Path] = None
async def async_init(self):
"""异步初始化"""
@@ -37,7 +41,10 @@ class TavilySearch(PluginBase):
with open(config_path, "rb") as f:
self.config = tomllib.load(f)
self.api_keys = [k for k in self.config["tavily"]["api_keys"] if k and not k.startswith("#")]
self.temp_dir = Path(__file__).parent / "temp"
self.temp_dir.mkdir(exist_ok=True)
self.api_keys = self._load_api_keys()
if not self.api_keys:
logger.warning("TavilySearch: 未配置有效的 API Key")
else:
@@ -47,6 +54,36 @@ class TavilySearch(PluginBase):
logger.error(f"TavilySearch 初始化失败: {e}")
self.config = None
def _load_api_keys(self) -> List[str]:
"""从配置加载 API Keys兼容 api_key / api_keys"""
if not self.config:
return []
tavily_config = self.config.get("tavily", {})
keys: List[str] = []
raw_keys = tavily_config.get("api_keys", [])
if isinstance(raw_keys, str):
keys.extend([k.strip() for k in raw_keys.replace("\n", ",").split(",")])
elif isinstance(raw_keys, list):
keys.extend([str(k).strip() for k in raw_keys])
single_key = str(tavily_config.get("api_key", "")).strip()
if single_key:
keys.append(single_key)
cleaned = []
seen = set()
for k in keys:
if not k or k.startswith("#"):
continue
if k in seen:
continue
seen.add(k)
cleaned.append(k)
return cleaned
def _get_next_api_key(self) -> str:
"""轮询获取下一个 API Key"""
if not self.api_keys:
@@ -55,25 +92,150 @@ class TavilySearch(PluginBase):
self.current_key_index = (self.current_key_index + 1) % len(self.api_keys)
return key
def _clean_query_text(self, text: str) -> str:
"""清洗查询文本"""
cleaned = str(text or "").strip()
if not cleaned:
return ""
cleaned = cleaned.replace("【当前消息】", "").strip()
cleaned = re.sub(r"^(?:@\S+\s*)+", "", cleaned)
cleaned = re.sub(
r"^(?:请|帮我|麻烦|请帮我)?(?:搜索|搜|查|查询|检索|搜一下|查一下|搜索下|搜下)\s*",
"",
cleaned,
)
return cleaned.strip()
def _extract_topic_hint(self, query: str) -> str:
"""提取主题前缀,用于补全后续子问题上下文"""
text = self._clean_query_text(query)
if not text:
return ""
first_part = text
for sep in ("", "以及", "并且", "还有", "同时", "", ",", "", ";", ""):
idx = first_part.find(sep)
if idx > 0:
first_part = first_part[:idx].strip()
break
match = re.match(r"^(.{2,40}?)(?:的|是|有哪些|包括|改动|更新|介绍|详情|内容|情况)", first_part)
topic_hint = match.group(1).strip() if match else ""
if not topic_hint and len(first_part) <= 40:
topic_hint = first_part
topic_hint = re.sub(r"(是什么|有哪些|有啥|是什么样).*$", "", topic_hint).strip()
return topic_hint
def _split_multi_queries(self, query: str, tavily_config: dict) -> List[str]:
"""将复合问题拆分为多个子查询"""
split_debug_log = bool(tavily_config.get("split_debug_log", False))
raw = self._clean_query_text(query)
if not raw:
return []
if split_debug_log:
logger.info(f"[TavilySplit] 原始查询: {query}")
logger.info(f"[TavilySplit] 清洗后查询: {raw}")
max_sub_queries = int(tavily_config.get("max_sub_queries", 4) or 4)
split_min_chars = int(tavily_config.get("split_min_chars", 6) or 6)
prepend_context = bool(tavily_config.get("prepend_context_for_sub_query", True))
normalized = raw
normalized = re.sub(r"(另外|此外|同时|并且|还有|以及|然后|再者|顺便)", "", normalized)
normalized = re.sub(r"[;。!?!?\n\r]+", "", normalized)
parts = [
p.strip(" ,、|")
for p in normalized.split("")
if p.strip(" ,、|")
]
if split_debug_log:
logger.info(f"[TavilySplit] 初步拆分片段: {parts}")
if len(parts) == 1:
single = parts[0]
if "" in single and len(single) >= split_min_chars * 2:
candidate = re.split(r"\s*和\s*", single, maxsplit=1)
if len(candidate) == 2:
left = candidate[0].strip()
right = candidate[1].strip()
if len(left) >= split_min_chars and len(right) >= split_min_chars:
parts = [left, right]
if split_debug_log:
logger.info(f"[TavilySplit] 通过“和”二次拆分: {parts}")
# 语义拆分兜底:即使没有明显连接词,也尽量把“版本改动 + 英雄技能介绍”拆开
if len(parts) == 1:
single = parts[0].strip()
change_keywords = ("改动", "更新", "变更", "调整", "改版", "平衡")
hero_keywords = ("新英雄", "英雄", "技能", "机制", "天赋", "介绍", "详解")
change_pos = min([single.find(k) for k in change_keywords if k in single] or [-1])
hero_pos = min([single.find(k) for k in hero_keywords if k in single] or [-1])
if change_pos >= 0 and hero_pos >= 0 and hero_pos > change_pos:
left = single[:hero_pos].strip(" ,、")
right = single[hero_pos:].strip(" ,、")
if len(left) >= split_min_chars and len(right) >= split_min_chars:
topic_hint = self._extract_topic_hint(left or single)
if topic_hint and topic_hint not in right:
right = f"{topic_hint} {right}".strip()
parts = [left, right]
if split_debug_log:
logger.info(f"[TavilySplit] 语义兜底拆分: {parts}")
deduped: List[str] = []
seen = set()
for p in parts:
if len(p) < split_min_chars:
continue
if p in seen:
continue
seen.add(p)
deduped.append(p)
parts = deduped[:max_sub_queries] if deduped else [raw]
if split_debug_log:
logger.info(f"[TavilySplit] 去重截断后: {parts}")
if prepend_context and len(parts) > 1:
topic_hint = self._extract_topic_hint(parts[0] or raw)
if topic_hint:
with_context: List[str] = []
for idx, p in enumerate(parts):
item = p
if idx > 0 and topic_hint not in item:
item = f"{topic_hint} {item}".strip()
with_context.append(item)
parts = with_context
if split_debug_log:
logger.info(f"[TavilySplit] 主题前缀: {topic_hint}")
logger.info(f"[TavilySplit] 前缀补全后: {parts}")
if split_debug_log:
logger.info(f"[TavilySplit] 最终子查询({len(parts)}): {parts}")
return parts
def _truncate_text(self, text: str, max_chars: int) -> str:
"""按字符数截断文本"""
content = str(text or "").strip()
if max_chars <= 0 or len(content) <= max_chars:
return content
return content[:max_chars].rstrip() + "..."
async def _search_tavily(self, query: str) -> Optional[dict]:
"""调用 Tavily API 进行搜索"""
api_key = self._get_next_api_key()
if not api_key:
logger.error("没有可用的 Tavily API Key")
return None
tavily_config = self.config["tavily"]
proxy_config = self.config.get("proxy", {})
payload = {
"api_key": api_key,
"query": query,
"search_depth": tavily_config.get("search_depth", "basic"),
"max_results": tavily_config.get("max_results", 5),
"include_raw_content": tavily_config.get("include_raw_content", False),
"include_images": tavily_config.get("include_images", False),
}
proxy = None
if proxy_config.get("enabled", False):
proxy_type = proxy_config.get("type", "http")
@@ -97,37 +259,179 @@ class TavilySearch(PluginBase):
ssl_context.verify_mode = ssl.CERT_NONE
connector = aiohttp.TCPConnector(ssl=ssl_context)
if not self.api_keys:
logger.error("没有可用的 Tavily API Key")
return None
max_attempts = min(len(self.api_keys), tavily_config.get("max_key_attempts", len(self.api_keys)))
async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
async with session.post(
"https://api.tavily.com/search",
json=payload,
proxy=proxy
) as resp:
if resp.status == 200:
result = await resp.json()
logger.info(f"Tavily 搜索成功: {query[:30]}...")
logger.info(f"Tavily 原始返回: {result}")
return result
else:
for attempt in range(max_attempts):
api_key = self._get_next_api_key()
if not api_key:
logger.error("没有可用的 Tavily API Key")
return None
payload = {
"api_key": api_key,
"query": query,
"search_depth": tavily_config.get("search_depth", "basic"),
"max_results": tavily_config.get("max_results", 5),
"include_raw_content": (
tavily_config.get("include_raw_content", False)
or tavily_config.get("use_raw_content_in_result", False)
),
"include_images": tavily_config.get("include_images", False),
}
async with session.post(
"https://api.tavily.com/search",
json=payload,
proxy=proxy
) as resp:
if resp.status == 200:
result = await resp.json()
logger.info(f"Tavily 搜索成功: {query[:30]}...")
logger.info(f"Tavily 原始返回: {result}")
return result
error_text = await resp.text()
logger.error(f"Tavily API 错误: {resp.status}, {error_text}")
logger.warning(
f"Tavily API 错误: {resp.status}, 尝试 key {attempt + 1}/{max_attempts}, "
f"body={error_text[:200]}"
)
if resp.status in {401, 403, 429}:
continue
return None
except Exception as e:
logger.error(f"Tavily 搜索失败: {e}")
return None
def _format_search_results(self, results: dict) -> str:
def _extract_image_urls(self, results: dict) -> List[str]:
"""从搜索结果中提取图片 URL"""
if not results:
return []
images = results.get("images", [])
urls: List[str] = []
for item in images:
if isinstance(item, str):
url = item.strip()
elif isinstance(item, dict):
url = (item.get("url") or item.get("image") or item.get("src") or "").strip()
else:
url = ""
if url:
urls.append(url)
return urls
async def _download_image_with_session(
self,
session: aiohttp.ClientSession,
url: str,
proxy: Optional[str],
max_retries: int = 1
) -> Optional[str]:
"""下载图片到本地临时目录(复用 session"""
if not self.temp_dir:
return None
for attempt in range(max_retries + 1):
try:
async with session.get(url, proxy=proxy) as resp:
if resp.status != 200:
if attempt >= max_retries:
return None
await asyncio.sleep(0.5 * (attempt + 1))
continue
content = await resp.read()
ext = Path(url).suffix.lower()
if ext not in {".jpg", ".jpeg", ".png", ".webp"}:
ext = ".jpg"
filename = f"tavily_{uuid.uuid4().hex}{ext}"
save_path = self.temp_dir / filename
with open(save_path, "wb") as f:
f.write(content)
return str(save_path)
except Exception as e:
if attempt < max_retries:
await asyncio.sleep(0.5 * (attempt + 1))
continue
logger.warning(f"下载图片失败: {url} -> {e}")
return None
async def _download_image(self, url: str) -> Optional[str]:
"""下载图片到本地临时目录(兼容旧调用)"""
if not self.temp_dir:
return None
try:
import ssl
timeout = aiohttp.ClientTimeout(total=30)
proxy_config = self.config.get("proxy", {}) if self.config else {}
proxy = None
if proxy_config.get("enabled", False):
proxy_type = proxy_config.get("type", "http")
proxy_host = proxy_config.get("host", "127.0.0.1")
proxy_port = proxy_config.get("port", 7890)
proxy = f"{proxy_type}://{proxy_host}:{proxy_port}"
ssl_config = self.config.get("ssl", {}) if self.config else {}
ssl_verify = ssl_config.get("verify", True)
ssl_context = None
if not ssl_verify:
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
connector = aiohttp.TCPConnector(ssl=ssl_context) if ssl_context else None
async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
return await self._download_image_with_session(session, url, proxy, max_retries=1)
except Exception as e:
logger.warning(f"下载图片失败: {url} -> {e}")
return None
def _format_search_results(
self,
results: dict,
*,
include_raw_content: bool = False,
raw_content_max_chars: int = 1800,
section_title: Optional[str] = None,
) -> str:
"""格式化搜索结果供 AI 处理"""
if not results or "results" not in results:
if section_title:
return f"{section_title}\n未找到相关搜索结果"
return "未找到相关搜索结果"
formatted = []
if section_title:
formatted.append(section_title)
for i, item in enumerate(results["results"], 1):
title = item.get("title", "无标题")
content = item.get("content", "")
url = item.get("url", "")
formatted.append(f"【结果 {i}\n标题: {title}\n内容: {content}\n来源: {url}\n")
block = [
f"【结果 {i}",
f"标题: {title}",
f"内容: {content}",
f"来源: {url}",
]
if include_raw_content:
raw_content = self._truncate_text(item.get("raw_content", ""), raw_content_max_chars)
if raw_content:
block.append(f"原文摘录: {raw_content}")
formatted.append("\n".join(block) + "\n")
return "\n".join(formatted)
@@ -141,16 +445,21 @@ class TavilySearch(PluginBase):
"type": "function",
"function": {
"name": "tavily_web_search",
"description": "仅当用户明确要求“联网搜索/查资料/最新信息/来源/权威说法”或需要事实核实时调用;不要在闲聊中触发。",
"description": (
"执行联网检索并返回可引用的信息来源。"
"仅在用户明确要求查资料、最新信息、权威来源或需要事实核实时调用;"
"可直接回答的问题不要触发该工具。"
),
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "搜索关键词或问题,建议使用简洁明确的搜索词"
"description": "检索问题或关键词。应简洁明确,避免口语噪声。"
}
},
"required": ["query"]
"required": ["query"],
"additionalProperties": False
}
}
}
@@ -175,16 +484,124 @@ class TavilySearch(PluginBase):
if not query:
return {"success": False, "message": "搜索关键词不能为空"}
tavily_config = self.config.get("tavily", {})
multi_query_split = bool(tavily_config.get("multi_query_split", True))
use_raw_content_in_result = bool(tavily_config.get("use_raw_content_in_result", False))
raw_content_max_chars = int(tavily_config.get("raw_content_max_chars", 1800) or 1800)
try:
logger.info(f"开始 Tavily 搜索: {query}")
# 调用 Tavily 搜索
search_results = await self._search_tavily(query)
if not search_results:
split_debug_log = bool(tavily_config.get("split_debug_log", False))
if multi_query_split:
sub_queries = self._split_multi_queries(query, tavily_config)
else:
cleaned_query = self._clean_query_text(query)
sub_queries = [cleaned_query] if cleaned_query else [str(query).strip()]
if not sub_queries:
return {"success": False, "message": "搜索关键词不能为空"}
if split_debug_log:
logger.info(f"Tavily 子问题拆分完成,共 {len(sub_queries)} 个: {sub_queries}")
else:
logger.info(f"Tavily 子问题拆分完成,共 {len(sub_queries)}")
search_batches = []
failed_queries = []
for sub_query in sub_queries:
result = await self._search_tavily(sub_query)
if result:
search_batches.append((sub_query, result))
else:
failed_queries.append(sub_query)
if not search_batches:
return {"success": False, "message": "搜索失败,请稍后重试"}
# 发送搜索图片(若开启 include_images
if tavily_config.get("include_images", False):
image_urls = []
for _sub_query, sub_result in search_batches:
image_urls.extend(self._extract_image_urls(sub_result))
if image_urls:
image_urls = list(dict.fromkeys(image_urls))
max_images = int(tavily_config.get("max_images", 3) or 3)
download_concurrency = int(tavily_config.get("image_download_concurrency", 3) or 3)
download_retries = int(tavily_config.get("image_download_retries", 1) or 1)
download_timeout = int(tavily_config.get("image_download_timeout", 30) or 30)
import ssl
timeout = aiohttp.ClientTimeout(total=download_timeout)
proxy_config = self.config.get("proxy", {}) if self.config else {}
proxy = None
if proxy_config.get("enabled", False):
proxy_type = proxy_config.get("type", "http")
proxy_host = proxy_config.get("host", "127.0.0.1")
proxy_port = proxy_config.get("port", 7890)
proxy = f"{proxy_type}://{proxy_host}:{proxy_port}"
ssl_config = self.config.get("ssl", {}) if self.config else {}
ssl_verify = ssl_config.get("verify", True)
ssl_context = None
if not ssl_verify:
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
connector = aiohttp.TCPConnector(ssl=ssl_context) if ssl_context else None
semaphore = asyncio.Semaphore(max(1, download_concurrency))
async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
async def fetch_image(url: str) -> Optional[str]:
async with semaphore:
return await self._download_image_with_session(
session,
url,
proxy,
max_retries=download_retries
)
tasks = [fetch_image(url) for url in image_urls[:max_images]]
results = await asyncio.gather(*tasks, return_exceptions=True)
sent = 0
for result in results:
if sent >= max_images:
break
if isinstance(result, str) and result:
await bot.send_image(from_wxid, result)
sent += 1
# 格式化搜索结果
formatted_results = self._format_search_results(search_results)
if len(search_batches) == 1:
formatted_results = self._format_search_results(
search_batches[0][1],
include_raw_content=use_raw_content_in_result,
raw_content_max_chars=raw_content_max_chars,
)
else:
sections = []
for idx, (sub_query, sub_result) in enumerate(search_batches, 1):
sections.append(
self._format_search_results(
sub_result,
include_raw_content=use_raw_content_in_result,
raw_content_max_chars=raw_content_max_chars,
section_title=f"【子问题 {idx}{sub_query}",
)
)
formatted_results = "\n\n".join(sections)
if failed_queries:
failed_text = "\n".join([f"- {q}" for q in failed_queries])
formatted_results = (
f"{formatted_results}\n\n"
f"【未检索成功的子问题】\n{failed_text}"
)
logger.success(f"Tavily 搜索完成: {query[:30]}...")