chore: sync current WechatHookBot workspace
This commit is contained in:
75
plugins/TavilySearch/config.toml
Normal file
75
plugins/TavilySearch/config.toml
Normal file
@@ -0,0 +1,75 @@
|
||||
# TavilySearch 联网搜索插件配置
|
||||
# 本插件仅作为 LLM Tool 供 AIChat 插件调用
|
||||
# 搜索结果会返回给 AIChat 的 AI 进行处理和回复
|
||||
|
||||
[tavily]
|
||||
# Tavily API 密钥列表,支持多 key 轮询
|
||||
# 获取地址: https://tavily.com/
|
||||
# 兼容:也可使用 api_key = "xxx"
|
||||
# SECURITY: real, live-looking API keys were committed here. Rotate these keys
# at https://tavily.com/ immediately, and load keys from an environment
# variable or an untracked secrets file instead of committing them.
api_keys = [
    "tvly-dev-REPLACE_ME_1",
    "tvly-dev-REPLACE_ME_2",  # 可添加多个 key
]
|
||||
#
|
||||
# 可选:每次请求最多尝试几个 key(默认=全部)
|
||||
# max_key_attempts = 3
|
||||
|
||||
# 搜索深度: "basic" 或 "advanced"
|
||||
# basic: 快速搜索,适合简单查询
|
||||
# advanced: 深度搜索,结果更全面但较慢
|
||||
search_depth = "advanced"
|
||||
|
||||
# 每次搜索返回的结果数量 (1-10)
|
||||
max_results = 5
|
||||
|
||||
# 是否包含原始内容(会增加返回数据量)
|
||||
include_raw_content = false
|
||||
|
||||
# 是否在返回给 AI 的结果中带上原文摘录(独立开关)
|
||||
# 打开后会自动请求 raw_content,并按 raw_content_max_chars 截断
|
||||
use_raw_content_in_result = false
|
||||
|
||||
# 原文摘录最大字符数(防止上下文过长)
|
||||
raw_content_max_chars = 1800
|
||||
|
||||
# 是否包含图片
|
||||
include_images = true
|
||||
|
||||
# 当 include_images = true 时,最多发送几张图片
|
||||
max_images = 3
|
||||
image_download_concurrency = 3
|
||||
image_download_retries = 1
|
||||
image_download_timeout = 30
|
||||
|
||||
# 是否自动拆分多子问题并分别检索
|
||||
multi_query_split = true
|
||||
|
||||
# 单次最多拆分并检索的子问题数
|
||||
max_sub_queries = 4
|
||||
|
||||
# 子问题最小长度(字符)
|
||||
split_min_chars = 6
|
||||
|
||||
# 拆分后是否自动补充上下文前缀(提升“第二问”检索准确度)
|
||||
prepend_context_for_sub_query = true
|
||||
|
||||
# 是否输出“子问题拆分”调试日志(清洗结果、拆分片段、最终子查询)
|
||||
split_debug_log = false
|
||||
|
||||
[behavior]
|
||||
# 是否启用插件
|
||||
enabled = true
|
||||
|
||||
[proxy]
|
||||
# 代理配置(可选,用于访问 Tavily API)
|
||||
enabled = false
|
||||
type = "http"
|
||||
host = "38.55.107.103"
|
||||
port = 53054
|
||||
|
||||
[ssl]
|
||||
# SSL 配置
|
||||
# 如果遇到 SSL 证书验证失败,可以设置为 false 跳过验证
|
||||
verify = false
|
||||
@@ -7,6 +7,9 @@ TavilySearch 联网搜索插件
|
||||
|
||||
import tomllib
|
||||
import aiohttp
|
||||
import uuid
|
||||
import asyncio
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from loguru import logger
|
||||
@@ -25,6 +28,7 @@ class TavilySearch(PluginBase):
|
||||
self.config = None
|
||||
self.api_keys = []
|
||||
self.current_key_index = 0
|
||||
self.temp_dir: Optional[Path] = None
|
||||
|
||||
async def async_init(self):
|
||||
"""异步初始化"""
|
||||
@@ -37,7 +41,10 @@ class TavilySearch(PluginBase):
|
||||
with open(config_path, "rb") as f:
|
||||
self.config = tomllib.load(f)
|
||||
|
||||
self.api_keys = [k for k in self.config["tavily"]["api_keys"] if k and not k.startswith("#")]
|
||||
self.temp_dir = Path(__file__).parent / "temp"
|
||||
self.temp_dir.mkdir(exist_ok=True)
|
||||
|
||||
self.api_keys = self._load_api_keys()
|
||||
if not self.api_keys:
|
||||
logger.warning("TavilySearch: 未配置有效的 API Key")
|
||||
else:
|
||||
@@ -47,6 +54,36 @@ class TavilySearch(PluginBase):
|
||||
logger.error(f"TavilySearch 初始化失败: {e}")
|
||||
self.config = None
|
||||
|
||||
def _load_api_keys(self) -> List[str]:
|
||||
"""从配置加载 API Keys(兼容 api_key / api_keys)"""
|
||||
if not self.config:
|
||||
return []
|
||||
|
||||
tavily_config = self.config.get("tavily", {})
|
||||
keys: List[str] = []
|
||||
|
||||
raw_keys = tavily_config.get("api_keys", [])
|
||||
if isinstance(raw_keys, str):
|
||||
keys.extend([k.strip() for k in raw_keys.replace("\n", ",").split(",")])
|
||||
elif isinstance(raw_keys, list):
|
||||
keys.extend([str(k).strip() for k in raw_keys])
|
||||
|
||||
single_key = str(tavily_config.get("api_key", "")).strip()
|
||||
if single_key:
|
||||
keys.append(single_key)
|
||||
|
||||
cleaned = []
|
||||
seen = set()
|
||||
for k in keys:
|
||||
if not k or k.startswith("#"):
|
||||
continue
|
||||
if k in seen:
|
||||
continue
|
||||
seen.add(k)
|
||||
cleaned.append(k)
|
||||
|
||||
return cleaned
|
||||
|
||||
def _get_next_api_key(self) -> str:
|
||||
"""轮询获取下一个 API Key"""
|
||||
if not self.api_keys:
|
||||
@@ -55,25 +92,150 @@ class TavilySearch(PluginBase):
|
||||
self.current_key_index = (self.current_key_index + 1) % len(self.api_keys)
|
||||
return key
|
||||
|
||||
def _clean_query_text(self, text: str) -> str:
|
||||
"""清洗查询文本"""
|
||||
cleaned = str(text or "").strip()
|
||||
if not cleaned:
|
||||
return ""
|
||||
|
||||
cleaned = cleaned.replace("【当前消息】", "").strip()
|
||||
cleaned = re.sub(r"^(?:@\S+\s*)+", "", cleaned)
|
||||
cleaned = re.sub(
|
||||
r"^(?:请|帮我|麻烦|请帮我)?(?:搜索|搜|查|查询|检索|搜一下|查一下|搜索下|搜下)\s*",
|
||||
"",
|
||||
cleaned,
|
||||
)
|
||||
return cleaned.strip()
|
||||
|
||||
def _extract_topic_hint(self, query: str) -> str:
|
||||
"""提取主题前缀,用于补全后续子问题上下文"""
|
||||
text = self._clean_query_text(query)
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
first_part = text
|
||||
for sep in ("和", "以及", "并且", "还有", "同时", ",", ",", ";", ";", "。"):
|
||||
idx = first_part.find(sep)
|
||||
if idx > 0:
|
||||
first_part = first_part[:idx].strip()
|
||||
break
|
||||
|
||||
match = re.match(r"^(.{2,40}?)(?:的|是|有哪些|包括|改动|更新|介绍|详情|内容|情况)", first_part)
|
||||
topic_hint = match.group(1).strip() if match else ""
|
||||
if not topic_hint and len(first_part) <= 40:
|
||||
topic_hint = first_part
|
||||
|
||||
topic_hint = re.sub(r"(是什么|有哪些|有啥|是什么样).*$", "", topic_hint).strip()
|
||||
return topic_hint
|
||||
|
||||
def _split_multi_queries(self, query: str, tavily_config: dict) -> List[str]:
    """Split a compound question into several sub-queries.

    Pipeline: clean the text, turn connective words and sentence-ending
    punctuation into "|" and split on it, then apply two single-part
    fallbacks (a one-shot split on "和", then a keyword-based semantic
    split), de-duplicate, cap at max_sub_queries, and optionally prepend
    a topic prefix to later sub-queries so they stay searchable on their
    own.

    Returns an empty list only when the cleaned query is empty; otherwise
    at least one sub-query (possibly the whole cleaned query) is returned.
    """
    split_debug_log = bool(tavily_config.get("split_debug_log", False))
    raw = self._clean_query_text(query)
    if not raw:
        return []

    if split_debug_log:
        logger.info(f"[TavilySplit] 原始查询: {query}")
        logger.info(f"[TavilySplit] 清洗后查询: {raw}")

    # Config knobs; `or` guards against explicit 0/None/"" in the config.
    max_sub_queries = int(tavily_config.get("max_sub_queries", 4) or 4)
    split_min_chars = int(tavily_config.get("split_min_chars", 6) or 6)
    prepend_context = bool(tavily_config.get("prepend_context_for_sub_query", True))

    # Normalize: connectives ("另外/此外/…") and sentence punctuation both
    # become "|" so a single split handles every separator kind at once.
    normalized = raw
    normalized = re.sub(r"(另外|此外|同时|并且|还有|以及|然后|再者|顺便)", "|", normalized)
    normalized = re.sub(r"[;;。!?!?\n\r]+", "|", normalized)

    parts = [
        p.strip(" ,,、||")
        for p in normalized.split("|")
        if p.strip(" ,,、||")
    ]

    if split_debug_log:
        logger.info(f"[TavilySplit] 初步拆分片段: {parts}")

    # Fallback 1: a single remaining part may still be two questions joined
    # by "和" — split once, but only if both halves are long enough.
    if len(parts) == 1:
        single = parts[0]
        if "和" in single and len(single) >= split_min_chars * 2:
            candidate = re.split(r"\s*和\s*", single, maxsplit=1)
            if len(candidate) == 2:
                left = candidate[0].strip()
                right = candidate[1].strip()
                if len(left) >= split_min_chars and len(right) >= split_min_chars:
                    parts = [left, right]
                    if split_debug_log:
                        logger.info(f"[TavilySplit] 通过“和”二次拆分: {parts}")

    # Fallback 2 (semantic): even without connectives, try to separate a
    # "version changes" clause from a "new hero / skill intro" clause.
    if len(parts) == 1:
        single = parts[0].strip()
        change_keywords = ("改动", "更新", "变更", "调整", "改版", "平衡")
        hero_keywords = ("新英雄", "英雄", "技能", "机制", "天赋", "介绍", "详解")

        # Earliest occurrence of each keyword family; -1 when absent.
        change_pos = min([single.find(k) for k in change_keywords if k in single] or [-1])
        hero_pos = min([single.find(k) for k in hero_keywords if k in single] or [-1])

        # Only split when the "changes" part precedes the "hero" part.
        if change_pos >= 0 and hero_pos >= 0 and hero_pos > change_pos:
            left = single[:hero_pos].strip(" ,,、")
            right = single[hero_pos:].strip(" ,,、")

            if len(left) >= split_min_chars and len(right) >= split_min_chars:
                # Re-attach the topic so the right half searches well alone.
                topic_hint = self._extract_topic_hint(left or single)
                if topic_hint and topic_hint not in right:
                    right = f"{topic_hint} {right}".strip()

                parts = [left, right]
                if split_debug_log:
                    logger.info(f"[TavilySplit] 语义兜底拆分: {parts}")

    # De-duplicate preserving order; drop fragments that are too short.
    deduped: List[str] = []
    seen = set()
    for p in parts:
        if len(p) < split_min_chars:
            continue
        if p in seen:
            continue
        seen.add(p)
        deduped.append(p)

    # If everything was filtered out, fall back to the whole cleaned query.
    parts = deduped[:max_sub_queries] if deduped else [raw]

    if split_debug_log:
        logger.info(f"[TavilySplit] 去重截断后: {parts}")

    # Prefix later sub-queries with the first one's topic so that e.g. the
    # "second question" keeps enough context to be searchable on its own.
    if prepend_context and len(parts) > 1:
        topic_hint = self._extract_topic_hint(parts[0] or raw)
        if topic_hint:
            with_context: List[str] = []
            for idx, p in enumerate(parts):
                item = p
                if idx > 0 and topic_hint not in item:
                    item = f"{topic_hint} {item}".strip()
                with_context.append(item)
            parts = with_context
            if split_debug_log:
                logger.info(f"[TavilySplit] 主题前缀: {topic_hint}")
                logger.info(f"[TavilySplit] 前缀补全后: {parts}")

    if split_debug_log:
        logger.info(f"[TavilySplit] 最终子查询({len(parts)}): {parts}")

    return parts
|
||||
|
||||
def _truncate_text(self, text: str, max_chars: int) -> str:
|
||||
"""按字符数截断文本"""
|
||||
content = str(text or "").strip()
|
||||
if max_chars <= 0 or len(content) <= max_chars:
|
||||
return content
|
||||
return content[:max_chars].rstrip() + "..."
|
||||
|
||||
async def _search_tavily(self, query: str) -> Optional[dict]:
|
||||
"""调用 Tavily API 进行搜索"""
|
||||
api_key = self._get_next_api_key()
|
||||
if not api_key:
|
||||
logger.error("没有可用的 Tavily API Key")
|
||||
return None
|
||||
|
||||
tavily_config = self.config["tavily"]
|
||||
proxy_config = self.config.get("proxy", {})
|
||||
|
||||
payload = {
|
||||
"api_key": api_key,
|
||||
"query": query,
|
||||
"search_depth": tavily_config.get("search_depth", "basic"),
|
||||
"max_results": tavily_config.get("max_results", 5),
|
||||
"include_raw_content": tavily_config.get("include_raw_content", False),
|
||||
"include_images": tavily_config.get("include_images", False),
|
||||
}
|
||||
|
||||
proxy = None
|
||||
if proxy_config.get("enabled", False):
|
||||
proxy_type = proxy_config.get("type", "http")
|
||||
@@ -97,37 +259,179 @@ class TavilySearch(PluginBase):
|
||||
ssl_context.verify_mode = ssl.CERT_NONE
|
||||
connector = aiohttp.TCPConnector(ssl=ssl_context)
|
||||
|
||||
if not self.api_keys:
|
||||
logger.error("没有可用的 Tavily API Key")
|
||||
return None
|
||||
|
||||
max_attempts = min(len(self.api_keys), tavily_config.get("max_key_attempts", len(self.api_keys)))
|
||||
|
||||
async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
|
||||
async with session.post(
|
||||
"https://api.tavily.com/search",
|
||||
json=payload,
|
||||
proxy=proxy
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
result = await resp.json()
|
||||
logger.info(f"Tavily 搜索成功: {query[:30]}...")
|
||||
logger.info(f"Tavily 原始返回: {result}")
|
||||
return result
|
||||
else:
|
||||
for attempt in range(max_attempts):
|
||||
api_key = self._get_next_api_key()
|
||||
if not api_key:
|
||||
logger.error("没有可用的 Tavily API Key")
|
||||
return None
|
||||
|
||||
payload = {
|
||||
"api_key": api_key,
|
||||
"query": query,
|
||||
"search_depth": tavily_config.get("search_depth", "basic"),
|
||||
"max_results": tavily_config.get("max_results", 5),
|
||||
"include_raw_content": (
|
||||
tavily_config.get("include_raw_content", False)
|
||||
or tavily_config.get("use_raw_content_in_result", False)
|
||||
),
|
||||
"include_images": tavily_config.get("include_images", False),
|
||||
}
|
||||
|
||||
async with session.post(
|
||||
"https://api.tavily.com/search",
|
||||
json=payload,
|
||||
proxy=proxy
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
result = await resp.json()
|
||||
logger.info(f"Tavily 搜索成功: {query[:30]}...")
|
||||
logger.info(f"Tavily 原始返回: {result}")
|
||||
return result
|
||||
|
||||
error_text = await resp.text()
|
||||
logger.error(f"Tavily API 错误: {resp.status}, {error_text}")
|
||||
logger.warning(
|
||||
f"Tavily API 错误: {resp.status}, 尝试 key {attempt + 1}/{max_attempts}, "
|
||||
f"body={error_text[:200]}"
|
||||
)
|
||||
|
||||
if resp.status in {401, 403, 429}:
|
||||
continue
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Tavily 搜索失败: {e}")
|
||||
return None
|
||||
|
||||
def _format_search_results(self, results: dict) -> str:
|
||||
def _extract_image_urls(self, results: dict) -> List[str]:
|
||||
"""从搜索结果中提取图片 URL"""
|
||||
if not results:
|
||||
return []
|
||||
|
||||
images = results.get("images", [])
|
||||
urls: List[str] = []
|
||||
for item in images:
|
||||
if isinstance(item, str):
|
||||
url = item.strip()
|
||||
elif isinstance(item, dict):
|
||||
url = (item.get("url") or item.get("image") or item.get("src") or "").strip()
|
||||
else:
|
||||
url = ""
|
||||
|
||||
if url:
|
||||
urls.append(url)
|
||||
|
||||
return urls
|
||||
|
||||
async def _download_image_with_session(
    self,
    session: aiohttp.ClientSession,
    url: str,
    proxy: Optional[str],
    max_retries: int = 1
) -> Optional[str]:
    """Fetch one image over an existing session and store it under temp_dir.

    Retries up to *max_retries* extra times with a growing back-off on both
    non-200 responses and transport errors. Returns the saved file path,
    or None when every attempt failed (or temp_dir is unset).
    """
    if not self.temp_dir:
        return None

    for attempt in range(max_retries + 1):
        try:
            async with session.get(url, proxy=proxy) as response:
                if response.status != 200:
                    if attempt >= max_retries:
                        return None
                    await asyncio.sleep(0.5 * (attempt + 1))
                    continue
                payload = await response.read()

            # Keep only known image extensions; default anything else to .jpg.
            suffix = Path(url).suffix.lower()
            if suffix not in {".jpg", ".jpeg", ".png", ".webp"}:
                suffix = ".jpg"
            target = self.temp_dir / f"tavily_{uuid.uuid4().hex}{suffix}"
            with open(target, "wb") as handle:
                handle.write(payload)
            return str(target)
        except Exception as e:
            if attempt < max_retries:
                await asyncio.sleep(0.5 * (attempt + 1))
                continue
            logger.warning(f"下载图片失败: {url} -> {e}")
            return None
|
||||
|
||||
async def _download_image(self, url: str) -> Optional[str]:
    """Download a single image via a throwaway session (legacy entry point).

    Builds proxy and SSL settings from self.config, then delegates to
    _download_image_with_session with one retry. Returns the local file
    path, or None on any failure.
    """
    if not self.temp_dir:
        return None

    try:
        import ssl

        cfg = self.config or {}

        proxy_url = None
        proxy_cfg = cfg.get("proxy", {})
        if proxy_cfg.get("enabled", False):
            proxy_url = "{}://{}:{}".format(
                proxy_cfg.get("type", "http"),
                proxy_cfg.get("host", "127.0.0.1"),
                proxy_cfg.get("port", 7890),
            )

        # Optionally disable certificate verification ([ssl].verify = false).
        context = None
        if not cfg.get("ssl", {}).get("verify", True):
            context = ssl.create_default_context()
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE

        connector = aiohttp.TCPConnector(ssl=context) if context else None
        timeout = aiohttp.ClientTimeout(total=30)

        async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
            return await self._download_image_with_session(
                session, url, proxy_url, max_retries=1
            )
    except Exception as e:
        logger.warning(f"下载图片失败: {url} -> {e}")
        return None
|
||||
|
||||
def _format_search_results(
|
||||
self,
|
||||
results: dict,
|
||||
*,
|
||||
include_raw_content: bool = False,
|
||||
raw_content_max_chars: int = 1800,
|
||||
section_title: Optional[str] = None,
|
||||
) -> str:
|
||||
"""格式化搜索结果供 AI 处理"""
|
||||
if not results or "results" not in results:
|
||||
if section_title:
|
||||
return f"{section_title}\n未找到相关搜索结果"
|
||||
return "未找到相关搜索结果"
|
||||
|
||||
formatted = []
|
||||
if section_title:
|
||||
formatted.append(section_title)
|
||||
|
||||
for i, item in enumerate(results["results"], 1):
|
||||
title = item.get("title", "无标题")
|
||||
content = item.get("content", "")
|
||||
url = item.get("url", "")
|
||||
formatted.append(f"【结果 {i}】\n标题: {title}\n内容: {content}\n来源: {url}\n")
|
||||
|
||||
block = [
|
||||
f"【结果 {i}】",
|
||||
f"标题: {title}",
|
||||
f"内容: {content}",
|
||||
f"来源: {url}",
|
||||
]
|
||||
|
||||
if include_raw_content:
|
||||
raw_content = self._truncate_text(item.get("raw_content", ""), raw_content_max_chars)
|
||||
if raw_content:
|
||||
block.append(f"原文摘录: {raw_content}")
|
||||
|
||||
formatted.append("\n".join(block) + "\n")
|
||||
|
||||
return "\n".join(formatted)
|
||||
|
||||
@@ -141,16 +445,21 @@ class TavilySearch(PluginBase):
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "tavily_web_search",
|
||||
"description": "仅当用户明确要求“联网搜索/查资料/最新信息/来源/权威说法”或需要事实核实时调用;不要在闲聊中触发。",
|
||||
"description": (
|
||||
"执行联网检索并返回可引用的信息来源。"
|
||||
"仅在用户明确要求查资料、最新信息、权威来源或需要事实核实时调用;"
|
||||
"可直接回答的问题不要触发该工具。"
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "搜索关键词或问题,建议使用简洁明确的搜索词"
|
||||
"description": "检索问题或关键词。应简洁、明确,避免口语噪声。"
|
||||
}
|
||||
},
|
||||
"required": ["query"]
|
||||
"required": ["query"],
|
||||
"additionalProperties": False
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -175,16 +484,124 @@ class TavilySearch(PluginBase):
|
||||
if not query:
|
||||
return {"success": False, "message": "搜索关键词不能为空"}
|
||||
|
||||
tavily_config = self.config.get("tavily", {})
|
||||
multi_query_split = bool(tavily_config.get("multi_query_split", True))
|
||||
use_raw_content_in_result = bool(tavily_config.get("use_raw_content_in_result", False))
|
||||
raw_content_max_chars = int(tavily_config.get("raw_content_max_chars", 1800) or 1800)
|
||||
|
||||
try:
|
||||
logger.info(f"开始 Tavily 搜索: {query}")
|
||||
|
||||
# 调用 Tavily 搜索
|
||||
search_results = await self._search_tavily(query)
|
||||
if not search_results:
|
||||
split_debug_log = bool(tavily_config.get("split_debug_log", False))
|
||||
|
||||
if multi_query_split:
|
||||
sub_queries = self._split_multi_queries(query, tavily_config)
|
||||
else:
|
||||
cleaned_query = self._clean_query_text(query)
|
||||
sub_queries = [cleaned_query] if cleaned_query else [str(query).strip()]
|
||||
|
||||
if not sub_queries:
|
||||
return {"success": False, "message": "搜索关键词不能为空"}
|
||||
|
||||
if split_debug_log:
|
||||
logger.info(f"Tavily 子问题拆分完成,共 {len(sub_queries)} 个: {sub_queries}")
|
||||
else:
|
||||
logger.info(f"Tavily 子问题拆分完成,共 {len(sub_queries)} 个")
|
||||
|
||||
search_batches = []
|
||||
failed_queries = []
|
||||
for sub_query in sub_queries:
|
||||
result = await self._search_tavily(sub_query)
|
||||
if result:
|
||||
search_batches.append((sub_query, result))
|
||||
else:
|
||||
failed_queries.append(sub_query)
|
||||
|
||||
if not search_batches:
|
||||
return {"success": False, "message": "搜索失败,请稍后重试"}
|
||||
|
||||
# 发送搜索图片(若开启 include_images)
|
||||
if tavily_config.get("include_images", False):
|
||||
image_urls = []
|
||||
for _sub_query, sub_result in search_batches:
|
||||
image_urls.extend(self._extract_image_urls(sub_result))
|
||||
|
||||
if image_urls:
|
||||
image_urls = list(dict.fromkeys(image_urls))
|
||||
|
||||
max_images = int(tavily_config.get("max_images", 3) or 3)
|
||||
download_concurrency = int(tavily_config.get("image_download_concurrency", 3) or 3)
|
||||
download_retries = int(tavily_config.get("image_download_retries", 1) or 1)
|
||||
download_timeout = int(tavily_config.get("image_download_timeout", 30) or 30)
|
||||
|
||||
import ssl
|
||||
timeout = aiohttp.ClientTimeout(total=download_timeout)
|
||||
proxy_config = self.config.get("proxy", {}) if self.config else {}
|
||||
proxy = None
|
||||
if proxy_config.get("enabled", False):
|
||||
proxy_type = proxy_config.get("type", "http")
|
||||
proxy_host = proxy_config.get("host", "127.0.0.1")
|
||||
proxy_port = proxy_config.get("port", 7890)
|
||||
proxy = f"{proxy_type}://{proxy_host}:{proxy_port}"
|
||||
|
||||
ssl_config = self.config.get("ssl", {}) if self.config else {}
|
||||
ssl_verify = ssl_config.get("verify", True)
|
||||
ssl_context = None
|
||||
if not ssl_verify:
|
||||
ssl_context = ssl.create_default_context()
|
||||
ssl_context.check_hostname = False
|
||||
ssl_context.verify_mode = ssl.CERT_NONE
|
||||
|
||||
connector = aiohttp.TCPConnector(ssl=ssl_context) if ssl_context else None
|
||||
semaphore = asyncio.Semaphore(max(1, download_concurrency))
|
||||
|
||||
async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
|
||||
async def fetch_image(url: str) -> Optional[str]:
|
||||
async with semaphore:
|
||||
return await self._download_image_with_session(
|
||||
session,
|
||||
url,
|
||||
proxy,
|
||||
max_retries=download_retries
|
||||
)
|
||||
|
||||
tasks = [fetch_image(url) for url in image_urls[:max_images]]
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
sent = 0
|
||||
for result in results:
|
||||
if sent >= max_images:
|
||||
break
|
||||
if isinstance(result, str) and result:
|
||||
await bot.send_image(from_wxid, result)
|
||||
sent += 1
|
||||
|
||||
# 格式化搜索结果
|
||||
formatted_results = self._format_search_results(search_results)
|
||||
if len(search_batches) == 1:
|
||||
formatted_results = self._format_search_results(
|
||||
search_batches[0][1],
|
||||
include_raw_content=use_raw_content_in_result,
|
||||
raw_content_max_chars=raw_content_max_chars,
|
||||
)
|
||||
else:
|
||||
sections = []
|
||||
for idx, (sub_query, sub_result) in enumerate(search_batches, 1):
|
||||
sections.append(
|
||||
self._format_search_results(
|
||||
sub_result,
|
||||
include_raw_content=use_raw_content_in_result,
|
||||
raw_content_max_chars=raw_content_max_chars,
|
||||
section_title=f"【子问题 {idx}】{sub_query}",
|
||||
)
|
||||
)
|
||||
formatted_results = "\n\n".join(sections)
|
||||
|
||||
if failed_queries:
|
||||
failed_text = "\n".join([f"- {q}" for q in failed_queries])
|
||||
formatted_results = (
|
||||
f"{formatted_results}\n\n"
|
||||
f"【未检索成功的子问题】\n{failed_text}"
|
||||
)
|
||||
|
||||
logger.success(f"Tavily 搜索完成: {query[:30]}...")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user