chore: sync current WechatHookBot workspace

This commit is contained in:
2026-03-09 15:48:45 +08:00
parent 4016c1e6eb
commit 9119e2307d
195 changed files with 24438 additions and 17498 deletions

768
plugins/VoiceSynth/main.py Normal file
View File

@@ -0,0 +1,768 @@
"""
VoiceSynth 语音合成插件
支持命令:
- /音色列表
- /切换音色 xx
- /echo 文本
并支持 AI 回复后按概率附带语音回复。
"""
import asyncio
import base64
import random
import re
import uuid
from pathlib import Path
from urllib.parse import urlparse
import aiohttp
import tomllib
from loguru import logger
from utils.plugin_base import PluginBase
from utils.decorators import on_text_message
from WechatHook import WechatHookClient
class VoiceSynth(PluginBase):
    """Text-to-speech plugin: voice listing/switching, /echo, and probabilistic voice replies after AI answers."""
    # Plugin metadata read by the plugin loader; user-facing text stays in Chinese.
    description = "语音合成与语音回复插件"
    author = "ShiHao"
    version = "1.0.0"
def __init__(self):
    """Set configuration defaults; actual values are loaded from config.toml in async_init()."""
    super().__init__()
    self.config = {}
    # DashScope TTS API settings (overridden by the [api] section of config.toml).
    self.api_base_url = "https://dashscope.aliyuncs.com/api/v1"
    self.api_endpoint = "/services/aigc/multimodal-generation/generation"
    self.api_key = ""
    self.model = "qwen3-tts-flash"
    self.language_type = "Chinese"
    self.stream = False
    self.timeout = 30  # HTTP timeout in seconds
    self.api_task = "tts"
    self.payload_mode = "auto"
    self._alt_endpoint = "/services/aigc/multimodal-generation/generation"
    # Voice catalogue: code -> display name, and display name -> code.
    self.voice_map = {}
    self.voice_alias_map = {}
    self.default_voice = ""
    self._chat_voice = {}  # per-chat voice override: chat wxid -> voice code
    # Feature switches ([behavior] section).
    self.enable_group = True
    self.enable_private = True
    self.master_enabled = True
    # Voice-reply behaviour ([reply] section).
    self.ai_voice_probability = 0.0
    self.enable_auto_reply_voice = True
    self.max_duration_seconds = 60
    self.max_chars_per_second = 4
    # Fallback when no silk encoder is available ([conversion] section).
    self.allow_raw_audio = False
    self.raw_audio_format = "wav"
    self._session = None  # lazily created, shared aiohttp.ClientSession
    self._temp_dir = Path(__file__).parent / "temp"
    self._temp_dir.mkdir(parents=True, exist_ok=True)
async def async_init(self):
    """Load config.toml, normalise settings, and prepare the shared HTTP session."""
    config_path = Path(__file__).parent / "config.toml"
    if config_path.exists():
        with open(config_path, "rb") as f:
            self.config = tomllib.load(f)
    # [api] section.
    api_config = self.config.get("api", {})
    self.api_base_url = api_config.get("base_url", self.api_base_url)
    self.api_endpoint = api_config.get("endpoint", self.api_endpoint)
    self.api_key = api_config.get("api_key", self.api_key)
    self.model = api_config.get("model", self.model)
    self.language_type = api_config.get("language_type", self.language_type)
    self.stream = bool(api_config.get("stream", self.stream))
    self.timeout = int(api_config.get("timeout", self.timeout))
    self.api_task = str(api_config.get("task", self.api_task)).strip()
    self.payload_mode = str(api_config.get("payload_mode", self.payload_mode)).strip().lower()
    self._alt_endpoint = str(api_config.get("alt_endpoint", self._alt_endpoint)).strip() or self._alt_endpoint
    if self.stream:
        logger.warning("stream 暂不支持,已强制关闭")
        self.stream = False
    # [voices] section: build code/name maps and pick a default voice.
    voice_config = self.config.get("voices", {})
    self.default_voice = str(voice_config.get("default", self.default_voice)).strip()
    voice_list = voice_config.get("list", [])
    if isinstance(voice_list, str):
        voice_list = [voice_list]
    self.voice_map, self.voice_alias_map = self._parse_voice_list(voice_list)
    if self.default_voice and self.default_voice not in self.voice_map:
        logger.warning(f"默认音色不在列表中: {self.default_voice}")
        self.voice_map[self.default_voice] = self.default_voice
        self.voice_alias_map[self.default_voice] = self.default_voice
    if not self.default_voice and self.voice_map:
        self.default_voice = next(iter(self.voice_map.keys()))
    # [behavior] section.
    behavior_config = self.config.get("behavior", {})
    self.master_enabled = bool(behavior_config.get("enabled", True))
    self.enable_group = bool(behavior_config.get("enable_group", True))
    self.enable_private = bool(behavior_config.get("enable_private", True))
    # [reply] section; a probability > 1 is treated as a percentage, then clamped to [0, 1].
    reply_config = self.config.get("reply", {})
    self.ai_voice_probability = float(reply_config.get("ai_voice_probability", 0.0))
    self.enable_auto_reply_voice = bool(reply_config.get("enable_auto_reply_voice", True))
    self.max_duration_seconds = int(reply_config.get("max_duration_seconds", 60))
    self.max_chars_per_second = int(reply_config.get("max_chars_per_second", 4))
    if self.ai_voice_probability > 1:
        self.ai_voice_probability = self.ai_voice_probability / 100.0
    if self.ai_voice_probability < 0:
        self.ai_voice_probability = 0.0
    if self.ai_voice_probability > 1:
        self.ai_voice_probability = 1.0
    # [conversion] section.
    conversion_config = self.config.get("conversion", {})
    self.allow_raw_audio = bool(conversion_config.get("allow_raw_audio", False))
    self.raw_audio_format = str(conversion_config.get("raw_audio_format", "wav")).strip().lower() or "wav"
    if self.raw_audio_format not in {"wav", "amr"}:
        self.raw_audio_format = "wav"
    if self._session is None or self._session.closed:
        timeout = aiohttp.ClientTimeout(total=self.timeout)
        self._session = aiohttp.ClientSession(timeout=timeout)
    # BUGFIX: loguru formats messages with str.format-style "{}" placeholders, not
    # printf-style "%s"; the original call logged the literal "%s" tokens and silently
    # dropped every value. Use "{}" placeholders so the config summary is actually logged.
    logger.info(
        "VoiceSynth 配置: endpoint={} task={} payload_mode={} model={} default_voice={} "
        "voice_count={} master_enabled={} allow_raw_audio={} raw_audio_format={}",
        self._build_api_url(),
        self.api_task or "无",
        self.payload_mode,
        self.model,
        self.default_voice or "未设置",
        len(self.voice_map),
        self.master_enabled,
        self.allow_raw_audio,
        self.raw_audio_format,
    )
    logger.success("VoiceSynth 插件初始化完成")
async def on_unload(self):
    """Release resources on plugin unload: close the shared aiohttp session."""
    await super().on_unload()
    if self._session and not self._session.closed:
        await self._session.close()
        self._session = None
def _parse_voice_list(self, voice_list):
voice_map = {}
alias_map = {}
for item in voice_list:
if not item:
continue
if ":" in item:
code, name = item.split(":", 1)
else:
code, name = item, item
code = code.strip()
name = name.strip() or code
if not code:
continue
voice_map[code] = name
if name:
alias_map[name] = code
return voice_map, alias_map
def _resolve_voice(self, voice_key: str) -> str:
voice_key = (voice_key or "").strip()
if not voice_key:
return ""
if voice_key in self.voice_map:
return voice_key
if voice_key in self.voice_alias_map:
return self.voice_alias_map[voice_key]
return ""
def _get_chat_voice(self, chat_id: str) -> str:
return self._chat_voice.get(chat_id, self.default_voice)
def _set_chat_voice(self, chat_id: str, voice_code: str):
if not chat_id or not voice_code:
return
self._chat_voice[chat_id] = voice_code
def _build_api_url(self) -> str:
endpoint = (self.api_endpoint or "").strip()
if endpoint.startswith("http://") or endpoint.startswith("https://"):
return endpoint
return f"{self.api_base_url.rstrip('/')}/{endpoint.lstrip('/')}"
def _save_master_enabled(self, enabled: bool) -> bool:
    """Persist the VoiceSynth master switch to config.toml.

    Rewrites only the ``enabled`` key of the ``[behavior]`` section via a
    text-level edit, so comments and formatting elsewhere in the file are
    preserved. Returns True on success, False if the file is missing or
    the write fails.
    """
    try:
        behavior = self.config.setdefault("behavior", {})
        behavior["enabled"] = bool(enabled)
        config_path = Path(__file__).parent / "config.toml"
        if not config_path.exists():
            return False
        text = config_path.read_text(encoding="utf-8")
        lines = text.splitlines()
        # Locate the [behavior] section header (case-insensitive).
        behavior_idx = -1
        for i, line in enumerate(lines):
            if line.strip().lower() == "[behavior]":
                behavior_idx = i
                break
        enabled_line = f"enabled = {'true' if enabled else 'false'}"
        if behavior_idx < 0:
            # No [behavior] section yet: append one at the end of the file.
            if lines and lines[-1].strip() != "":
                lines.append("")
            lines.append("[behavior]")
            lines.append(enabled_line)
        else:
            # Section exists: find where it ends (next section header or EOF).
            section_end = len(lines)
            for i in range(behavior_idx + 1, len(lines)):
                if lines[i].strip().startswith("["):
                    section_end = i
                    break
            # Replace an existing "enabled = ..." line, or insert one right
            # below the section header.
            replaced = False
            for i in range(behavior_idx + 1, section_end):
                if re.match(r"^\s*enabled\s*=", lines[i]):
                    lines[i] = enabled_line
                    replaced = True
                    break
            if not replaced:
                insert_at = behavior_idx + 1
                lines.insert(insert_at, enabled_line)
        # Preserve the file's trailing-newline convention.
        new_text = "\n".join(lines)
        if text.endswith("\n"):
            new_text += "\n"
        config_path.write_text(new_text, encoding="utf-8")
        return True
    except Exception as e:
        logger.warning(f"保存 VoiceSynth 总开关失败: {e}")
        return False
def _truncate_text(self, text: str) -> str:
if not text:
return text
max_chars = int(self.max_duration_seconds * self.max_chars_per_second)
if max_chars <= 0:
return text
if len(text) > max_chars:
logger.info(f"语音文本过长,已截断到 {max_chars} 字符")
return text[:max_chars]
return text
def _build_payload(self, text: str, voice: str, mode: str) -> dict:
"""构建 TTS 请求 payload仅用于 HTTP 方式备用)"""
return {
"model": self.model,
"text": text,
"voice": voice,
"language_type": self.language_type,
"stream": False,
}
async def _request_tts(self, text: str, voice: str) -> dict | None:
    """Call the TTS HTTP API and return the parsed JSON response.

    Returns None when the API key is missing, the HTTP status is not 200,
    or the request raises (timeout, connection error, unparseable body).
    """
    if not self.api_key:
        logger.warning("VoiceSynth API Key 未配置")
        return None
    url = self._build_api_url()
    headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": self.model,
        "input": {
            "text": text,
            "voice": voice,
            "language_type": self.language_type,
        }
    }
    logger.debug(f"TTS 请求: url={url} voice={voice} text_len={len(text)}")
    # Reuse the shared session; recreate it if async_init has not run yet
    # or the session was closed in the meantime.
    session = self._session
    if session is None or session.closed:
        timeout = aiohttp.ClientTimeout(total=self.timeout)
        session = aiohttp.ClientSession(timeout=timeout)
        self._session = session
    try:
        async with session.post(url, json=payload, headers=headers) as resp:
            # content_type=None: accept any Content-Type, error bodies may not be JSON-typed.
            data = await resp.json(content_type=None)
            logger.debug(f"TTS 响应: status={resp.status} request_id={data.get('request_id', '')}")
            if resp.status == 200:
                return data
            logger.warning(f"TTS 请求失败: {resp.status}, {data}")
            return None
    except Exception as e:
        logger.warning(f"TTS 请求异常: {e}")
        return None
def _build_alt_url(self) -> str:
endpoint = (self._alt_endpoint or "").strip()
if not endpoint:
return ""
if endpoint.startswith("http://") or endpoint.startswith("https://"):
return endpoint
return f"{self.api_base_url.rstrip('/')}/{endpoint.lstrip('/')}"
def _get_audio_info(self, response: dict) -> tuple[str, str]:
output = (response or {}).get("output") or {}
audio = output.get("audio") or {}
audio_url = audio.get("url") or ""
audio_data = audio.get("data") or ""
return audio_url, audio_data
def _guess_extension(self, url: str, content_type: str = "") -> str:
suffix = Path(urlparse(url).path).suffix
if suffix:
return suffix
content_type = (content_type or "").lower()
if "wav" in content_type:
return ".wav"
if "mpeg" in content_type or "mp3" in content_type:
return ".mp3"
if "ogg" in content_type:
return ".ogg"
return ".wav"
async def _download_audio(self, url: str) -> Path | None:
    """Download the synthesized audio into the plugin temp dir; return its path or None."""
    # Reuse the shared session; recreate it if async_init has not run or it was closed.
    session = self._session
    if session is None or session.closed:
        timeout = aiohttp.ClientTimeout(total=self.timeout)
        session = aiohttp.ClientSession(timeout=timeout)
        self._session = session
    try:
        async with session.get(url) as resp:
            if resp.status != 200:
                logger.warning(f"下载音频失败: {resp.status}")
                return None
            # Derive a file extension from the URL path, falling back to Content-Type.
            content_type = resp.headers.get("Content-Type", "")
            suffix = self._guess_extension(url, content_type)
            file_path = self._temp_dir / f"tts_{uuid.uuid4().hex}{suffix}"
            audio_bytes = await resp.read()
            file_path.write_bytes(audio_bytes)
            logger.debug(f"下载音频完成: size={len(audio_bytes)} path={file_path}")
            return file_path
    except Exception as e:
        logger.warning(f"下载音频异常: {e}")
        return None
async def _write_audio_bytes(self, data: bytes, suffix: str = ".wav") -> Path:
file_path = self._temp_dir / f"tts_{uuid.uuid4().hex}{suffix}"
file_path.write_bytes(data)
return file_path
def _load_pysilk(self):
"""加载 silk 编码库,优先 pysilk备选 pilk"""
# 尝试 pysilk
try:
import pysilk
return pysilk, "pysilk", None
except Exception:
pass
# 尝试 pilk64 位兼容)
try:
import pilk
return pilk, "pilk", None
except Exception as e:
return None, None, e
async def _convert_to_silk(self, input_path: Path) -> Path | None:
    """Convert an audio file to WeChat's silk format.

    Pipeline: already-silk files pass through; other formats are converted
    to WAV (via ffmpeg) when needed and then encoded with pysilk or pilk.
    When no silk library is available and ``allow_raw_audio`` is set, falls
    back to sending a raw wav/amr file. Returns the output path or None.
    """
    suffix = input_path.suffix.lower()
    if suffix == ".silk":
        return input_path
    silk_lib, lib_name, err = self._load_pysilk()
    if not silk_lib:
        import sys
        # No encoder available: optionally fall back to raw audio.
        if self.allow_raw_audio:
            raw_path = await self._convert_to_raw(input_path)
            if raw_path:
                logger.warning(
                    f"缺少 silk 编码库,使用 raw 音频发送: {raw_path} | python={sys.executable}"
                )
                return raw_path
        logger.warning(f"缺少 silk 编码库pysilk/pilk无法转换: {err} | python={sys.executable}")
        return None
    # Normalise the input to WAV before encoding.
    source_path = input_path
    if suffix != ".wav":
        converted = await self._convert_to_wav(input_path)
        if not converted:
            logger.warning(f"不支持的音频格式: {suffix}")
            return None
        source_path = converted
    silk_path = source_path.with_suffix(".silk")
    # pilk uses a file-path API and needs 16 kHz mono PCM input.
    if lib_name == "pilk":
        try:
            import wave
            # Inspect the WAV header (for logging only).
            with wave.open(str(source_path), "rb") as wf:
                sample_rate = wf.getframerate()
                channels = wf.getnchannels()
            logger.debug(f"WAV 信息: sample_rate={sample_rate} channels={channels}")
            # pilk needs mono PCM at a standard rate; re-encode to 16000 Hz
            # mono via ffmpeg first.
            converted_wav = await self._convert_to_wav_16k(source_path)
            if not converted_wav:
                logger.warning("转换 WAV 到 16kHz 失败")
                return None
            # Encode off the event loop; tencent=True produces WeChat-compatible silk.
            duration = await asyncio.to_thread(
                silk_lib.encode, str(converted_wav), str(silk_path), pcm_rate=16000, tencent=True
            )
            logger.debug(f"pilk 编码完成: duration={duration}ms")
            # Clean up the temporary 16 kHz intermediate file.
            if converted_wav != source_path and converted_wav.exists():
                converted_wav.unlink()
            return silk_path
        except Exception as e:
            logger.warning(f"pilk 编码失败: {e}")
            return None
    # pysilk takes raw PCM bytes.
    try:
        import wave
        # NOTE(review): audioop is deprecated since Python 3.11 and removed in
        # 3.13 — confirm the runtime version before relying on this path.
        import audioop
        with wave.open(str(source_path), "rb") as wf:
            sample_rate = wf.getframerate()
            channels = wf.getnchannels()
            sample_width = wf.getsampwidth()
            pcm = wf.readframes(wf.getnframes())
        # Downmix stereo to mono; the encoder expects single-channel PCM.
        if channels > 1:
            pcm = audioop.tomono(pcm, sample_width, 0.5, 0.5)
        silk_bytes = await silk_lib.async_encode(
            pcm,
            data_rate=sample_rate,
            sample_rate=sample_rate,
        )
        silk_path.write_bytes(silk_bytes)
        return silk_path
    except Exception as e:
        logger.warning(f"pysilk 编码失败: {e}")
        return None
async def _convert_to_wav(self, input_path: Path) -> Path | None:
    """Convert an audio file to 16 kHz mono WAV via ffmpeg.

    Returns the ``.wav`` sibling path, or None when ffmpeg is missing or fails.
    CONSISTENCY: the original body duplicated ``_convert_with_ffmpeg`` line for
    line (same ffmpeg arguments, same output path); delegate to it instead.
    """
    return await self._convert_with_ffmpeg(input_path, ".wav", sample_rate=16000)
async def _convert_to_wav_16k(self, input_path: Path) -> Path | None:
    """Convert audio to 16 kHz mono signed-16-bit PCM WAV (the format pilk requires)."""
    import shutil
    import subprocess
    ffmpeg = shutil.which("ffmpeg")
    if not ffmpeg:
        logger.warning("未找到 ffmpeg无法转换音频采样率")
        return None
    # Write next to the input as "<stem>_16k.wav" so the original is preserved.
    output_path = input_path.parent / f"{input_path.stem}_16k.wav"
    cmd = [
        ffmpeg, "-y", "-i", str(input_path),
        "-ac", "1", "-ar", "16000", "-acodec", "pcm_s16le",
        str(output_path),
    ]
    try:
        # Run ffmpeg in a worker thread so the event loop is not blocked.
        result = await asyncio.to_thread(
            subprocess.run, cmd, capture_output=True, text=True,
        )
        if result.returncode != 0:
            logger.warning(f"ffmpeg 转换 16k 失败: {result.stderr}")
            return None
        logger.debug(f"转换为 16kHz WAV: {output_path}")
        return output_path
    except Exception as e:
        logger.warning(f"ffmpeg 转换 16k 异常: {e}")
        return None
async def _convert_to_raw(self, input_path: Path) -> Path | None:
if self.raw_audio_format == "wav":
if input_path.suffix.lower() == ".wav":
return input_path
return await self._convert_to_wav(input_path)
if self.raw_audio_format == "amr":
return await self._convert_with_ffmpeg(input_path, ".amr", sample_rate=8000)
return await self._convert_to_wav(input_path)
async def _convert_with_ffmpeg(self, input_path: Path, suffix: str, sample_rate: int = 16000) -> Path | None:
    """Transcode audio to mono at ``sample_rate`` with the extension ``suffix``.

    Returns the output path (input path with ``suffix`` substituted), or
    None when ffmpeg is unavailable or the conversion fails.
    """
    import shutil
    import subprocess
    ffmpeg = shutil.which("ffmpeg")
    if not ffmpeg:
        logger.warning("未找到 ffmpeg无法转码")
        return None
    output_path = input_path.with_suffix(suffix)
    cmd = [
        ffmpeg,
        "-y",
        "-i",
        str(input_path),
        "-ac",
        "1",
        "-ar",
        str(sample_rate),
        str(output_path),
    ]
    try:
        # Run ffmpeg in a worker thread so the event loop is not blocked.
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            logger.warning(f"ffmpeg 转换失败: {result.stderr}")
            return None
        return output_path
    except Exception as e:
        logger.warning(f"ffmpeg 转换异常: {e}")
        return None
async def _synthesize_to_silk(self, text: str, voice: str) -> tuple[Path | None, list[Path]]:
    """Synthesize ``text`` into a silk voice file.

    Returns ``(silk_path, cleanup_paths)``: silk_path is None on any failure;
    cleanup_paths lists intermediate audio files the caller should delete
    after sending.
    """
    cleanup_paths = []
    text = self._truncate_text(text)
    if not text:
        return None, cleanup_paths
    response = await self._request_tts(text, voice)
    if not response:
        return None, cleanup_paths
    # The API may return either a download URL or inline base64 audio data.
    audio_url, audio_data = self._get_audio_info(response)
    logger.debug(f"音频信息: url={audio_url[:80] if audio_url else ''!r} data_len={len(audio_data) if audio_data else 0}")
    if audio_url:
        audio_path = await self._download_audio(audio_url)
    elif audio_data:
        try:
            raw = base64.b64decode(audio_data)
            audio_path = await self._write_audio_bytes(raw)
        except Exception as e:
            logger.warning(f"解码音频失败: {e}")
            return None, cleanup_paths
    else:
        logger.warning(f"未获取到音频数据: {response}")
        return None, cleanup_paths
    if not audio_path:
        return None, cleanup_paths
    if audio_path.exists():
        cleanup_paths.append(audio_path)
        # Also register the intermediate .wav that the silk conversion may
        # create; cleanup skips paths that never materialise.
        if audio_path.suffix.lower() != ".wav":
            cleanup_paths.append(audio_path.with_suffix(".wav"))
    silk_path = await self._convert_to_silk(audio_path)
    if not silk_path:
        return None, cleanup_paths
    return silk_path, cleanup_paths
async def _send_voice(self, bot: WechatHookClient, to_wxid: str, silk_path: Path) -> bool:
    """Send a silk voice file through the bot; returns False on any exception."""
    try:
        return await bot.http_client.send_voice(to_wxid, str(silk_path))
    except Exception as exc:
        logger.warning(f"发送语音失败: {exc}")
        return False
async def _speak(self, bot: WechatHookClient, to_wxid: str, text: str, voice: str, silent: bool = False) -> bool:
    """Synthesize ``text`` with ``voice`` and send it to ``to_wxid`` as a voice message.

    When ``silent`` is True, failures are not reported back into the chat
    (used by the opportunistic AI voice replies). Returns True on success.
    """
    if not self.master_enabled:
        if not silent:
            await bot.send_text(to_wxid, "⚠️ VoiceSynth 总开关已关闭")
        return False
    silk_path = None
    cleanup_paths = []
    try:
        silk_path, cleanup_paths = await self._synthesize_to_silk(text, voice)
        if not silk_path:
            if not silent:
                await bot.send_text(to_wxid, "❌ 语音生成失败")
            return False
        ok = await self._send_voice(bot, to_wxid, silk_path)
        if not ok and not silent:
            await bot.send_text(to_wxid, "❌ 语音发送失败")
        return ok
    finally:
        # Best-effort removal of the silk file and all intermediate audio files.
        if silk_path:
            try:
                if silk_path.exists():
                    silk_path.unlink()
            except Exception:
                pass
        for path in cleanup_paths:
            try:
                if path.exists():
                    path.unlink()
            except Exception:
                pass
async def maybe_send_voice_reply(self, bot: WechatHookClient, to_wxid: str, text: str, message: dict | None = None):
    """After an AI text reply, additionally send it as voice with probability ``ai_voice_probability``.

    Public hook intended to be called from other plugins/framework code.
    Does nothing when the plugin or master switch is off, the probability
    roll fails, or the chat type (group/private) is disabled.
    """
    if not self.enabled:
        return
    if not self.master_enabled:
        return
    if self.ai_voice_probability <= 0:
        return
    # Skip AutoReply-triggered messages when voice for auto replies is disabled.
    if message and not self.enable_auto_reply_voice:
        if message.get("_auto_reply_triggered") or message.get("_auto_reply_context"):
            return
    if random.random() > self.ai_voice_probability:
        return
    # Determine group vs private: prefer the message flag, else guess from the wxid suffix.
    is_group = False
    if message:
        is_group = bool(message.get("IsGroup", False))
    else:
        is_group = to_wxid.endswith("@chatroom")
    if is_group and not self.enable_group:
        return
    if not is_group and not self.enable_private:
        return
    voice_code = self._get_chat_voice(to_wxid)
    if not voice_code:
        return
    # silent=True: never post error messages for an opportunistic voice reply.
    await self._speak(bot, to_wxid, text, voice_code, silent=True)
@on_text_message(priority=70)
async def handle_voice_command(self, bot: WechatHookClient, message: dict):
    """Handle voice-synthesis chat commands.

    Commands: /语音开, /语音关, /语音状态, /音色列表, /切换音色 <code>, /echo <text>.
    Returns False when the message was consumed, True to let other plugins
    keep processing — NOTE(review): assumed framework convention for
    ``on_text_message`` handlers; confirm against utils.decorators.
    """
    content = message.get("Content", "").strip()
    from_wxid = message.get("FromWxid", "")
    is_group = message.get("IsGroup", False)
    # --- master-switch commands: always available ---
    if content == "/语音开":
        self.master_enabled = True
        ok = self._save_master_enabled(True)
        if ok:
            await bot.send_text(from_wxid, "✅ VoiceSynth 总开关已开启")
        else:
            await bot.send_text(from_wxid, "⚠️ VoiceSynth 已开启,但写入配置失败")
        return False
    if content == "/语音关":
        self.master_enabled = False
        ok = self._save_master_enabled(False)
        if ok:
            await bot.send_text(from_wxid, "✅ VoiceSynth 总开关已关闭")
        else:
            await bot.send_text(from_wxid, "⚠️ VoiceSynth 已关闭,但写入配置失败")
        return False
    if content == "/语音状态":
        current_voice = self._get_chat_voice(from_wxid)
        current_voice_name = self.voice_map.get(current_voice, current_voice) if current_voice else "未配置"
        lines = [
            "🎙️ VoiceSynth 状态",
            f"总开关: {'开启' if self.master_enabled else '关闭'}",
            # BUGFIX: these two lines used the vacuous conditional "'' if ... else ''",
            # so both rendered empty regardless of the flag; show 开启/关闭 like the
            # surrounding status lines.
            f"群聊可用: {'开启' if self.enable_group else '关闭'}",
            f"私聊可用: {'开启' if self.enable_private else '关闭'}",
            f"AI回复语音概率: {self.ai_voice_probability:.2f}",
            f"AutoReply语音: {'开启' if self.enable_auto_reply_voice else '关闭'}",
            f"当前会话音色: {current_voice_name} ({current_voice or '-'})",
        ]
        await bot.send_text(from_wxid, "\n".join(lines))
        return False
    # --- everything below requires the master switch on ---
    if not self.master_enabled:
        if content == "/音色列表" or content.startswith("/切换音色") or content.startswith("/echo"):
            await bot.send_text(from_wxid, "⚠️ VoiceSynth 总开关已关闭")
            return False
        return True
    # Respect the per-chat-type switches.
    if is_group and not self.enable_group:
        return True
    if not is_group and not self.enable_private:
        return True
    if content == "/音色列表":
        if not self.voice_map:
            await bot.send_text(from_wxid, "❌ 未配置可用音色")
            return False
        current = self._get_chat_voice(from_wxid)
        lines = ["可用音色:"]
        for code, name in self.voice_map.items():
            # Mark the chat's currently selected voice with "*".
            marker = "*" if code == current else "-"
            lines.append(f"{marker} {name} ({code})")
        lines.append(f"当前音色: {self.voice_map.get(current, current)} ({current})")
        lines.append("切换: /切换音色 音色代码")
        await bot.send_text(from_wxid, "\n".join(lines))
        return False
    if content.startswith("/切换音色"):
        voice_key = content[len("/切换音色"):].strip()
        if not voice_key:
            await bot.send_text(from_wxid, "❌ 用法: /切换音色 音色代码")
            return False
        # Accept either the voice code or its display name.
        voice_code = self._resolve_voice(voice_key)
        if not voice_code:
            await bot.send_text(from_wxid, "❌ 未找到该音色")
            return False
        self._set_chat_voice(from_wxid, voice_code)
        display_name = self.voice_map.get(voice_code, voice_code)
        await bot.send_text(from_wxid, f"✅ 已切换音色: {display_name} ({voice_code})")
        return False
    if content.startswith("/echo"):
        text = content[len("/echo"):].strip()
        if not text:
            await bot.send_text(from_wxid, "❌ 用法: /echo 需要朗读的内容")
            return False
        voice_code = self._get_chat_voice(from_wxid)
        if not voice_code:
            await bot.send_text(from_wxid, "❌ 未配置音色")
            return False
        await self._speak(bot, from_wxid, text, voice_code, silent=False)
        return False
    # Not a VoiceSynth command: let other plugins handle the message.
    return True