chore: sync current WechatHookBot workspace
This commit is contained in:
768
plugins/VoiceSynth/main.py
Normal file
768
plugins/VoiceSynth/main.py
Normal file
@@ -0,0 +1,768 @@
|
||||
"""
|
||||
VoiceSynth 语音合成插件
|
||||
|
||||
支持命令:
|
||||
- /音色列表
|
||||
- /切换音色 xx
|
||||
- /echo 文本
|
||||
|
||||
并支持 AI 回复后按概率附带语音回复。
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import random
|
||||
import re
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import aiohttp
|
||||
import tomllib
|
||||
from loguru import logger
|
||||
|
||||
from utils.plugin_base import PluginBase
|
||||
from utils.decorators import on_text_message
|
||||
from WechatHook import WechatHookClient
|
||||
|
||||
|
||||
class VoiceSynth(PluginBase):
|
||||
"""语音合成插件"""
|
||||
|
||||
description = "语音合成与语音回复插件"
|
||||
author = "ShiHao"
|
||||
version = "1.0.0"
|
||||
|
||||
    def __init__(self):
        """Set up default configuration values; the real config is loaded in async_init()."""
        super().__init__()
        self.config = {}  # raw TOML config, populated in async_init()

        # --- API settings (overridden by the [api] section of config.toml) ---
        self.api_base_url = "https://dashscope.aliyuncs.com/api/v1"
        self.api_endpoint = "/services/aigc/multimodal-generation/generation"
        self.api_key = ""
        self.model = "qwen3-tts-flash"
        self.language_type = "Chinese"
        self.stream = False  # streaming is unsupported; forced off in async_init()
        self.timeout = 30  # HTTP timeout in seconds
        self.api_task = "tts"
        self.payload_mode = "auto"
        # Fallback endpoint; defaults to the same path as api_endpoint.
        self._alt_endpoint = "/services/aigc/multimodal-generation/generation"

        # --- voice settings ---
        self.voice_map = {}        # voice code -> display name
        self.voice_alias_map = {}  # display name -> voice code
        self.default_voice = ""
        self._chat_voice = {}      # chat id -> selected voice code

        # --- behavior switches ---
        self.enable_group = True
        self.enable_private = True
        self.master_enabled = True

        # --- AI voice-reply settings ---
        self.ai_voice_probability = 0.0  # probability of a voice reply, normalized to [0, 1]
        self.enable_auto_reply_voice = True
        self.max_duration_seconds = 60
        self.max_chars_per_second = 4
        self.allow_raw_audio = False     # send raw wav/amr when no silk encoder is installed
        self.raw_audio_format = "wav"

        self._session = None  # lazily (re)created aiohttp.ClientSession
        self._temp_dir = Path(__file__).parent / "temp"
        self._temp_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
async def async_init(self):
|
||||
"""插件异步初始化"""
|
||||
config_path = Path(__file__).parent / "config.toml"
|
||||
if config_path.exists():
|
||||
with open(config_path, "rb") as f:
|
||||
self.config = tomllib.load(f)
|
||||
|
||||
api_config = self.config.get("api", {})
|
||||
self.api_base_url = api_config.get("base_url", self.api_base_url)
|
||||
self.api_endpoint = api_config.get("endpoint", self.api_endpoint)
|
||||
self.api_key = api_config.get("api_key", self.api_key)
|
||||
self.model = api_config.get("model", self.model)
|
||||
self.language_type = api_config.get("language_type", self.language_type)
|
||||
self.stream = bool(api_config.get("stream", self.stream))
|
||||
self.timeout = int(api_config.get("timeout", self.timeout))
|
||||
self.api_task = str(api_config.get("task", self.api_task)).strip()
|
||||
self.payload_mode = str(api_config.get("payload_mode", self.payload_mode)).strip().lower()
|
||||
self._alt_endpoint = str(api_config.get("alt_endpoint", self._alt_endpoint)).strip() or self._alt_endpoint
|
||||
if self.stream:
|
||||
logger.warning("stream 暂不支持,已强制关闭")
|
||||
self.stream = False
|
||||
|
||||
voice_config = self.config.get("voices", {})
|
||||
self.default_voice = str(voice_config.get("default", self.default_voice)).strip()
|
||||
voice_list = voice_config.get("list", [])
|
||||
if isinstance(voice_list, str):
|
||||
voice_list = [voice_list]
|
||||
self.voice_map, self.voice_alias_map = self._parse_voice_list(voice_list)
|
||||
if self.default_voice and self.default_voice not in self.voice_map:
|
||||
logger.warning(f"默认音色不在列表中: {self.default_voice}")
|
||||
self.voice_map[self.default_voice] = self.default_voice
|
||||
self.voice_alias_map[self.default_voice] = self.default_voice
|
||||
if not self.default_voice and self.voice_map:
|
||||
self.default_voice = next(iter(self.voice_map.keys()))
|
||||
|
||||
behavior_config = self.config.get("behavior", {})
|
||||
self.master_enabled = bool(behavior_config.get("enabled", True))
|
||||
self.enable_group = bool(behavior_config.get("enable_group", True))
|
||||
self.enable_private = bool(behavior_config.get("enable_private", True))
|
||||
|
||||
reply_config = self.config.get("reply", {})
|
||||
self.ai_voice_probability = float(reply_config.get("ai_voice_probability", 0.0))
|
||||
self.enable_auto_reply_voice = bool(reply_config.get("enable_auto_reply_voice", True))
|
||||
self.max_duration_seconds = int(reply_config.get("max_duration_seconds", 60))
|
||||
self.max_chars_per_second = int(reply_config.get("max_chars_per_second", 4))
|
||||
if self.ai_voice_probability > 1:
|
||||
self.ai_voice_probability = self.ai_voice_probability / 100.0
|
||||
if self.ai_voice_probability < 0:
|
||||
self.ai_voice_probability = 0.0
|
||||
if self.ai_voice_probability > 1:
|
||||
self.ai_voice_probability = 1.0
|
||||
conversion_config = self.config.get("conversion", {})
|
||||
self.allow_raw_audio = bool(conversion_config.get("allow_raw_audio", False))
|
||||
self.raw_audio_format = str(conversion_config.get("raw_audio_format", "wav")).strip().lower() or "wav"
|
||||
if self.raw_audio_format not in {"wav", "amr"}:
|
||||
self.raw_audio_format = "wav"
|
||||
|
||||
if self._session is None or self._session.closed:
|
||||
timeout = aiohttp.ClientTimeout(total=self.timeout)
|
||||
self._session = aiohttp.ClientSession(timeout=timeout)
|
||||
|
||||
logger.info(
|
||||
"VoiceSynth 配置: endpoint=%s task=%s payload_mode=%s model=%s default_voice=%s voice_count=%d master_enabled=%s allow_raw_audio=%s raw_audio_format=%s",
|
||||
self._build_api_url(),
|
||||
self.api_task or "",
|
||||
self.payload_mode,
|
||||
self.model,
|
||||
self.default_voice or "",
|
||||
len(self.voice_map),
|
||||
self.master_enabled,
|
||||
self.allow_raw_audio,
|
||||
self.raw_audio_format,
|
||||
)
|
||||
logger.success("VoiceSynth 插件初始化完成")
|
||||
|
||||
    async def on_unload(self):
        """Close the shared aiohttp session when the plugin is unloaded."""
        await super().on_unload()
        if self._session and not self._session.closed:
            await self._session.close()
            self._session = None
|
||||
|
||||
def _parse_voice_list(self, voice_list):
|
||||
voice_map = {}
|
||||
alias_map = {}
|
||||
for item in voice_list:
|
||||
if not item:
|
||||
continue
|
||||
if ":" in item:
|
||||
code, name = item.split(":", 1)
|
||||
else:
|
||||
code, name = item, item
|
||||
code = code.strip()
|
||||
name = name.strip() or code
|
||||
if not code:
|
||||
continue
|
||||
voice_map[code] = name
|
||||
if name:
|
||||
alias_map[name] = code
|
||||
return voice_map, alias_map
|
||||
|
||||
def _resolve_voice(self, voice_key: str) -> str:
|
||||
voice_key = (voice_key or "").strip()
|
||||
if not voice_key:
|
||||
return ""
|
||||
if voice_key in self.voice_map:
|
||||
return voice_key
|
||||
if voice_key in self.voice_alias_map:
|
||||
return self.voice_alias_map[voice_key]
|
||||
return ""
|
||||
|
||||
def _get_chat_voice(self, chat_id: str) -> str:
|
||||
return self._chat_voice.get(chat_id, self.default_voice)
|
||||
|
||||
def _set_chat_voice(self, chat_id: str, voice_code: str):
|
||||
if not chat_id or not voice_code:
|
||||
return
|
||||
self._chat_voice[chat_id] = voice_code
|
||||
|
||||
def _build_api_url(self) -> str:
|
||||
endpoint = (self.api_endpoint or "").strip()
|
||||
if endpoint.startswith("http://") or endpoint.startswith("https://"):
|
||||
return endpoint
|
||||
return f"{self.api_base_url.rstrip('/')}/{endpoint.lstrip('/')}"
|
||||
|
||||
    def _save_master_enabled(self, enabled: bool) -> bool:
        """Persist the master switch to config.toml; returns True on success.

        Edits the file textually (rather than re-serialising the whole config)
        so comments and formatting in config.toml are preserved.
        """
        try:
            # Keep the in-memory config in sync with what we write to disk.
            behavior = self.config.setdefault("behavior", {})
            behavior["enabled"] = bool(enabled)

            config_path = Path(__file__).parent / "config.toml"
            if not config_path.exists():
                return False

            text = config_path.read_text(encoding="utf-8")
            lines = text.splitlines()

            # Locate the [behavior] section header.
            behavior_idx = -1
            for i, line in enumerate(lines):
                if line.strip().lower() == "[behavior]":
                    behavior_idx = i
                    break

            enabled_line = f"enabled = {'true' if enabled else 'false'}"

            if behavior_idx < 0:
                # No [behavior] section yet: append one at the end of the file.
                if lines and lines[-1].strip() != "":
                    lines.append("")
                lines.append("[behavior]")
                lines.append(enabled_line)
            else:
                # Find where the section ends (next section header or EOF).
                section_end = len(lines)
                for i in range(behavior_idx + 1, len(lines)):
                    if lines[i].strip().startswith("["):
                        section_end = i
                        break

                # Replace an existing `enabled = ...` line within the section,
                # or insert a fresh one right after the header.
                replaced = False
                for i in range(behavior_idx + 1, section_end):
                    if re.match(r"^\s*enabled\s*=", lines[i]):
                        lines[i] = enabled_line
                        replaced = True
                        break

                if not replaced:
                    insert_at = behavior_idx + 1
                    lines.insert(insert_at, enabled_line)

            # splitlines() drops the trailing newline; restore it if the
            # original file ended with one.
            new_text = "\n".join(lines)
            if text.endswith("\n"):
                new_text += "\n"
            config_path.write_text(new_text, encoding="utf-8")
            return True
        except Exception as e:
            # Best-effort persistence: the runtime switch stays flipped even if
            # writing the file fails.
            logger.warning(f"保存 VoiceSynth 总开关失败: {e}")
            return False
|
||||
|
||||
def _truncate_text(self, text: str) -> str:
|
||||
if not text:
|
||||
return text
|
||||
max_chars = int(self.max_duration_seconds * self.max_chars_per_second)
|
||||
if max_chars <= 0:
|
||||
return text
|
||||
if len(text) > max_chars:
|
||||
logger.info(f"语音文本过长,已截断到 {max_chars} 字符")
|
||||
return text[:max_chars]
|
||||
return text
|
||||
|
||||
def _build_payload(self, text: str, voice: str, mode: str) -> dict:
|
||||
"""构建 TTS 请求 payload(仅用于 HTTP 方式备用)"""
|
||||
return {
|
||||
"model": self.model,
|
||||
"text": text,
|
||||
"voice": voice,
|
||||
"language_type": self.language_type,
|
||||
"stream": False,
|
||||
}
|
||||
|
||||
    async def _request_tts(self, text: str, voice: str) -> dict | None:
        """Call the DashScope TTS HTTP API; return the parsed JSON on HTTP 200, else None."""
        if not self.api_key:
            logger.warning("VoiceSynth API Key 未配置")
            return None

        url = self._build_api_url()
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        # DashScope multimodal-generation request shape: model at the top level,
        # synthesis parameters nested under "input".
        payload = {
            "model": self.model,
            "input": {
                "text": text,
                "voice": voice,
                "language_type": self.language_type,
            }
        }

        logger.debug(f"TTS 请求: url={url} voice={voice} text_len={len(text)}")

        # Recreate the shared session if it was never created or has been closed.
        session = self._session
        if session is None or session.closed:
            timeout = aiohttp.ClientTimeout(total=self.timeout)
            session = aiohttp.ClientSession(timeout=timeout)
            self._session = session

        try:
            async with session.post(url, json=payload, headers=headers) as resp:
                # content_type=None: parse JSON regardless of the response Content-Type.
                data = await resp.json(content_type=None)
                logger.debug(f"TTS 响应: status={resp.status} request_id={data.get('request_id', '')}")
                if resp.status == 200:
                    return data
                logger.warning(f"TTS 请求失败: {resp.status}, {data}")
                return None
        except Exception as e:
            # Network/timeout/parse errors are logged and reported as failure.
            logger.warning(f"TTS 请求异常: {e}")
            return None
|
||||
|
||||
def _build_alt_url(self) -> str:
|
||||
endpoint = (self._alt_endpoint or "").strip()
|
||||
if not endpoint:
|
||||
return ""
|
||||
if endpoint.startswith("http://") or endpoint.startswith("https://"):
|
||||
return endpoint
|
||||
return f"{self.api_base_url.rstrip('/')}/{endpoint.lstrip('/')}"
|
||||
|
||||
def _get_audio_info(self, response: dict) -> tuple[str, str]:
|
||||
output = (response or {}).get("output") or {}
|
||||
audio = output.get("audio") or {}
|
||||
audio_url = audio.get("url") or ""
|
||||
audio_data = audio.get("data") or ""
|
||||
return audio_url, audio_data
|
||||
|
||||
def _guess_extension(self, url: str, content_type: str = "") -> str:
|
||||
suffix = Path(urlparse(url).path).suffix
|
||||
if suffix:
|
||||
return suffix
|
||||
content_type = (content_type or "").lower()
|
||||
if "wav" in content_type:
|
||||
return ".wav"
|
||||
if "mpeg" in content_type or "mp3" in content_type:
|
||||
return ".mp3"
|
||||
if "ogg" in content_type:
|
||||
return ".ogg"
|
||||
return ".wav"
|
||||
|
||||
    async def _download_audio(self, url: str) -> Path | None:
        """Download synthesized audio into the temp dir; returns the file path or None."""
        # Recreate the shared session if missing/closed (mirrors _request_tts).
        session = self._session
        if session is None or session.closed:
            timeout = aiohttp.ClientTimeout(total=self.timeout)
            session = aiohttp.ClientSession(timeout=timeout)
            self._session = session

        try:
            async with session.get(url) as resp:
                if resp.status != 200:
                    logger.warning(f"下载音频失败: {resp.status}")
                    return None
                content_type = resp.headers.get("Content-Type", "")
                suffix = self._guess_extension(url, content_type)
                # Unique name to avoid collisions between concurrent downloads.
                file_path = self._temp_dir / f"tts_{uuid.uuid4().hex}{suffix}"
                audio_bytes = await resp.read()
                file_path.write_bytes(audio_bytes)
                logger.debug(f"下载音频完成: size={len(audio_bytes)} path={file_path}")
                return file_path
        except Exception as e:
            logger.warning(f"下载音频异常: {e}")
            return None
|
||||
|
||||
async def _write_audio_bytes(self, data: bytes, suffix: str = ".wav") -> Path:
|
||||
file_path = self._temp_dir / f"tts_{uuid.uuid4().hex}{suffix}"
|
||||
file_path.write_bytes(data)
|
||||
return file_path
|
||||
|
||||
def _load_pysilk(self):
|
||||
"""加载 silk 编码库,优先 pysilk,备选 pilk"""
|
||||
# 尝试 pysilk
|
||||
try:
|
||||
import pysilk
|
||||
return pysilk, "pysilk", None
|
||||
except Exception:
|
||||
pass
|
||||
# 尝试 pilk(64 位兼容)
|
||||
try:
|
||||
import pilk
|
||||
return pilk, "pilk", None
|
||||
except Exception as e:
|
||||
return None, None, e
|
||||
|
||||
    async def _convert_to_silk(self, input_path: Path) -> Path | None:
        """Convert an audio file to WeChat's silk format; returns the .silk path or None.

        Falls back to raw wav/amr output when no silk encoder is installed and
        allow_raw_audio is enabled.
        """
        suffix = input_path.suffix.lower()
        if suffix == ".silk":
            # Already silk: nothing to do.
            return input_path

        silk_lib, lib_name, err = self._load_pysilk()
        if not silk_lib:
            import sys
            if self.allow_raw_audio:
                # No encoder, but raw audio is allowed: send wav/amr instead.
                raw_path = await self._convert_to_raw(input_path)
                if raw_path:
                    logger.warning(
                        f"缺少 silk 编码库,使用 raw 音频发送: {raw_path} | python={sys.executable}"
                    )
                    return raw_path
            logger.warning(f"缺少 silk 编码库(pysilk/pilk),无法转换: {err} | python={sys.executable}")
            return None

        # Silk encoders need WAV input; transcode anything else first.
        source_path = input_path
        if suffix != ".wav":
            converted = await self._convert_to_wav(input_path)
            if not converted:
                logger.warning(f"不支持的音频格式: {suffix}")
                return None
            source_path = converted

        silk_path = source_path.with_suffix(".silk")

        # pilk uses a file-path API and needs correctly formatted PCM input.
        if lib_name == "pilk":
            try:
                import wave
                # Inspect the WAV file (diagnostics only).
                with wave.open(str(source_path), "rb") as wf:
                    sample_rate = wf.getframerate()
                    channels = wf.getnchannels()
                    logger.debug(f"WAV 信息: sample_rate={sample_rate} channels={channels}")

                # pilk needs mono PCM; normalise to 16000 Hz mono via ffmpeg first.
                converted_wav = await self._convert_to_wav_16k(source_path)
                if not converted_wav:
                    logger.warning("转换 WAV 到 16kHz 失败")
                    return None

                # Encode off the event loop; tencent=True yields WeChat-compatible silk.
                duration = await asyncio.to_thread(
                    silk_lib.encode, str(converted_wav), str(silk_path), pcm_rate=16000, tencent=True
                )
                logger.debug(f"pilk 编码完成: duration={duration}ms")
                # Remove the intermediate 16 kHz conversion file.
                if converted_wav != source_path and converted_wav.exists():
                    converted_wav.unlink()
                return silk_path
            except Exception as e:
                logger.warning(f"pilk 编码失败: {e}")
                return None

        # pysilk uses an in-memory PCM data API.
        try:
            import wave
            # NOTE(review): audioop was removed in Python 3.13 (PEP 594); this
            # branch needs a replacement on newer interpreters — confirm target version.
            import audioop

            with wave.open(str(source_path), "rb") as wf:
                sample_rate = wf.getframerate()
                channels = wf.getnchannels()
                sample_width = wf.getsampwidth()
                pcm = wf.readframes(wf.getnframes())

            if channels > 1:
                # Downmix stereo to mono (equal-weight average of both channels).
                pcm = audioop.tomono(pcm, sample_width, 0.5, 0.5)

            silk_bytes = await silk_lib.async_encode(
                pcm,
                data_rate=sample_rate,
                sample_rate=sample_rate,
            )
            silk_path.write_bytes(silk_bytes)
            return silk_path
        except Exception as e:
            logger.warning(f"pysilk 编码失败: {e}")
            return None
|
||||
|
||||
    async def _convert_to_wav(self, input_path: Path) -> Path | None:
        """Convert audio to 16 kHz mono WAV via ffmpeg; None if ffmpeg is missing or fails."""
        import shutil
        import subprocess

        ffmpeg = shutil.which("ffmpeg")
        if not ffmpeg:
            return None

        output_path = input_path.with_suffix(".wav")
        cmd = [
            ffmpeg, "-y", "-i", str(input_path),
            "-ac", "1", "-ar", "16000",
            str(output_path),
        ]
        try:
            # Run the blocking subprocess off the event loop.
            result = await asyncio.to_thread(
                subprocess.run, cmd, capture_output=True, text=True,
            )
            if result.returncode != 0:
                logger.warning(f"ffmpeg 转换失败: {result.stderr}")
                return None
            return output_path
        except Exception as e:
            logger.warning(f"ffmpeg 转换异常: {e}")
            return None
|
||||
|
||||
    async def _convert_to_wav_16k(self, input_path: Path) -> Path | None:
        """Convert audio to 16 kHz mono 16-bit PCM WAV (required by pilk)."""
        import shutil
        import subprocess

        ffmpeg = shutil.which("ffmpeg")
        if not ffmpeg:
            logger.warning("未找到 ffmpeg,无法转换音频采样率")
            return None

        # Distinct "_16k" name so the output never clobbers the source file.
        output_path = input_path.parent / f"{input_path.stem}_16k.wav"
        cmd = [
            ffmpeg, "-y", "-i", str(input_path),
            "-ac", "1", "-ar", "16000", "-acodec", "pcm_s16le",
            str(output_path),
        ]
        try:
            # Run the blocking subprocess off the event loop.
            result = await asyncio.to_thread(
                subprocess.run, cmd, capture_output=True, text=True,
            )
            if result.returncode != 0:
                logger.warning(f"ffmpeg 转换 16k 失败: {result.stderr}")
                return None
            logger.debug(f"转换为 16kHz WAV: {output_path}")
            return output_path
        except Exception as e:
            logger.warning(f"ffmpeg 转换 16k 异常: {e}")
            return None
|
||||
|
||||
async def _convert_to_raw(self, input_path: Path) -> Path | None:
|
||||
if self.raw_audio_format == "wav":
|
||||
if input_path.suffix.lower() == ".wav":
|
||||
return input_path
|
||||
return await self._convert_to_wav(input_path)
|
||||
if self.raw_audio_format == "amr":
|
||||
return await self._convert_with_ffmpeg(input_path, ".amr", sample_rate=8000)
|
||||
return await self._convert_to_wav(input_path)
|
||||
|
||||
    async def _convert_with_ffmpeg(self, input_path: Path, suffix: str, sample_rate: int = 16000) -> Path | None:
        """Transcode to mono audio at *sample_rate*, with extension *suffix*, via ffmpeg."""
        import shutil
        import subprocess

        ffmpeg = shutil.which("ffmpeg")
        if not ffmpeg:
            logger.warning("未找到 ffmpeg,无法转码")
            return None

        output_path = input_path.with_suffix(suffix)
        cmd = [
            ffmpeg,
            "-y",
            "-i",
            str(input_path),
            "-ac",
            "1",
            "-ar",
            str(sample_rate),
            str(output_path),
        ]
        try:
            # Run the blocking subprocess off the event loop.
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
            )
            if result.returncode != 0:
                logger.warning(f"ffmpeg 转换失败: {result.stderr}")
                return None
            return output_path
        except Exception as e:
            logger.warning(f"ffmpeg 转换异常: {e}")
            return None
|
||||
|
||||
    async def _synthesize_to_silk(self, text: str, voice: str) -> tuple[Path | None, list[Path]]:
        """Synthesize *text* and convert the result to silk.

        Returns (silk_path, cleanup_paths). silk_path is None on any failure;
        cleanup_paths lists intermediate files the caller must delete.
        """
        cleanup_paths = []
        text = self._truncate_text(text)
        if not text:
            return None, cleanup_paths

        response = await self._request_tts(text, voice)
        if not response:
            return None, cleanup_paths

        audio_url, audio_data = self._get_audio_info(response)
        logger.debug(f"音频信息: url={audio_url[:80] if audio_url else ''!r} data_len={len(audio_data) if audio_data else 0}")
        if audio_url:
            audio_path = await self._download_audio(audio_url)
        elif audio_data:
            # Inline audio comes back base64-encoded.
            try:
                raw = base64.b64decode(audio_data)
                audio_path = await self._write_audio_bytes(raw)
            except Exception as e:
                logger.warning(f"解码音频失败: {e}")
                return None, cleanup_paths
        else:
            logger.warning(f"未获取到音频数据: {response}")
            return None, cleanup_paths

        if not audio_path:
            return None, cleanup_paths

        if audio_path.exists():
            cleanup_paths.append(audio_path)
            if audio_path.suffix.lower() != ".wav":
                # Silk conversion may create an intermediate .wav next to the
                # source file; pre-register it for cleanup.
                cleanup_paths.append(audio_path.with_suffix(".wav"))

        silk_path = await self._convert_to_silk(audio_path)
        if not silk_path:
            return None, cleanup_paths
        return silk_path, cleanup_paths
|
||||
|
||||
async def _send_voice(self, bot: WechatHookClient, to_wxid: str, silk_path: Path) -> bool:
|
||||
try:
|
||||
ok = await bot.http_client.send_voice(to_wxid, str(silk_path))
|
||||
return ok
|
||||
except Exception as e:
|
||||
logger.warning(f"发送语音失败: {e}")
|
||||
return False
|
||||
|
||||
    async def _speak(self, bot: WechatHookClient, to_wxid: str, text: str, voice: str, silent: bool = False) -> bool:
        """Synthesize *text* and send it to *to_wxid* as a voice message.

        When *silent* is True, failures are not reported back into the chat.
        Temp files (silk output plus intermediates) are always cleaned up.
        """
        if not self.master_enabled:
            if not silent:
                await bot.send_text(to_wxid, "⚠️ VoiceSynth 总开关已关闭")
            return False

        silk_path = None
        cleanup_paths = []
        try:
            silk_path, cleanup_paths = await self._synthesize_to_silk(text, voice)
            if not silk_path:
                if not silent:
                    await bot.send_text(to_wxid, "❌ 语音生成失败")
                return False
            ok = await self._send_voice(bot, to_wxid, silk_path)
            if not ok and not silent:
                await bot.send_text(to_wxid, "❌ 语音发送失败")
            return ok
        finally:
            # Best-effort cleanup: never let a deletion error mask the result.
            if silk_path:
                try:
                    if silk_path.exists():
                        silk_path.unlink()
                except Exception:
                    pass
            for path in cleanup_paths:
                try:
                    if path.exists():
                        path.unlink()
                except Exception:
                    pass
|
||||
|
||||
    async def maybe_send_voice_reply(self, bot: WechatHookClient, to_wxid: str, text: str, message: dict | None = None):
        """Called after an AI text reply; sends a voice version with configured probability."""
        # NOTE(review): self.enabled is not assigned in this class — presumably
        # inherited from PluginBase; confirm.
        if not self.enabled:
            return
        if not self.master_enabled:
            return
        if self.ai_voice_probability <= 0:
            return
        # Skip messages that came from the auto-reply pipeline when auto-reply
        # voice is disabled.
        if message and not self.enable_auto_reply_voice:
            if message.get("_auto_reply_triggered") or message.get("_auto_reply_context"):
                return
        # Probability gate.
        if random.random() > self.ai_voice_probability:
            return

        is_group = False
        if message:
            is_group = bool(message.get("IsGroup", False))
        else:
            # Fallback heuristic: WeChat chatroom ids end with "@chatroom".
            is_group = to_wxid.endswith("@chatroom")

        if is_group and not self.enable_group:
            return
        if not is_group and not self.enable_private:
            return

        voice_code = self._get_chat_voice(to_wxid)
        if not voice_code:
            return

        # silent=True: never surface synthesis failures after an AI reply.
        await self._speak(bot, to_wxid, text, voice_code, silent=True)
|
||||
|
||||
    @on_text_message(priority=70)
    async def handle_voice_command(self, bot: WechatHookClient, message: dict):
        """Dispatch voice-related chat commands.

        Returns False to stop further plugin processing (command handled),
        True to let the message propagate to other plugins.
        """
        content = message.get("Content", "").strip()
        from_wxid = message.get("FromWxid", "")
        is_group = message.get("IsGroup", False)

        # Master on/off and status commands work even while the switch is off.
        if content == "/语音开":
            self.master_enabled = True
            ok = self._save_master_enabled(True)
            if ok:
                await bot.send_text(from_wxid, "✅ VoiceSynth 总开关已开启")
            else:
                await bot.send_text(from_wxid, "⚠️ VoiceSynth 已开启,但写入配置失败")
            return False

        if content == "/语音关":
            self.master_enabled = False
            ok = self._save_master_enabled(False)
            if ok:
                await bot.send_text(from_wxid, "✅ VoiceSynth 总开关已关闭")
            else:
                await bot.send_text(from_wxid, "⚠️ VoiceSynth 已关闭,但写入配置失败")
            return False

        if content == "/语音状态":
            current_voice = self._get_chat_voice(from_wxid)
            current_voice_name = self.voice_map.get(current_voice, current_voice) if current_voice else "未配置"
            lines = [
                "🎙️ VoiceSynth 状态",
                f"总开关: {'开启' if self.master_enabled else '关闭'}",
                f"群聊可用: {'是' if self.enable_group else '否'}",
                f"私聊可用: {'是' if self.enable_private else '否'}",
                f"AI回复语音概率: {self.ai_voice_probability:.2f}",
                f"AutoReply语音: {'开启' if self.enable_auto_reply_voice else '关闭'}",
                f"当前会话音色: {current_voice_name} ({current_voice or '-'})",
            ]
            await bot.send_text(from_wxid, "\n".join(lines))
            return False

        # With the master switch off, reject synthesis commands with a notice;
        # everything else passes through to other plugins.
        if not self.master_enabled:
            if content == "/音色列表" or content.startswith("/切换音色") or content.startswith("/echo"):
                await bot.send_text(from_wxid, "⚠️ VoiceSynth 总开关已关闭")
                return False
            return True

        # Respect the per-scope (group/private) switches.
        if is_group and not self.enable_group:
            return True
        if not is_group and not self.enable_private:
            return True

        if content == "/音色列表":
            if not self.voice_map:
                await bot.send_text(from_wxid, "❌ 未配置可用音色")
                return False

            current = self._get_chat_voice(from_wxid)
            lines = ["可用音色:"]
            for code, name in self.voice_map.items():
                # "*" marks the voice currently active in this chat.
                marker = "*" if code == current else "-"
                lines.append(f"{marker} {name} ({code})")
            lines.append(f"当前音色: {self.voice_map.get(current, current)} ({current})")
            lines.append("切换: /切换音色 音色代码")
            await bot.send_text(from_wxid, "\n".join(lines))
            return False

        if content.startswith("/切换音色"):
            voice_key = content[len("/切换音色"):].strip()
            if not voice_key:
                await bot.send_text(from_wxid, "❌ 用法: /切换音色 音色代码")
                return False

            # Accepts either a voice code or a display name.
            voice_code = self._resolve_voice(voice_key)
            if not voice_code:
                await bot.send_text(from_wxid, "❌ 未找到该音色")
                return False

            self._set_chat_voice(from_wxid, voice_code)
            display_name = self.voice_map.get(voice_code, voice_code)
            await bot.send_text(from_wxid, f"✅ 已切换音色: {display_name} ({voice_code})")
            return False

        if content.startswith("/echo"):
            text = content[len("/echo"):].strip()
            if not text:
                await bot.send_text(from_wxid, "❌ 用法: /echo 需要朗读的内容")
                return False

            voice_code = self._get_chat_voice(from_wxid)
            if not voice_code:
                await bot.send_text(from_wxid, "❌ 未配置音色")
                return False

            # silent=False: /echo reports failures back to the chat.
            await self._speak(bot, from_wxid, text, voice_code, silent=False)
            return False

        # Not a VoiceSynth command: let other plugins handle the message.
        return True
|
||||
Reference in New Issue
Block a user