"""
Sora2API video generation plugin.

Supports command triggers and LLM tool calls, portrait/landscape selection
and image-to-video generation.
"""

import asyncio
import base64
import contextlib
import html
import json
import re
import tomllib
import traceback
import uuid
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path
from typing import List, Optional

import httpx
from loguru import logger

from utils.plugin_base import PluginBase
from utils.decorators import on_text_message
from WechatHook import WechatHookClient

# Matches an http(s) URL up to the first whitespace, ')', ']' or quote.
_URL_PATTERN = re.compile(r'https?://[^\s\)\]"\']+')


def on_quote_message(priority=50):
    """Mark a coroutine as a quote-message handler.

    The decorated function is returned unchanged except for two attributes:
    ``_event_type`` is set to ``'quote_message'`` and ``_priority`` is set to
    *priority* clamped into the range 0..99.
    """
    def decorator(func):
        setattr(func, '_event_type', 'quote_message')
        setattr(func, '_priority', min(max(priority, 0), 99))
        return func
    return decorator


class Sora2API(PluginBase):
    """Sora2API video generation plugin."""

    description = "Sora2API 视频生成插件 - 支持横屏/竖屏视频生成和LLM工具调用"
    author = "ShiHao"
    version = "1.0.0"

    def __init__(self):
        super().__init__()
        # Both are populated by async_init().
        self.config = None
        self.videos_dir = None

    async def async_init(self):
        """Load ``config.toml`` next to this file and create the videos directory."""
        config_path = Path(__file__).parent / "config.toml"
        with open(config_path, "rb") as f:
            self.config = tomllib.load(f)

        # Downloaded videos are stored in a "videos" directory beside this file.
        self.videos_dir = Path(__file__).parent / "videos"
        self.videos_dir.mkdir(exist_ok=True)

        logger.success("Sora2API 视频插件初始化完成")

    def _get_model(self, orientation: str) -> str:
        """Return the model name for *orientation*.

        ``"landscape"`` selects the landscape model; every other value
        (including unknown ones) selects the portrait model.
        """
        if orientation == "landscape":
            return "sora-video-landscape-15s"
        return "sora-video-portrait-15s"

    @staticmethod
    def _build_content(prompt: str, image_base64: Optional[str]):
        """Build the ``content`` field of the user message.

        Returns the plain prompt string, or a text + image_url part list when
        an image is supplied (image-to-video).
        """
        if image_base64:
            return [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_base64}},
            ]
        return prompt

    async def _read_video_url(self, response) -> tuple[Optional[str], str]:
        """Consume the streamed response and look for a video URL.

        Args:
            response: the open streaming response; only ``aiter_lines()`` is used.

        Returns:
            ``(video_url, full_content)`` where *video_url* is ``None`` when no
            URL was found and *full_content* is the concatenation of every
            ``delta.content`` chunk received.
        """
        video_url = None
        full_content = ""

        async for line in response.aiter_lines():
            logger.debug(f"收到响应行: {line}")
            if not line.startswith("data: "):
                continue

            data_str = line[6:]
            if data_str == "[DONE]":
                break

            try:
                data = json.loads(data_str)
                if "choices" in data and data["choices"]:
                    delta = data["choices"][0].get("delta", {})
                    chunk_text = delta.get("content", "")
                    if chunk_text:
                        full_content += chunk_text
                        logger.debug(f"累积内容: {full_content}")
                        if "http" in chunk_text:
                            # The most recent URL-bearing chunk wins.
                            # NOTE(review): a URL split across two chunks would be
                            # truncated here — confirm the API always emits the URL
                            # in a single chunk.
                            urls = _URL_PATTERN.findall(chunk_text)
                            if urls:
                                video_url = urls[0].rstrip("'\"")
                                logger.info(f"提取到视频URL: {video_url}")
            except Exception as e:
                # Deliberately best-effort: one malformed line must not abort
                # the whole stream.
                logger.warning(f"解析响应数据失败: {e}, 数据: {data_str}")
                continue

        # Nothing found chunk by chunk: fall back to the accumulated text.
        if not video_url and full_content:
            urls = _URL_PATTERN.findall(full_content)
            if urls:
                video_url = urls[0].rstrip("'\"")
                logger.info(f"从完整内容提取到视频URL: {video_url}")
            else:
                logger.warning(f"完整响应内容中未找到URL: {full_content}")

        return video_url, full_content

    async def generate_video(
        self,
        prompt: str,
        orientation: str = "portrait",
        image_base64: Optional[str] = None,
    ) -> List[str]:
        """
        Generate a video and download it locally.

        Args:
            prompt: the generation prompt
            orientation: "portrait" or "landscape" (see ``_get_model``)
            image_base64: optional image (data URL) for image-to-video

        Returns:
            A one-element list with the local video path, or an empty list when
            authentication fails (HTTP 401) or every attempt fails.
        """
        api_config = self.config["api"]
        gen_config = self.config["generation"]
        max_retry = gen_config["max_retry_attempts"]
        model = self._get_model(orientation)

        for attempt in range(max_retry):
            if attempt > 0:
                # Exponential backoff between attempts, capped at 10 seconds.
                await asyncio.sleep(min(2 ** attempt, 10))

            try:
                url = f"{api_config['base_url'].rstrip('/')}/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {api_config['token']}"
                }
                payload = {
                    "model": model,
                    "messages": [
                        {"role": "user", "content": self._build_content(prompt, image_base64)}
                    ],
                    "stream": True
                }

                logger.info(f"Sora2API请求: {model}, 提示词长度: {len(prompt)} 字符")
                logger.debug(f"完整提示词: {prompt}")
                logger.debug(f"请求URL: {url}")
                logger.debug(f"Payload大小: {len(str(payload))} 字节")

                # For a streamed response the read timeout bounds the gap between
                # two data chunks, not the total duration, so a long value is
                # acceptable. The configured value is capped at 600 seconds.
                max_timeout = min(api_config["timeout"], 600)
                timeout = httpx.Timeout(
                    connect=10.0,
                    read=max_timeout,
                    write=10.0,
                    pool=10.0
                )
                logger.debug(f"超时配置: connect=10s, read={max_timeout}s")

                # The proxy lookup (_get_aichat_proxy) is currently disabled;
                # requests are sent without a proxy.
                proxy = None

                if len(prompt) > 1000:
                    logger.warning(f"提示词较长 ({len(prompt)} 字符),可能影响处理速度")

                async with httpx.AsyncClient(timeout=timeout, proxy=proxy) as client:
                    async with client.stream("POST", url, json=payload, headers=headers) as response:
                        logger.debug(f"收到响应状态码: {response.status_code}")

                        if response.status_code == 401:
                            # A bad token will not get better by retrying.
                            logger.error("Token认证失败")
                            return []

                        if response.status_code != 200:
                            body = await response.aread()
                            # Decode so the log shows text rather than a bytes repr.
                            error_text = body.decode("utf-8", errors="replace")
                            logger.error(f"API请求失败: {response.status_code}, {error_text[:200]}")
                            continue

                        video_url, full_content = await self._read_video_url(response)

                if not video_url:
                    logger.error(f"未能提取到视频URL,完整响应: {full_content}")
                    continue

                video_path = await self._download_video(video_url)
                if video_path:
                    logger.success("成功生成视频")
                    return [video_path]

                # Download failed (possibly a 404): ask the API for a new link.
                logger.warning(f"视频下载失败,将重试 ({attempt + 1}/{max_retry})")

            except asyncio.TimeoutError:
                logger.warning(f"请求超时(asyncio.TimeoutError),重试中... ({attempt + 1}/{max_retry})")
                continue
            except httpx.ReadTimeout:
                logger.warning(f"读取超时(ReadTimeout),可能是视频生成时间过长,重试中... ({attempt + 1}/{max_retry})")
                logger.info(f"提示词长度: {len(prompt)} 字符,建议缩短提示词或增加超时时间")
                continue
            except Exception as e:
                logger.error(f"请求异常: {type(e).__name__}: {str(e)}")
                logger.error(f"异常详情:\n{traceback.format_exc()}")
                logger.error(f"提示词长度: {len(prompt)} 字符")
                continue

        logger.error("视频生成失败")
        return []

    async def _get_aichat_proxy(self) -> Optional[str]:
        """Read the proxy settings from the sibling AIChat plugin's config.

        Returns:
            ``"<type>://<host>:<port>"`` when ``../AIChat/config.toml`` exists and
            its ``[proxy]`` table has ``enabled`` set; otherwise ``None``
            (also on any read/parse error).
        """
        try:
            aichat_config_path = Path(__file__).parent.parent / "AIChat" / "config.toml"
            if aichat_config_path.exists():
                with open(aichat_config_path, "rb") as f:
                    aichat_config = tomllib.load(f)

                proxy_config = aichat_config.get("proxy", {})
                if proxy_config.get("enabled", False):
                    proxy_type = proxy_config.get("type", "socks5")
                    proxy_host = proxy_config.get("host", "127.0.0.1")
                    proxy_port = proxy_config.get("port", 7890)
                    proxy = f"{proxy_type}://{proxy_host}:{proxy_port}"
                    logger.info(f"使用 AIChat 代理: {proxy}")
                    return proxy
        except Exception as e:
            logger.warning(f"读取 AIChat 代理配置失败: {e}")
        return None

    async def _download_video(self, url: str, retry_on_404: bool = True) -> Optional[str]:
        """
        Download a video into ``self.videos_dir``.

        The body is streamed to disk chunk by chunk instead of being buffered
        in memory, and a partially written file is removed on failure.

        Args:
            url: the video URL
            retry_on_404: only controls whether the "will retry" hint is logged
                on a 404; the return value is ``None`` either way

        Returns:
            The local file path as a string, or ``None`` on any failure.
        """
        file_path: Optional[Path] = None
        try:
            timeout = httpx.Timeout(connect=10.0, read=240.0, write=10.0, pool=10.0)

            # The proxy lookup (_get_aichat_proxy) is currently disabled.
            proxy = None

            async with httpx.AsyncClient(timeout=timeout, proxy=proxy) as client:
                async with client.stream("GET", url) as response:
                    if response.status_code == 404:
                        logger.warning(f"视频URL返回404: {url}")
                        if retry_on_404:
                            logger.info("将触发重试以获取新的视频链接")
                        return None

                    response.raise_for_status()

                    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
                    uid = uuid.uuid4().hex[:8]
                    file_path = self.videos_dir / f"sora_{ts}_{uid}.mp4"

                    with open(file_path, "wb") as f:
                        async for chunk in response.aiter_bytes():
                            f.write(chunk)

            logger.info(f"视频下载成功: {file_path}")
            return str(file_path)
        except Exception as e:
            logger.error(f"下载视频失败: {e}")
            if file_path is not None:
                # Do not leave a truncated video behind.
                with contextlib.suppress(OSError):
                    file_path.unlink(missing_ok=True)
            return None

    def _chat_type_enabled(self, is_group) -> bool:
        """Return whether commands are enabled for this chat type (group/private)."""
        behavior = self.config["behavior"]
        if is_group:
            return bool(behavior["enable_group"])
        return bool(behavior["enable_private"])

    def _parse_command(self, text: str) -> Optional[tuple[str, str]]:
        """Parse a generation command.

        *text* must start with a configured keyword followed by " ", "横屏 "
        or "竖屏 ". An explicit orientation marker overrides the configured
        default orientation.

        Returns:
            ``(orientation, prompt)``, or ``None`` when *text* is not a command.
        """
        keywords = self.config["behavior"]["command_keywords"]
        matched_keyword = next(
            (
                keyword for keyword in keywords
                if text.startswith((keyword + " ", keyword + "横屏 ", keyword + "竖屏 "))
            ),
            None,
        )
        if not matched_keyword:
            return None

        rest = text[len(matched_keyword):].strip()
        orientation = self.config["generation"]["default_orientation"]

        if rest.startswith("横屏 "):
            orientation = "landscape"
            prompt = rest[3:].strip()
        elif rest.startswith("竖屏 "):
            orientation = "portrait"
            prompt = rest[3:].strip()
        else:
            prompt = rest

        return orientation, prompt

    @on_text_message(priority=70)
    async def handle_message(self, bot: WechatHookClient, message: dict):
        """Handle a text message that may be a video generation command.

        Returns ``True`` when the message is not handled here (commands
        disabled, chat type disabled, or not a command) and ``False`` once the
        command has been processed.
        """
        if not self.config["behavior"]["enable_command"]:
            return True

        content = message.get("Content", "").strip()
        from_wxid = message.get("FromWxid", "")
        is_group = message.get("IsGroup", False)

        if not self._chat_type_enabled(is_group):
            return True

        parsed = self._parse_command(content)
        if parsed is None:
            return True
        orientation, prompt = parsed

        if not prompt:
            await bot.send_text(from_wxid, "❌ 请提供视频生成提示词\n用法: /sora <提示词> 或 /sora横屏 <提示词>")
            return False

        logger.info(f"收到视频生成请求: {prompt[:50]}..., 方向: {orientation}")

        try:
            await bot.send_text(from_wxid, "🎬 视频生成中,请稍候...")

            video_paths = await self.generate_video(prompt, orientation)

            if video_paths:
                await bot.send_file(from_wxid, video_paths[0])
                logger.success("视频生成成功,已发送")
            else:
                await bot.send_text(from_wxid, "❌ 视频生成失败,请稍后重试")

        except Exception as e:
            logger.error(f"视频生成处理失败: {e}")
            await bot.send_text(from_wxid, f"❌ 处理失败: {str(e)}")

        return False

    @on_quote_message(priority=70)
    async def handle_quote_message(self, bot: WechatHookClient, message: dict):
        """Handle a video generation command that quotes an image message.

        The command is read from the ``<title>`` element of the message XML and
        the image from the ``<img>`` element inside ``<refermsg>/<content>``.

        Returns ``True`` when the message is not handled here (including any
        parse error) and ``False`` once it has been processed or rejected with
        a reply.
        """
        if not self.config["behavior"]["enable_command"]:
            return True

        content = message.get("Content", "").strip()
        from_wxid = message.get("FromWxid", "")
        is_group = message.get("IsGroup", False)

        if not self._chat_type_enabled(is_group):
            return True

        try:
            # NOTE(review): xml.etree is not hardened against maliciously
            # constructed XML (see "XML vulnerabilities" in the Python docs);
            # confirm whether message content can be attacker-controlled.
            root = ET.fromstring(content)
            title = root.find(".//title")
            if title is None or not title.text:
                return True

            parsed = self._parse_command(title.text.strip())
            if parsed is None:
                return True
            orientation, prompt = parsed

            if not prompt:
                await bot.send_text(from_wxid, "❌ 请提供视频生成提示词")
                return False

            refermsg = root.find(".//refermsg")
            if refermsg is None:
                # Not a quote message: leave it to the plain command handler.
                return True

            refer_content = refermsg.find("content")
            if refer_content is None or not refer_content.text:
                await bot.send_text(from_wxid, "❌ 引用的消息中没有图片")
                return False

            # The quoted message is itself XML, stored HTML-escaped.
            refer_xml = html.unescape(refer_content.text)
            refer_root = ET.fromstring(refer_xml)

            img = refer_root.find(".//img")
            if img is None:
                await bot.send_text(from_wxid, "❌ 引用的消息中没有图片")
                return False

            # CDN URL and AES key are both needed to download the image.
            cdnbigimgurl = img.get("cdnbigimgurl", "")
            aeskey = img.get("aeskey", "")

            if not cdnbigimgurl or not aeskey:
                await bot.send_text(from_wxid, "❌ 无法获取图片信息")
                return False

            logger.info(f"收到图生视频请求: {prompt[:50]}..., 方向: {orientation}")

        except Exception as e:
            logger.error(f"解析引用消息失败: {e}")
            return True

        try:
            await bot.send_text(from_wxid, "🎬 图生视频中,请稍候...")

            image_base64 = await self._download_and_encode_image(bot, cdnbigimgurl, aeskey)
            if not image_base64:
                await bot.send_text(from_wxid, "❌ 无法下载图片")
                return False

            video_paths = await self.generate_video(prompt, orientation, image_base64)

            if video_paths:
                await bot.send_file(from_wxid, video_paths[0])
                logger.success("图生视频成功,已发送")
            else:
                await bot.send_text(from_wxid, "❌ 视频生成失败,请稍后重试")

        except Exception as e:
            logger.error(f"图生视频处理失败: {e}")
            await bot.send_text(from_wxid, f"❌ 处理失败: {str(e)}")

        return False

    @staticmethod
    async def _wait_for_nonempty_file(path: Path, timeout: float = 10.0, interval: float = 0.5) -> bool:
        """Poll until *path* exists with a non-zero size or *timeout* elapses."""
        def is_ready() -> bool:
            try:
                return path.stat().st_size > 0
            except OSError:
                return False

        waited = 0.0
        while waited < timeout:
            if is_ready():
                return True
            await asyncio.sleep(interval)
            waited += interval
        # One last look after the final sleep.
        return is_ready()

    async def _download_and_encode_image(self, bot, cdnurl: str, aeskey: str) -> str:
        """Download an image through the bot's CDN API and return it as a data URL.

        The image is written to a temporary file under ``temp/`` beside this
        file; that file is always removed afterwards, on success or failure.

        Returns:
            ``"data:image/jpeg;base64,<data>"``, or ``""`` on any failure
            (including a download that never produces a non-empty file).
        """
        save_path: Optional[Path] = None
        try:
            temp_dir = Path(__file__).parent / "temp"
            temp_dir.mkdir(exist_ok=True)

            filename = f"temp_{datetime.now():%Y%m%d_%H%M%S}_{uuid.uuid4().hex[:8]}.jpg"
            save_path = (temp_dir / filename).resolve()

            logger.info(f"正在下载图片: {cdnurl[:50]}...")
            # Try file_type=2 first, then fall back to file_type=1.
            success = await bot.cdn_download(cdnurl, aeskey, str(save_path), file_type=2)
            if not success:
                logger.warning("中图下载失败,尝试下载原图...")
                success = await bot.cdn_download(cdnurl, aeskey, str(save_path), file_type=1)

            if not success:
                logger.error("图片下载失败")
                return ""

            # The file may appear after cdn_download returns; wait for it.
            # An empty file counts as a failure so that an image-less data URL
            # is never returned.
            if not await self._wait_for_nonempty_file(save_path):
                logger.error(f"文件下载超时或失败: {save_path}")
                return ""
            logger.info(f"文件已就绪: {save_path}")

            image_data = base64.b64encode(save_path.read_bytes()).decode()
            return f"data:image/jpeg;base64,{image_data}"
        except Exception as e:
            logger.error(f"下载图片失败: {e}")
            return ""
        finally:
            if save_path is not None:
                # Best-effort cleanup; a leftover temp file is not worth failing for.
                with contextlib.suppress(OSError):
                    save_path.unlink(missing_ok=True)

    def get_llm_tools(self) -> List[dict]:
        """Return the LLM tool definition, or an empty list when the tool is disabled."""
        if not self.config["llm_tool"]["enabled"]:
            return []

        return [{
            "type": "function",
            "function": {
                "name": self.config["llm_tool"]["tool_name"],
                "description": self.config["llm_tool"]["tool_description"],
                "parameters": {
                    "type": "object",
                    "properties": {
                        "prompt": {
                            "type": "string",
                            "description": "视频生成提示词,描述想要生成的视频内容"
                        },
                        "orientation": {
                            "type": "string",
                            "enum": ["portrait", "landscape"],
                            "description": "视频方向。portrait=竖屏(适合人物、竖版内容),landscape=横屏(适合风景、横向场景)。"
                        }
                    },
                    "required": ["prompt", "orientation"]
                }
            }
        }]

    async def execute_llm_tool(
        self,
        tool_name: str,
        arguments: dict,
        bot: WechatHookClient,
        from_wxid: str,
    ) -> Optional[dict]:
        """Execute an LLM tool call.

        Args:
            tool_name: name of the tool being invoked
            arguments: tool arguments; reads "prompt", "orientation" and the
                optional "image_base64"
            bot: client used to send the resulting video
            from_wxid: recipient of the video

        Returns:
            ``None`` when *tool_name* is not this plugin's tool; otherwise a
            dict with "success" and "message" (plus "videos" on success).
        """
        expected_tool_name = self.config["llm_tool"]["tool_name"]
        if tool_name != expected_tool_name:
            return None

        try:
            prompt = arguments.get("prompt")
            # Anything other than "landscape" is generated with the portrait
            # model (see _get_model), so normalise here to keep the log and the
            # reply message consistent with what was actually produced.
            orientation = "landscape" if arguments.get("orientation") == "landscape" else "portrait"
            image_base64 = arguments.get("image_base64")  # enables image-to-video

            if not prompt:
                return {"success": False, "message": "缺少提示词参数"}

            logger.info(f"LLM工具调用 - 目标: {from_wxid}, 提示词: {prompt[:50]}..., 方向: {orientation}")

            video_paths = await self.generate_video(prompt, orientation, image_base64)

            if video_paths:
                logger.info(f"准备发送视频到: {from_wxid}")
                await bot.send_file(from_wxid, video_paths[0])
                return {
                    "success": True,
                    "message": f"已生成并发送{'竖屏' if orientation == 'portrait' else '横屏'}视频",
                    "videos": [video_paths[0]]
                }
            return {"success": False, "message": "视频生成失败"}

        except Exception as e:
            logger.error(f"LLM工具执行失败: {e}")
            return {"success": False, "message": f"执行失败: {str(e)}"}