""" 图片/视频处理模块 提供媒体文件的下载、编码和描述生成: - 图片下载与 base64 编码 - 表情包下载与编码 - 视频下载与编码 - AI 图片/视频描述生成 使用示例: from utils.image_processor import ImageProcessor, MediaConfig config = MediaConfig( api_url="https://api.openai.com/v1/chat/completions", api_key="sk-xxx", model="gpt-4-vision-preview", ) processor = ImageProcessor(config) # 下载图片 image_base64 = await processor.download_image(bot, cdnurl, aeskey) # 生成描述 description = await processor.generate_description(image_base64, "描述这张图片") """ from __future__ import annotations import asyncio import base64 import json import uuid from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, Optional, TYPE_CHECKING import aiohttp from loguru import logger # 可选代理支持 try: from aiohttp_socks import ProxyConnector PROXY_SUPPORT = True except ImportError: PROXY_SUPPORT = False if TYPE_CHECKING: pass # bot 类型提示 @dataclass class MediaConfig: """媒体处理配置""" # API 配置 api_url: str = "https://api.openai.com/v1/chat/completions" api_key: str = "" model: str = "gpt-4-vision-preview" timeout: int = 120 max_tokens: int = 1000 retries: int = 2 # 代理配置 proxy_enabled: bool = False proxy_type: str = "socks5" proxy_host: str = "127.0.0.1" proxy_port: int = 7890 proxy_username: str = "" proxy_password: str = "" # 视频专用配置 video_api_url: str = "" video_model: str = "" video_max_size_mb: int = 20 video_timeout: int = 360 video_max_tokens: int = 8192 # 临时目录 temp_dir: Optional[Path] = None @classmethod def from_dict(cls, config: Dict[str, Any]) -> "MediaConfig": """从配置字典创建""" api_config = config.get("api", {}) proxy_config = config.get("proxy", {}) image_desc_config = config.get("image_description", {}) video_config = config.get("video_recognition", {}) return cls( api_url=api_config.get("url", "https://api.openai.com/v1/chat/completions"), api_key=api_config.get("api_key", ""), model=image_desc_config.get("model", api_config.get("model", "gpt-4-vision-preview")), timeout=api_config.get("timeout", 120), max_tokens=image_desc_config.get("max_tokens", 1000), retries=image_desc_config.get("retries", 2), proxy_enabled=proxy_config.get("enabled", False), proxy_type=proxy_config.get("type", "socks5"), proxy_host=proxy_config.get("host", "127.0.0.1"), proxy_port=proxy_config.get("port", 7890), proxy_username=proxy_config.get("username", ""), proxy_password=proxy_config.get("password", ""), video_api_url=video_config.get("api_url", ""), video_model=video_config.get("model", ""), video_max_size_mb=video_config.get("max_size_mb", 20), video_timeout=video_config.get("timeout", 360), video_max_tokens=video_config.get("max_tokens", 8192), ) @dataclass class MediaResult: """媒体处理结果""" success: bool = False data: str = "" # base64 数据 description: str = "" error: Optional[str] = None media_type: str = "image" # image, emoji, video class ImageProcessor: """ 图片/视频处理器 提供统一的媒体处理接口: - 下载和编码 - AI 描述生成 - 缓存支持 """ def __init__(self, config: MediaConfig, temp_dir: Optional[Path] = None): self.config = config self.temp_dir = temp_dir or config.temp_dir or Path("temp") self.temp_dir.mkdir(exist_ok=True) def _get_proxy_connector(self) -> Optional[Any]: """获取代理连接器""" if not self.config.proxy_enabled or not PROXY_SUPPORT: return None proxy_type = self.config.proxy_type.upper() if self.config.proxy_username and self.config.proxy_password: proxy_url = ( f"{proxy_type}://{self.config.proxy_username}:" f"{self.config.proxy_password}@" f"{self.config.proxy_host}:{self.config.proxy_port}" ) else: proxy_url = f"{proxy_type}://{self.config.proxy_host}:{self.config.proxy_port}" try: return ProxyConnector.from_url(proxy_url) except Exception as e: logger.warning(f"[ImageProcessor] 代理配置失败: {e}") return None async def download_image( self, bot, cdnurl: str, aeskey: str, use_cache: bool = True, ) -> str: """ 下载图片并转换为 base64 Args: bot: WechatHookClient 实例(用于 CDN 下载) cdnurl: CDN URL aeskey: AES 密钥 use_cache: 是否使用缓存 Returns: base64 编码的图片数据(带 data URI 前缀) """ try: # 1. 优先从 Redis 缓存获取 if use_cache: from utils.redis_cache import RedisCache, get_cache redis_cache = get_cache() if redis_cache and redis_cache.enabled: media_key = RedisCache.generate_media_key(cdnurl, aeskey) if media_key: cached_data = redis_cache.get_cached_media(media_key, "image") if cached_data: logger.debug(f"[ImageProcessor] 图片缓存命中: {media_key[:20]}...") return cached_data # 2. 缓存未命中,下载图片 logger.debug(f"[ImageProcessor] 开始下载图片...") filename = f"temp_{uuid.uuid4().hex[:8]}.jpg" save_path = str((self.temp_dir / filename).resolve()) # 尝试下载中图,失败则下载原图 success = await bot.cdn_download(cdnurl, aeskey, save_path, file_type=2) if not success: success = await bot.cdn_download(cdnurl, aeskey, save_path, file_type=1) if not success: logger.error("[ImageProcessor] CDN 下载失败") return "" # 等待文件写入完成 import os for _ in range(20): # 最多等待10秒 if os.path.exists(save_path) and os.path.getsize(save_path) > 0: break await asyncio.sleep(0.5) if not os.path.exists(save_path): logger.error("[ImageProcessor] 图片文件未生成") return "" with open(save_path, "rb") as f: image_data = base64.b64encode(f.read()).decode() base64_result = f"data:image/jpeg;base64,{image_data}" # 3. 缓存到 Redis if use_cache: try: from utils.redis_cache import RedisCache, get_cache redis_cache = get_cache() if redis_cache and redis_cache.enabled: media_key = RedisCache.generate_media_key(cdnurl, aeskey) if media_key: redis_cache.cache_media(media_key, base64_result, "image", ttl=300) logger.debug(f"[ImageProcessor] 图片已缓存: {media_key[:20]}...") except Exception as e: logger.debug(f"[ImageProcessor] 缓存图片失败: {e}") # 清理临时文件 try: Path(save_path).unlink() except Exception: pass return base64_result except Exception as e: logger.error(f"[ImageProcessor] 下载图片失败: {e}") return "" async def download_emoji( self, cdn_url: str, max_retries: int = 3, use_cache: bool = True, ) -> str: """ 下载表情包并转换为 base64 Args: cdn_url: CDN URL max_retries: 最大重试次数 use_cache: 是否使用缓存 Returns: base64 编码的表情包数据(带 data URI 前缀) """ # 替换 HTML 实体 cdn_url = cdn_url.replace("&", "&") # 1. 优先从 Redis 缓存获取 media_key = None if use_cache: try: from utils.redis_cache import RedisCache, get_cache redis_cache = get_cache() media_key = RedisCache.generate_media_key(cdnurl=cdn_url) if redis_cache and redis_cache.enabled and media_key: cached_data = redis_cache.get_cached_media(media_key, "emoji") if cached_data: logger.debug(f"[ImageProcessor] 表情包缓存命中: {media_key[:20]}...") return cached_data except Exception: pass # 2. 缓存未命中,下载表情包 logger.debug(f"[ImageProcessor] 开始下载表情包...") last_error = None connector = self._get_proxy_connector() for attempt in range(max_retries): try: timeout = aiohttp.ClientTimeout(total=30 + attempt * 15) async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session: async with session.get(cdn_url) as response: if response.status == 200: content = await response.read() if len(content) == 0: logger.warning(f"[ImageProcessor] 表情包内容为空,重试 {attempt + 1}/{max_retries}") continue image_data = base64.b64encode(content).decode() base64_result = f"data:image/gif;base64,{image_data}" logger.debug(f"[ImageProcessor] 表情包下载成功,大小: {len(content)} 字节") # 3. 缓存到 Redis if use_cache and media_key: try: from utils.redis_cache import get_cache redis_cache = get_cache() if redis_cache and redis_cache.enabled: redis_cache.cache_media(media_key, base64_result, "emoji", ttl=300) logger.debug(f"[ImageProcessor] 表情包已缓存: {media_key[:20]}...") except Exception: pass return base64_result else: logger.warning(f"[ImageProcessor] 表情包下载失败,状态码: {response.status}") except asyncio.TimeoutError: last_error = "请求超时" logger.warning(f"[ImageProcessor] 表情包下载超时,重试 {attempt + 1}/{max_retries}") except aiohttp.ClientError as e: last_error = str(e) logger.warning(f"[ImageProcessor] 表情包下载网络错误: {e}") except Exception as e: last_error = str(e) logger.warning(f"[ImageProcessor] 表情包下载异常: {e}") if attempt < max_retries - 1: await asyncio.sleep(1 * (attempt + 1)) logger.error(f"[ImageProcessor] 表情包下载失败,已重试 {max_retries} 次: {last_error}") return "" async def download_video( self, bot, cdnurl: str, aeskey: str, use_cache: bool = True, ) -> str: """ 下载视频并转换为 base64 Args: bot: WechatHookClient 实例 cdnurl: CDN URL aeskey: AES 密钥 use_cache: 是否使用缓存 Returns: base64 编码的视频数据(带 data URI 前缀) """ try: # 从缓存获取 media_key = None if use_cache: try: from utils.redis_cache import RedisCache, get_cache redis_cache = get_cache() if redis_cache and redis_cache.enabled: media_key = RedisCache.generate_media_key(cdnurl, aeskey) if media_key: cached_data = redis_cache.get_cached_media(media_key, "video") if cached_data: logger.debug(f"[ImageProcessor] 视频缓存命中: {media_key[:20]}...") return cached_data except Exception: pass # 下载视频 logger.info(f"[ImageProcessor] 开始下载视频...") filename = f"video_{uuid.uuid4().hex[:8]}.mp4" save_path = str((self.temp_dir / filename).resolve()) # file_type=4 表示视频 success = await bot.cdn_download(cdnurl, aeskey, save_path, file_type=4) if not success: logger.error("[ImageProcessor] 视频 CDN 下载失败") return "" # 等待文件写入完成 import os for _ in range(30): if os.path.exists(save_path) and os.path.getsize(save_path) > 0: break await asyncio.sleep(0.5) if not os.path.exists(save_path): logger.error("[ImageProcessor] 视频文件未生成") return "" file_size = os.path.getsize(save_path) logger.info(f"[ImageProcessor] 视频下载完成,大小: {file_size / 1024 / 1024:.2f} MB") # 检查文件大小限制 max_size_mb = self.config.video_max_size_mb if file_size > max_size_mb * 1024 * 1024: logger.warning(f"[ImageProcessor] 视频文件过大: {file_size / 1024 / 1024:.2f} MB > {max_size_mb} MB") try: Path(save_path).unlink() except Exception: pass return "" # 读取并编码 with open(save_path, "rb") as f: video_data = base64.b64encode(f.read()).decode() video_base64 = f"data:video/mp4;base64,{video_data}" # 缓存到 Redis if use_cache and media_key: try: from utils.redis_cache import get_cache redis_cache = get_cache() if redis_cache and redis_cache.enabled: redis_cache.cache_media(media_key, video_base64, "video", ttl=600) logger.debug(f"[ImageProcessor] 视频已缓存: {media_key[:20]}...") except Exception: pass # 清理临时文件 try: Path(save_path).unlink() except Exception: pass return video_base64 except Exception as e: logger.error(f"[ImageProcessor] 下载视频失败: {e}") import traceback logger.error(traceback.format_exc()) return "" async def generate_description( self, image_base64: str, prompt: str = "请用一句话简洁地描述这张图片的主要内容。", model: Optional[str] = None, ) -> str: """ 使用 AI 生成图片描述 Args: image_base64: 图片的 base64 数据 prompt: 描述提示词 model: 使用的模型(默认使用配置中的模型) Returns: 图片描述文本,失败返回空字符串 """ description_model = model or self.config.model messages = [ { "role": "user", "content": [ {"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": image_base64}} ] } ] payload = { "model": description_model, "messages": messages, "max_tokens": self.config.max_tokens, "stream": True } headers = { "Content-Type": "application/json", "Authorization": f"Bearer {self.config.api_key}" } max_retries = self.config.retries last_error = None for attempt in range(max_retries + 1): try: timeout = aiohttp.ClientTimeout(total=self.config.timeout) connector = self._get_proxy_connector() async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session: async with session.post( self.config.api_url, json=payload, headers=headers ) as resp: if resp.status != 200: error_text = await resp.text() raise Exception(f"API 返回错误: {resp.status}, {error_text[:200]}") # 流式接收响应 description = "" async for line in resp.content: line = line.decode('utf-8').strip() if not line or line == "data: [DONE]": continue if line.startswith("data: "): try: data = json.loads(line[6:]) delta = data.get("choices", [{}])[0].get("delta", {}) content = delta.get("content", "") if content: description += content except Exception: pass logger.debug(f"[ImageProcessor] 图片描述生成成功: {description[:50]}...") return description.strip() except asyncio.CancelledError: raise except (aiohttp.ClientError, asyncio.TimeoutError) as e: last_error = str(e) if attempt < max_retries: logger.warning(f"[ImageProcessor] 图片描述网络错误: {e},重试 {attempt + 1}/{max_retries}") await asyncio.sleep(1 * (attempt + 1)) continue except Exception as e: last_error = str(e) if attempt < max_retries: logger.warning(f"[ImageProcessor] 图片描述生成异常: {e},重试 {attempt + 1}/{max_retries}") await asyncio.sleep(1 * (attempt + 1)) continue logger.error(f"[ImageProcessor] 生成图片描述失败,已重试 {max_retries + 1} 次: {last_error}") return "" async def analyze_video( self, video_base64: str, prompt: Optional[str] = None, ) -> str: """ 使用 AI 分析视频内容 Args: video_base64: 视频的 base64 数据 prompt: 分析提示词 Returns: 视频分析描述,失败返回空字符串 """ if not self.config.video_api_url or not self.config.video_model: logger.error("[ImageProcessor] 视频分析配置不完整") return "" # 去除 data:video/mp4;base64, 前缀(如果有) if video_base64.startswith("data:"): video_base64 = video_base64.split(",", 1)[1] default_prompt = """请详细分析这个视频的内容,包括: 1. 视频的主要场景和环境 2. 出现的人物/物体及其动作 3. 视频中的文字、对话或声音(如果有) 4. 视频的整体主题或要表达的内容 5. 任何值得注意的细节 请用客观、详细的方式描述,不要加入主观评价。""" analyze_prompt = prompt or default_prompt full_url = f"{self.config.video_api_url}/{self.config.video_model}:generateContent" payload = { "contents": [ { "parts": [ {"text": analyze_prompt}, { "inline_data": { "mime_type": "video/mp4", "data": video_base64 } } ] } ], "generationConfig": { "maxOutputTokens": self.config.video_max_tokens } } headers = { "Content-Type": "application/json", "Authorization": f"Bearer {self.config.api_key}" } timeout = aiohttp.ClientTimeout(total=self.config.video_timeout) max_retries = 2 retry_delay = 5 for attempt in range(max_retries + 1): try: logger.info(f"[ImageProcessor] 开始分析视频...{f' (重试 {attempt}/{max_retries})' if attempt > 0 else ''}") async with aiohttp.ClientSession(timeout=timeout) as session: async with session.post(full_url, json=payload, headers=headers) as resp: if resp.status in [502, 503, 504]: logger.warning(f"[ImageProcessor] 视频 API 临时错误: {resp.status}") if attempt < max_retries: await asyncio.sleep(retry_delay) continue return "" if resp.status != 200: error_text = await resp.text() logger.error(f"[ImageProcessor] 视频 API 错误: {resp.status}, {error_text[:300]}") return "" result = await resp.json() # 检查安全过滤 if "promptFeedback" in result: feedback = result["promptFeedback"] if feedback.get("blockReason"): logger.warning(f"[ImageProcessor] 视频内容被过滤: {feedback.get('blockReason')}") return "" # 提取文本 if "candidates" in result and result["candidates"]: for candidate in result["candidates"]: if candidate.get("finishReason") == "SAFETY": logger.warning("[ImageProcessor] 视频响应被安全过滤") return "" content = candidate.get("content", {}) for part in content.get("parts", []): if "text" in part: text = part["text"] logger.info(f"[ImageProcessor] 视频分析完成,长度: {len(text)}") return text logger.error(f"[ImageProcessor] 视频分析无有效响应") return "" except asyncio.TimeoutError: logger.warning(f"[ImageProcessor] 视频分析超时{f', 将重试...' if attempt < max_retries else ''}") if attempt < max_retries: await asyncio.sleep(retry_delay) continue return "" except Exception as e: logger.error(f"[ImageProcessor] 视频分析失败: {e}") import traceback logger.error(traceback.format_exc()) return "" return "" # ==================== 便捷函数 ==================== _default_processor: Optional[ImageProcessor] = None def get_image_processor(config: Optional[MediaConfig] = None) -> ImageProcessor: """获取默认图片处理器""" global _default_processor if config: _default_processor = ImageProcessor(config) if _default_processor is None: raise ValueError("ImageProcessor 未初始化,请先传入配置") return _default_processor def init_image_processor(config_dict: Dict[str, Any], temp_dir: Optional[Path] = None) -> ImageProcessor: """从配置字典初始化图片处理器""" config = MediaConfig.from_dict(config_dict) if temp_dir: config.temp_dir = temp_dir processor = ImageProcessor(config, temp_dir) global _default_processor _default_processor = processor return processor # ==================== 导出 ==================== __all__ = [ 'MediaConfig', 'MediaResult', 'ImageProcessor', 'get_image_processor', 'init_image_processor', ]