This commit is contained in:
2025-12-31 17:47:39 +08:00
38 changed files with 4435 additions and 1343 deletions

690
utils/image_processor.py Normal file
View File

@@ -0,0 +1,690 @@
"""
图片/视频处理模块
提供媒体文件的下载、编码和描述生成:
- 图片下载与 base64 编码
- 表情包下载与编码
- 视频下载与编码
- AI 图片/视频描述生成
使用示例:
from utils.image_processor import ImageProcessor, MediaConfig
config = MediaConfig(
api_url="https://api.openai.com/v1/chat/completions",
api_key="sk-xxx",
model="gpt-4-vision-preview",
)
processor = ImageProcessor(config)
# 下载图片
image_base64 = await processor.download_image(bot, cdnurl, aeskey)
# 生成描述
description = await processor.generate_description(image_base64, "描述这张图片")
"""
from __future__ import annotations
import asyncio
import base64
import json
import uuid
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional, TYPE_CHECKING
import aiohttp
from loguru import logger
# 可选代理支持
try:
from aiohttp_socks import ProxyConnector
PROXY_SUPPORT = True
except ImportError:
PROXY_SUPPORT = False
if TYPE_CHECKING:
pass # bot 类型提示
@dataclass
class MediaConfig:
    """
    Settings for media download and AI description generation.

    The field defaults below are the single source of truth: ``from_dict``
    falls back to them for every key that is missing from the input.
    """

    # --- Description API ---
    api_url: str = "https://api.openai.com/v1/chat/completions"
    api_key: str = ""
    model: str = "gpt-4-vision-preview"
    timeout: int = 120        # total request timeout, in seconds
    max_tokens: int = 1000
    retries: int = 2          # extra attempts after the first one

    # --- Proxy ---
    proxy_enabled: bool = False
    proxy_type: str = "socks5"
    proxy_host: str = "127.0.0.1"
    proxy_port: int = 7890
    proxy_username: str = ""
    proxy_password: str = ""

    # --- Video analysis ---
    video_api_url: str = ""
    video_model: str = ""
    video_max_size_mb: int = 20   # larger downloads are rejected
    video_timeout: int = 360      # total request timeout, in seconds
    video_max_tokens: int = 8192

    # --- Temporary download directory ---
    temp_dir: Optional[Path] = None

    @classmethod
    def from_dict(cls, config: Dict[str, Any]) -> "MediaConfig":
        """
        Build a configuration from a nested settings dictionary.

        Recognised sections are ``api``, ``proxy``, ``image_description`` and
        ``video_recognition``. A section that is missing, ``None`` or empty is
        treated as empty, and every missing key falls back to the dataclass
        default. ``temp_dir`` is not read from the dictionary.

        Args:
            config: Nested settings dictionary (may be empty or ``None``).

        Returns:
            A populated ``MediaConfig``.
        """
        config = config or {}
        # "or {}" also covers sections that are present but set to None.
        api = config.get("api") or {}
        proxy = config.get("proxy") or {}
        image_desc = config.get("image_description") or {}
        video = config.get("video_recognition") or {}

        # Fallbacks come from the field defaults, so the two cannot drift apart.
        defaults = cls()

        return cls(
            api_url=api.get("url", defaults.api_url),
            api_key=api.get("api_key", defaults.api_key),
            # The image-description model wins over the general API model.
            model=image_desc.get("model", api.get("model", defaults.model)),
            timeout=api.get("timeout", defaults.timeout),
            max_tokens=image_desc.get("max_tokens", defaults.max_tokens),
            retries=image_desc.get("retries", defaults.retries),
            proxy_enabled=proxy.get("enabled", defaults.proxy_enabled),
            proxy_type=proxy.get("type", defaults.proxy_type),
            proxy_host=proxy.get("host", defaults.proxy_host),
            proxy_port=proxy.get("port", defaults.proxy_port),
            proxy_username=proxy.get("username", defaults.proxy_username),
            proxy_password=proxy.get("password", defaults.proxy_password),
            video_api_url=video.get("api_url", defaults.video_api_url),
            video_model=video.get("model", defaults.video_model),
            video_max_size_mb=video.get("max_size_mb", defaults.video_max_size_mb),
            video_timeout=video.get("timeout", defaults.video_timeout),
            video_max_tokens=video.get("max_tokens", defaults.video_max_tokens),
        )
@dataclass
class MediaResult:
    """Outcome of a single media-processing operation."""

    # Success flag for the operation.
    success: bool = False
    # Base64 payload.
    data: str = ""
    # Description text for the media.
    description: str = ""
    # Error message; None when no error is recorded.
    error: Optional[str] = None
    # One of: image, emoji, video.
    media_type: str = "image"
class ImageProcessor:
    """
    Image / video processor.

    Offers one entry point for media handling:
    - downloading media and encoding it as a base64 data URI
    - generating AI descriptions for images and videos
    - optional Redis-backed caching of downloaded media
    """

    def __init__(self, config: "MediaConfig", temp_dir: Optional[Path] = None):
        """
        Store the configuration and make sure the temp directory exists.

        Args:
            config: Media processing configuration.
            temp_dir: Directory for temporary downloads. Falls back to
                ``config.temp_dir`` and then to ``./temp``.
        """
        self.config = config
        # Path() also accepts a plain string, e.g. one read from a config file.
        self.temp_dir = Path(temp_dir or config.temp_dir or "temp")
        # parents=True: a nested temp directory must not fail on first start.
        self.temp_dir.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _build_proxy_url(self) -> str:
        """Assemble the proxy URL from the configuration."""
        from urllib.parse import quote

        cfg = self.config
        credentials = ""
        if cfg.proxy_username and cfg.proxy_password:
            # Percent-encode so that '@', ':' or '/' inside the credentials
            # cannot be mistaken for URL delimiters.
            credentials = (
                f"{quote(cfg.proxy_username, safe='')}:"
                f"{quote(cfg.proxy_password, safe='')}@"
            )
        # URL schemes are case-insensitive; lower case is the canonical form.
        return f"{cfg.proxy_type.lower()}://{credentials}{cfg.proxy_host}:{cfg.proxy_port}"

    def _get_proxy_connector(self) -> Optional[Any]:
        """
        Create a proxy connector for one ``aiohttp.ClientSession``.

        Returns:
            A new ``ProxyConnector``, or None when the proxy is disabled,
            ``aiohttp_socks`` is not installed, or the settings are invalid.
        """
        if not self.config.proxy_enabled or not PROXY_SUPPORT:
            return None
        try:
            return ProxyConnector.from_url(self._build_proxy_url())
        except Exception as e:
            logger.warning(f"[ImageProcessor] 代理配置失败: {e}")
            return None

    @staticmethod
    def _cache_get(media_type: str, *key_args: Any, **key_kwargs: Any):
        """
        Best-effort Redis lookup; never raises.

        Args:
            media_type: Cache namespace ("image", "emoji" or "video").
            *key_args, **key_kwargs: Forwarded to
                ``RedisCache.generate_media_key``.

        Returns:
            ``(media_key, cached_data)``. ``media_key`` is None when the cache
            is unavailable or no key could be derived; ``cached_data`` is an
            empty string on a miss.
        """
        try:
            from utils.redis_cache import RedisCache, get_cache

            redis_cache = get_cache()
            if not (redis_cache and redis_cache.enabled):
                return None, ""
            media_key = RedisCache.generate_media_key(*key_args, **key_kwargs)
            if not media_key:
                return None, ""
            return media_key, redis_cache.get_cached_media(media_key, media_type) or ""
        except Exception as e:
            # A cache problem must never prevent the actual download.
            logger.debug(f"[ImageProcessor] 读取缓存失败: {e}")
            return None, ""

    @staticmethod
    def _cache_put(media_key: str, data: str, media_type: str, ttl: int) -> bool:
        """Best-effort Redis write; returns True when the entry was stored."""
        try:
            from utils.redis_cache import get_cache

            redis_cache = get_cache()
            if redis_cache and redis_cache.enabled:
                redis_cache.cache_media(media_key, data, media_type, ttl=ttl)
                return True
        except Exception as e:
            logger.debug(f"[ImageProcessor] 写入缓存失败: {e}")
        return False

    @staticmethod
    async def _wait_for_file(path: Path, attempts: int, interval: float = 0.5) -> bool:
        """
        Poll until ``path`` exists and is non-empty.

        Returns:
            True once the file has content, False after ``attempts`` polls.
        """
        for _ in range(attempts):
            try:
                if path.stat().st_size > 0:
                    return True
            except OSError:
                pass  # not created yet
            await asyncio.sleep(interval)
        return False

    @staticmethod
    def _remove_temp_file(path: Path) -> None:
        """Delete a temporary file, ignoring a file that is already gone."""
        try:
            path.unlink()
        except FileNotFoundError:
            pass
        except OSError as e:
            logger.debug(f"[ImageProcessor] 清理临时文件失败: {e}")

    @staticmethod
    def _extract_stream_content(line: str) -> str:
        """
        Pull the text delta out of one server-sent-events line.

        Returns:
            The ``choices[0].delta.content`` string, or an empty string for
            blank lines, comments, the ``[DONE]`` marker and malformed data.
        """
        line = line.strip()
        if not line.startswith("data:"):
            return ""
        body = line[len("data:"):].strip()
        if not body or body == "[DONE]":
            return ""
        try:
            data = json.loads(body)
            choices = data.get("choices") or [{}]
            delta = choices[0].get("delta") or {}
            content = delta.get("content")
        except (ValueError, AttributeError, IndexError, KeyError, TypeError):
            # A malformed chunk is skipped, not fatal for the whole stream.
            return ""
        return content if isinstance(content, str) else ""

    # ------------------------------------------------------------------
    # Downloads
    # ------------------------------------------------------------------

    async def download_image(
        self,
        bot,
        cdnurl: str,
        aeskey: str,
        use_cache: bool = True,
    ) -> str:
        """
        Download an image and convert it to base64.

        Args:
            bot: WechatHookClient instance (used for the CDN download).
            cdnurl: CDN URL.
            aeskey: AES key.
            use_cache: Whether to use the Redis cache.

        Returns:
            The image as a ``data:image/jpeg;base64,...`` URI, or an empty
            string on failure.
        """
        # 1. Try the cache first; a cache failure falls through to the download.
        media_key = None
        if use_cache:
            media_key, cached_data = self._cache_get("image", cdnurl, aeskey)
            if cached_data:
                logger.debug(f"[ImageProcessor] 图片缓存命中: {media_key[:20]}...")
                return cached_data

        # 2. Cache miss: download into a uniquely named temp file.
        save_path = (self.temp_dir / f"temp_{uuid.uuid4().hex[:8]}.jpg").resolve()
        try:
            logger.debug("[ImageProcessor] 开始下载图片...")
            # Prefer the medium-size image, fall back to the original.
            success = await bot.cdn_download(cdnurl, aeskey, str(save_path), file_type=2)
            if not success:
                success = await bot.cdn_download(cdnurl, aeskey, str(save_path), file_type=1)
            if not success:
                logger.error("[ImageProcessor] CDN 下载失败")
                return ""

            # Wait up to 10 seconds for the file to be written. An empty file
            # counts as "not written" so no empty data URI is returned/cached.
            if not await self._wait_for_file(save_path, attempts=20):
                logger.error("[ImageProcessor] 图片文件未生成")
                return ""

            image_data = base64.b64encode(save_path.read_bytes()).decode()
            base64_result = f"data:image/jpeg;base64,{image_data}"

            # 3. Store in the cache.
            if use_cache and media_key:
                if self._cache_put(media_key, base64_result, "image", ttl=300):
                    logger.debug(f"[ImageProcessor] 图片已缓存: {media_key[:20]}...")
            return base64_result
        except Exception as e:
            logger.error(f"[ImageProcessor] 下载图片失败: {e}")
            return ""
        finally:
            # Runs on every path, so failures no longer leak temp files.
            self._remove_temp_file(save_path)

    async def download_emoji(
        self,
        cdn_url: str,
        max_retries: int = 3,
        use_cache: bool = True,
    ) -> str:
        """
        Download an emoji (sticker) and convert it to base64.

        Args:
            cdn_url: CDN URL (may contain HTML-escaped ampersands).
            max_retries: Maximum number of download attempts.
            use_cache: Whether to use the Redis cache.

        Returns:
            The emoji as a ``data:image/gif;base64,...`` URI, or an empty
            string on failure.
        """
        # Undo HTML entity escaping of the query-string separator.
        cdn_url = cdn_url.replace("&amp;", "&")

        # 1. Try the cache first.
        media_key = None
        if use_cache:
            media_key, cached_data = self._cache_get("emoji", cdnurl=cdn_url)
            if cached_data:
                logger.debug(f"[ImageProcessor] 表情包缓存命中: {media_key[:20]}...")
                return cached_data

        # 2. Cache miss: download with retries.
        logger.debug("[ImageProcessor] 开始下载表情包...")
        last_error = None
        for attempt in range(max_retries):
            try:
                # Each retry gets a longer timeout.
                timeout = aiohttp.ClientTimeout(total=30 + attempt * 15)
                # The session owns and closes its connector on exit, so every
                # attempt needs a fresh one.
                connector = self._get_proxy_connector()
                async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
                    async with session.get(cdn_url) as response:
                        if response.status != 200:
                            last_error = f"HTTP {response.status}"
                            logger.warning(f"[ImageProcessor] 表情包下载失败,状态码: {response.status}")
                        else:
                            content = await response.read()
                            if not content:
                                last_error = "empty response body"
                                logger.warning(f"[ImageProcessor] 表情包内容为空,重试 {attempt + 1}/{max_retries}")
                            else:
                                image_data = base64.b64encode(content).decode()
                                base64_result = f"data:image/gif;base64,{image_data}"
                                logger.debug(f"[ImageProcessor] 表情包下载成功,大小: {len(content)} 字节")

                                # 3. Store in the cache.
                                if use_cache and media_key:
                                    if self._cache_put(media_key, base64_result, "emoji", ttl=300):
                                        logger.debug(f"[ImageProcessor] 表情包已缓存: {media_key[:20]}...")
                                return base64_result
            except asyncio.TimeoutError:
                last_error = "请求超时"
                logger.warning(f"[ImageProcessor] 表情包下载超时,重试 {attempt + 1}/{max_retries}")
            except aiohttp.ClientError as e:
                last_error = str(e)
                logger.warning(f"[ImageProcessor] 表情包下载网络错误: {e}")
            except Exception as e:
                last_error = str(e)
                logger.warning(f"[ImageProcessor] 表情包下载异常: {e}")

            # Linear back-off before the next attempt.
            if attempt < max_retries - 1:
                await asyncio.sleep(1 * (attempt + 1))

        logger.error(f"[ImageProcessor] 表情包下载失败,已重试 {max_retries} 次: {last_error}")
        return ""

    async def download_video(
        self,
        bot,
        cdnurl: str,
        aeskey: str,
        use_cache: bool = True,
    ) -> str:
        """
        Download a video and convert it to base64.

        Args:
            bot: WechatHookClient instance.
            cdnurl: CDN URL.
            aeskey: AES key.
            use_cache: Whether to use the Redis cache.

        Returns:
            The video as a ``data:video/mp4;base64,...`` URI, or an empty
            string on failure or when the file exceeds
            ``config.video_max_size_mb``.
        """
        # Try the cache first.
        media_key = None
        if use_cache:
            media_key, cached_data = self._cache_get("video", cdnurl, aeskey)
            if cached_data:
                logger.debug(f"[ImageProcessor] 视频缓存命中: {media_key[:20]}...")
                return cached_data

        save_path = (self.temp_dir / f"video_{uuid.uuid4().hex[:8]}.mp4").resolve()
        try:
            logger.info("[ImageProcessor] 开始下载视频...")
            # file_type=4 selects video.
            success = await bot.cdn_download(cdnurl, aeskey, str(save_path), file_type=4)
            if not success:
                logger.error("[ImageProcessor] 视频 CDN 下载失败")
                return ""

            # Wait up to 15 seconds for a non-empty file.
            if not await self._wait_for_file(save_path, attempts=30):
                logger.error("[ImageProcessor] 视频文件未生成")
                return ""

            file_size = save_path.stat().st_size
            logger.info(f"[ImageProcessor] 视频下载完成,大小: {file_size / 1024 / 1024:.2f} MB")

            # Enforce the size limit before reading the file into memory.
            max_size_mb = self.config.video_max_size_mb
            if file_size > max_size_mb * 1024 * 1024:
                logger.warning(f"[ImageProcessor] 视频文件过大: {file_size / 1024 / 1024:.2f} MB > {max_size_mb} MB")
                return ""

            video_data = base64.b64encode(save_path.read_bytes()).decode()
            video_base64 = f"data:video/mp4;base64,{video_data}"

            # Store in the cache.
            if use_cache and media_key:
                if self._cache_put(media_key, video_base64, "video", ttl=600):
                    logger.debug(f"[ImageProcessor] 视频已缓存: {media_key[:20]}...")
            return video_base64
        except Exception as e:
            import traceback

            logger.error(f"[ImageProcessor] 下载视频失败: {e}")
            logger.error(traceback.format_exc())
            return ""
        finally:
            # Single cleanup point for success, rejection and failure paths.
            self._remove_temp_file(save_path)

    # ------------------------------------------------------------------
    # AI descriptions
    # ------------------------------------------------------------------

    async def generate_description(
        self,
        image_base64: str,
        prompt: str = "请用一句话简洁地描述这张图片的主要内容。",
        model: Optional[str] = None,
    ) -> str:
        """
        Generate an image description with the configured AI model.

        The request is sent as a streaming chat completion and the streamed
        text deltas are concatenated.

        Args:
            image_base64: Image data (passed through as the image URL).
            prompt: Description prompt.
            model: Model to use; defaults to ``config.model``.

        Returns:
            The description text, or an empty string on failure.
        """
        payload = {
            "model": model or self.config.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_base64}},
                    ],
                }
            ],
            "max_tokens": self.config.max_tokens,
            "stream": True,
        }
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.config.api_key}",
        }

        max_retries = self.config.retries
        last_error = None
        for attempt in range(max_retries + 1):
            try:
                timeout = aiohttp.ClientTimeout(total=self.config.timeout)
                # A fresh connector per attempt; the session closes it on exit.
                connector = self._get_proxy_connector()
                async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
                    async with session.post(
                        self.config.api_url,
                        json=payload,
                        headers=headers,
                    ) as resp:
                        if resp.status != 200:
                            error_text = await resp.text()
                            raise Exception(f"API 返回错误: {resp.status}, {error_text[:200]}")

                        # Consume the stream line by line.
                        chunks = []
                        async for raw_line in resp.content:
                            piece = self._extract_stream_content(
                                raw_line.decode("utf-8", errors="replace")
                            )
                            if piece:
                                chunks.append(piece)
                        description = "".join(chunks)

                        logger.debug(f"[ImageProcessor] 图片描述生成成功: {description[:50]}...")
                        return description.strip()
            except asyncio.CancelledError:
                # Never swallow cancellation.
                raise
            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                last_error = str(e)
                if attempt < max_retries:
                    logger.warning(f"[ImageProcessor] 图片描述网络错误: {e},重试 {attempt + 1}/{max_retries}")
                    await asyncio.sleep(1 * (attempt + 1))
                    continue
            except Exception as e:
                last_error = str(e)
                if attempt < max_retries:
                    logger.warning(f"[ImageProcessor] 图片描述生成异常: {e},重试 {attempt + 1}/{max_retries}")
                    await asyncio.sleep(1 * (attempt + 1))
                    continue

        logger.error(f"[ImageProcessor] 生成图片描述失败,已重试 {max_retries + 1} 次: {last_error}")
        return ""

    async def analyze_video(
        self,
        video_base64: str,
        prompt: Optional[str] = None,
    ) -> str:
        """
        Analyse video content with the configured video model.

        Posts to ``{video_api_url}/{video_model}:generateContent`` and returns
        the first text part of the response.

        Args:
            video_base64: Video data, with or without a ``data:`` URI prefix.
            prompt: Analysis prompt; a built-in default is used when omitted.

        Returns:
            The analysis text, or an empty string on failure, when the
            response is blocked by a safety filter, or when the video
            configuration is incomplete.
        """
        if not self.config.video_api_url or not self.config.video_model:
            logger.error("[ImageProcessor] 视频分析配置不完整")
            return ""

        # Strip a "data:video/mp4;base64," prefix if present. partition()
        # cannot raise when the comma is missing.
        if video_base64.startswith("data:"):
            video_base64 = video_base64.partition(",")[2]
        if not video_base64:
            logger.error("[ImageProcessor] 视频数据为空")
            return ""

        default_prompt = (
            "请详细分析这个视频的内容,包括:\n"
            "1. 视频的主要场景和环境\n"
            "2. 出现的人物/物体及其动作\n"
            "3. 视频中的文字、对话或声音(如果有)\n"
            "4. 视频的整体主题或要表达的内容\n"
            "5. 任何值得注意的细节\n"
            "请用客观、详细的方式描述,不要加入主观评价。"
        )
        analyze_prompt = prompt or default_prompt

        # rstrip avoids a double slash when the base URL ends with "/".
        base_url = self.config.video_api_url.rstrip("/")
        full_url = f"{base_url}/{self.config.video_model}:generateContent"

        payload = {
            "contents": [
                {
                    "parts": [
                        {"text": analyze_prompt},
                        {
                            "inline_data": {
                                "mime_type": "video/mp4",
                                "data": video_base64,
                            }
                        },
                    ]
                }
            ],
            "generationConfig": {
                "maxOutputTokens": self.config.video_max_tokens,
            },
        }
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.config.api_key}",
        }

        timeout = aiohttp.ClientTimeout(total=self.config.video_timeout)
        max_retries = 2
        retry_delay = 5
        transient_statuses = (502, 503, 504)

        for attempt in range(max_retries + 1):
            try:
                retry_note = f" (重试 {attempt}/{max_retries})" if attempt > 0 else ""
                logger.info(f"[ImageProcessor] 开始分析视频...{retry_note}")

                # NOTE(review): unlike the other requests in this class, this
                # one does not go through the configured proxy - confirm that
                # is intentional.
                async with aiohttp.ClientSession(timeout=timeout) as session:
                    async with session.post(full_url, json=payload, headers=headers) as resp:
                        # Gateway-type errors are worth retrying.
                        if resp.status in transient_statuses:
                            logger.warning(f"[ImageProcessor] 视频 API 临时错误: {resp.status}")
                            if attempt < max_retries:
                                await asyncio.sleep(retry_delay)
                                continue
                            return ""

                        if resp.status != 200:
                            error_text = await resp.text()
                            logger.error(f"[ImageProcessor] 视频 API 错误: {resp.status}, {error_text[:300]}")
                            return ""

                        result = await resp.json()

                        # Prompt-level safety filter.
                        if "promptFeedback" in result:
                            feedback = result["promptFeedback"]
                            if feedback.get("blockReason"):
                                logger.warning(f"[ImageProcessor] 视频内容被过滤: {feedback.get('blockReason')}")
                                return ""

                        # Return the first text part of the first usable candidate.
                        for candidate in result.get("candidates") or []:
                            if candidate.get("finishReason") == "SAFETY":
                                logger.warning("[ImageProcessor] 视频响应被安全过滤")
                                return ""
                            content = candidate.get("content", {})
                            for part in content.get("parts", []):
                                if "text" in part:
                                    text = part["text"]
                                    logger.info(f"[ImageProcessor] 视频分析完成,长度: {len(text)}")
                                    return text

                        logger.error("[ImageProcessor] 视频分析无有效响应")
                        return ""
            except asyncio.TimeoutError:
                will_retry = attempt < max_retries
                retry_hint = ", 将重试..." if will_retry else ""
                logger.warning(f"[ImageProcessor] 视频分析超时{retry_hint}")
                if will_retry:
                    await asyncio.sleep(retry_delay)
                    continue
                return ""
            except Exception as e:
                import traceback

                logger.error(f"[ImageProcessor] 视频分析失败: {e}")
                logger.error(traceback.format_exc())
                return ""

        return ""
# ==================== Convenience functions ====================

# Module-level processor instance; None until one has been created.
_default_processor: Optional["ImageProcessor"] = None


def get_image_processor(config: Optional["MediaConfig"] = None) -> "ImageProcessor":
    """
    Return the module-level default processor.

    When ``config`` is given, a new processor is built from it and replaces
    the current default before it is returned.

    Raises:
        ValueError: No processor exists yet and no ``config`` was passed.
    """
    global _default_processor

    if config:
        _default_processor = ImageProcessor(config)

    if _default_processor is not None:
        return _default_processor

    raise ValueError("ImageProcessor 未初始化,请先传入配置")
def init_image_processor(config_dict: Dict[str, Any], temp_dir: Optional[Path] = None) -> "ImageProcessor":
    """
    Build a processor from a settings dictionary and install it as the default.

    Args:
        config_dict: Nested settings dictionary, parsed by
            ``MediaConfig.from_dict``.
        temp_dir: Optional temp directory; when given it is also written to
            the resulting configuration.

    Returns:
        The newly created processor, which is also stored as the module-level
        default.
    """
    global _default_processor

    media_config = MediaConfig.from_dict(config_dict)
    if temp_dir:
        media_config.temp_dir = temp_dir

    _default_processor = ImageProcessor(media_config, temp_dir)
    return _default_processor
# ==================== Exports ====================
# Public names of this module.
__all__ = [
    "MediaConfig",
    "MediaResult",
    "ImageProcessor",
    "get_image_processor",
    "init_image_processor",
]