优化 IO 问题,使用异步方案进行视频下载等操作。

This commit is contained in:
liuwei
2025-06-16 10:11:43 +08:00
parent 34f9158697
commit 02a387628c
5 changed files with 68 additions and 58 deletions

View File

@@ -1,5 +1,7 @@
import asyncio
import os
import requests
import aiohttp
import aiofiles
import uuid
from typing import Optional
from urllib.parse import urlparse
@@ -24,7 +26,7 @@ class MediaDownloader:
os.makedirs(self.download_dir, exist_ok=True)
self.LOG.info(f"媒体下载目录: {self.download_dir}")
def download_media(self, url: str, file_type: str = None) -> Optional[str]:
async def download_media(self, url: str, file_type: str = None) -> Optional[str]:
"""
下载媒体文件
@@ -42,7 +44,7 @@ class MediaDownloader:
# 如果没有文件名或扩展名,则生成一个随机文件名
if not filename or '.' not in filename:
ext = file_type if file_type else self._guess_file_type(url)
ext = file_type if file_type else await self._guess_file_type(url)
filename = f"{uuid.uuid4().hex}.{ext}" if ext else f"{uuid.uuid4().hex}"
local_path = os.path.join(self.download_dir, filename)
@@ -50,18 +52,19 @@ class MediaDownloader:
self.LOG.info(f"开始下载媒体文件: {url} -> {local_path}")
# 下载文件
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
with open(local_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
async with aiohttp.ClientSession() as session:
async with session.get(url, timeout=30) as response:
response.raise_for_status()
async with aiofiles.open(local_path, 'wb') as f:
async for chunk in response.content.iter_chunked(8192):
if chunk:
await f.write(chunk)
self.LOG.info(f"媒体文件下载成功: {local_path}")
# 下载成功后清理旧文件
self.clear_downloads()
await self.clear_downloads()
return os.path.abspath(local_path)
@@ -69,7 +72,7 @@ class MediaDownloader:
self.LOG.error(f"下载媒体文件失败: {url}, 错误: {str(e)}")
return None
def _guess_file_type(self, url: str) -> Optional[str]:
async def _guess_file_type(self, url: str) -> Optional[str]:
"""
从URL推断文件类型
@@ -97,21 +100,22 @@ class MediaDownloader:
return 'pdf'
else:
# 检查Content-Type
response = requests.head(url, timeout=5)
content_type = response.headers.get('Content-Type', '')
if 'image/jpeg' in content_type:
return 'jpg'
elif 'image/png' in content_type:
return 'png'
elif 'image/gif' in content_type:
return 'gif'
async with aiohttp.ClientSession() as session:
async with session.head(url, timeout=5) as response:
content_type = response.headers.get('Content-Type', '')
if 'image/jpeg' in content_type:
return 'jpg'
elif 'image/png' in content_type:
return 'png'
elif 'image/gif' in content_type:
return 'gif'
return None
except:
return None
def clear_downloads(self, max_age_days: int = 3) -> None:
async def clear_downloads(self, max_age_days: int = 3) -> None:
"""
清理超过指定天数的下载文件
@@ -136,7 +140,7 @@ class MediaDownloader:
# 如果文件超过最大保留时间,则删除
if file_age > max_age_seconds:
try:
os.remove(file_path)
await asyncio.to_thread(os.remove, file_path)
cleared_count += 1
self.LOG.debug(f"已删除过期文件: {file_path}")
except Exception as e: