优化IO问题,使用异步方案进行视频下载等操作。
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
import asyncio
|
||||
import os
|
||||
import requests
|
||||
import aiohttp
|
||||
import aiofiles
|
||||
import uuid
|
||||
from typing import Optional
|
||||
from urllib.parse import urlparse
|
||||
@@ -24,7 +26,7 @@ class MediaDownloader:
|
||||
os.makedirs(self.download_dir, exist_ok=True)
|
||||
self.LOG.info(f"媒体下载目录: {self.download_dir}")
|
||||
|
||||
def download_media(self, url: str, file_type: str = None) -> Optional[str]:
|
||||
async def download_media(self, url: str, file_type: str = None) -> Optional[str]:
|
||||
"""
|
||||
下载媒体文件
|
||||
|
||||
@@ -42,7 +44,7 @@ class MediaDownloader:
|
||||
|
||||
# 如果没有文件名或扩展名,则生成一个随机文件名
|
||||
if not filename or '.' not in filename:
|
||||
ext = file_type if file_type else self._guess_file_type(url)
|
||||
ext = file_type if file_type else await self._guess_file_type(url)
|
||||
filename = f"{uuid.uuid4().hex}.{ext}" if ext else f"{uuid.uuid4().hex}"
|
||||
|
||||
local_path = os.path.join(self.download_dir, filename)
|
||||
@@ -50,18 +52,19 @@ class MediaDownloader:
|
||||
self.LOG.info(f"开始下载媒体文件: {url} -> {local_path}")
|
||||
|
||||
# 下载文件
|
||||
response = requests.get(url, stream=True, timeout=30)
|
||||
response.raise_for_status()
|
||||
|
||||
with open(local_path, 'wb') as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(url, timeout=30) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
async with aiofiles.open(local_path, 'wb') as f:
|
||||
async for chunk in response.content.iter_chunked(8192):
|
||||
if chunk:
|
||||
await f.write(chunk)
|
||||
|
||||
self.LOG.info(f"媒体文件下载成功: {local_path}")
|
||||
|
||||
# 下载成功后清理旧文件
|
||||
self.clear_downloads()
|
||||
await self.clear_downloads()
|
||||
|
||||
return os.path.abspath(local_path)
|
||||
|
||||
@@ -69,7 +72,7 @@ class MediaDownloader:
|
||||
self.LOG.error(f"下载媒体文件失败: {url}, 错误: {str(e)}")
|
||||
return None
|
||||
|
||||
def _guess_file_type(self, url: str) -> Optional[str]:
|
||||
async def _guess_file_type(self, url: str) -> Optional[str]:
|
||||
"""
|
||||
从URL推断文件类型
|
||||
|
||||
@@ -97,21 +100,22 @@ class MediaDownloader:
|
||||
return 'pdf'
|
||||
else:
|
||||
# 检查Content-Type
|
||||
response = requests.head(url, timeout=5)
|
||||
content_type = response.headers.get('Content-Type', '')
|
||||
|
||||
if 'image/jpeg' in content_type:
|
||||
return 'jpg'
|
||||
elif 'image/png' in content_type:
|
||||
return 'png'
|
||||
elif 'image/gif' in content_type:
|
||||
return 'gif'
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.head(url, timeout=5) as response:
|
||||
content_type = response.headers.get('Content-Type', '')
|
||||
|
||||
if 'image/jpeg' in content_type:
|
||||
return 'jpg'
|
||||
elif 'image/png' in content_type:
|
||||
return 'png'
|
||||
elif 'image/gif' in content_type:
|
||||
return 'gif'
|
||||
|
||||
return None
|
||||
except:
|
||||
return None
|
||||
|
||||
def clear_downloads(self, max_age_days: int = 3) -> None:
|
||||
async def clear_downloads(self, max_age_days: int = 3) -> None:
|
||||
"""
|
||||
清理超过指定天数的下载文件
|
||||
|
||||
@@ -136,7 +140,7 @@ class MediaDownloader:
|
||||
# 如果文件超过最大保留时间,则删除
|
||||
if file_age > max_age_seconds:
|
||||
try:
|
||||
os.remove(file_path)
|
||||
await asyncio.to_thread(os.remove, file_path)
|
||||
cleared_count += 1
|
||||
self.LOG.debug(f"已删除过期文件: {file_path}")
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user