This commit is contained in:
2025-11-28 21:20:40 +08:00
commit f940b95b67
73 changed files with 15721 additions and 0 deletions

89
parsers/douyin.py Normal file
View File

@@ -0,0 +1,89 @@
from parsers.base import BaseParser
from typing import Dict
from urllib.parse import urlencode
class DouyinParser(BaseParser):
    """Parser for Douyin videos backed by the remote parsing API.

    Parsing is a three-step pipeline: resolve the video URL to an aweme
    ID, fetch the detail record for that ID, then flatten the record into
    the structure built by ``self._normalize_response``.
    """

    def parse(self, video_url: str) -> Dict:
        """Parse a Douyin video URL.

        Args:
            video_url: The Douyin video URL to resolve.

        Returns:
            The value returned by ``self._normalize_response`` for the
            extracted cover, play URL, title and description.

        Raises:
            Exception: If any step fails. The underlying error is chained
                as ``__cause__`` so the original traceback is preserved.
        """
        try:
            # Step 1: resolve the URL to a video (aweme) ID.
            aweme_id = self._get_aweme_id(video_url)
            # Step 2: fetch the detail record for that ID.
            video_info = self._fetch_video_info(aweme_id)
            # Step 3: extract and normalize the fields we expose.
            return self._extract_data(video_info)
        except Exception as e:
            raise Exception(f"抖音解析失败: {str(e)}") from e

    def _get_aweme_id(self, video_url: str) -> str:
        """Ask the API for the aweme ID that ``video_url`` points to.

        Args:
            video_url: The Douyin video URL to resolve.

        Returns:
            The ``data`` field of the API response (the aweme ID).

        Raises:
            Exception: If the response ``code`` is not 200 or the response
                carries no ID.
        """
        # The URL is assembled by hand so the query is encoded exactly once
        # (avoids double encoding).
        url = f"{self.api_url}/api/douyin/web/get_aweme_id?{urlencode({'url': video_url})}"
        response = self._make_request(url)
        data = response.json()
        aweme_id = data.get("data")
        # A 200 response without an ID is treated as a failure too; otherwise
        # the next request would be sent with the literal "aweme_id=None".
        if data.get("code") != 200 or not aweme_id:
            raise Exception(f"获取视频ID失败: {data.get('msg', '未知错误')}")
        return aweme_id

    def _fetch_video_info(self, aweme_id: str) -> Dict:
        """Fetch the detail record for a single video.

        Args:
            aweme_id: The video ID returned by ``_get_aweme_id``.

        Returns:
            The ``data.aweme_detail`` mapping of the API response, or an
            empty dict when that field is missing or null.

        Raises:
            Exception: If the response ``code`` is not 200.
        """
        # The URL is assembled by hand so the query is encoded exactly once
        # (avoids double encoding).
        url = f"{self.api_url}/api/douyin/web/fetch_one_video?{urlencode({'aweme_id': aweme_id})}"
        response = self._make_request(url)
        data = response.json()
        if data.get("code") != 200:
            raise Exception("获取视频信息失败")
        # ``or {}`` guards against an explicit JSON null, which ``.get(key, {})``
        # alone would pass through as None.
        payload = data.get("data") or {}
        return payload.get("aweme_detail") or {}

    @staticmethod
    def _first_url(video: Dict, key: str) -> str:
        """Return the first entry of ``video[key]["url_list"]``, or ``""``.

        Missing keys, null values and empty lists all yield an empty string
        instead of raising.
        """
        url_list = (video.get(key) or {}).get("url_list") or []
        return url_list[0] if url_list else ""

    def _extract_data(self, video_info: Dict) -> Dict:
        """Extract the exposed fields from a detail record and normalize them.

        Args:
            video_info: The mapping returned by ``_fetch_video_info``.

        Returns:
            The value returned by ``self._normalize_response``.

        Raises:
            Exception: If extraction fails; the underlying error is chained.
        """
        try:
            video = video_info.get("video") or {}
            # Cover image and play address: first URL of each list, or "" when
            # absent, so a missing cover does not fail the whole parse.
            cover = self._first_url(video, "cover_original_scale")
            video_url = self._first_url(video, "play_addr")
            # The video description doubles as the title.
            title = video_info.get("desc") or ""
            # Author nickname (plus signature, when present) forms the description.
            author = video_info.get("author") or {}
            author_name = author.get("nickname", "")
            author_signature = author.get("signature", "")
            description = f"作者: {author_name}"
            if author_signature:
                description += f" | {author_signature}"
            return self._normalize_response(cover, video_url, title, description)
        except Exception as e:
            raise Exception(f"数据提取失败: {str(e)}") from e
class DouyinDownloadParser(BaseParser):
    """Douyin parser that returns a direct download link.

    Instead of looking up video metadata, it points the caller at the
    API's ``/api/download`` endpoint with the original URL as a query
    parameter.
    """

    def parse(self, video_url: str) -> Dict:
        """Build a download link for ``video_url``.

        Args:
            video_url: The Douyin video URL to download.

        Returns:
            The value returned by ``self._normalize_response`` with an empty
            cover, the download link as the video URL, and fixed placeholder
            title and description.

        Raises:
            Exception: If building the response fails.
        """
        try:
            endpoint = f"{self.api_url}/api/download"
            # The query is encoded here, once, and spliced into the URL by
            # hand to avoid double encoding.
            query = urlencode({'url': video_url})
            return self._normalize_response(
                cover="",
                video_url=f"{endpoint}?{query}",
                title="抖音视频",
                description="通过下载接口获取",
            )
        except Exception as e:
            raise Exception(f"抖音下载解析失败: {str(e)}")