90 lines
3.1 KiB
Python
90 lines
3.1 KiB
Python
from parsers.base import BaseParser
|
||
from typing import Dict
|
||
from urllib.parse import urlencode
|
||
|
||
class DouyinParser(BaseParser):
    """Parser for Douyin videos backed by a remote HTTP API.

    Resolves a share URL to an aweme (video) ID, fetches that video's detail
    record, and maps it onto the normalized response.

    Relies on ``self.api_url``, ``self._make_request`` and
    ``self._normalize_response``, none of which are defined in this class
    (presumably provided by ``BaseParser`` -- confirm there).
    """

    def parse(self, video_url: str) -> Dict:
        """Parse a Douyin video URL into a normalized response.

        Args:
            video_url: The Douyin video/share URL to resolve.

        Returns:
            Whatever ``self._normalize_response`` builds from the cover URL,
            playable video URL, title and description.

        Raises:
            Exception: If any step fails; the message is prefixed with
                "抖音解析失败" and the original error is chained as the cause.
        """
        try:
            # Step 1: resolve the URL to a video ID.
            aweme_id = self._get_aweme_id(video_url)

            # Step 2: fetch the detail record for that ID.
            video_info = self._fetch_video_info(aweme_id)

            # Step 3: extract and normalize the fields we expose.
            return self._extract_data(video_info)
        except Exception as e:
            raise Exception(f"抖音解析失败: {str(e)}") from e

    def _get_aweme_id(self, video_url: str) -> str:
        """Ask the API for the aweme ID that corresponds to ``video_url``.

        Raises:
            Exception: If the API reports a code other than 200, or answers
                200 without an ID.
        """
        # The query string is encoded here and embedded in the URL; the
        # original author's note says this avoids double encoding.
        url = f"{self.api_url}/api/douyin/web/get_aweme_id?{urlencode({'url': video_url})}"

        response = self._make_request(url)
        data = response.json()

        if data.get("code") != 200:
            raise Exception(f"获取视频ID失败: {data.get('msg', '未知错误')}")

        aweme_id = data.get("data")
        if not aweme_id:
            # Without this guard a missing ID would be URL-encoded as the
            # literal string "None" and sent to the detail endpoint.
            raise Exception("获取视频ID失败: 未返回视频ID")

        return aweme_id

    def _fetch_video_info(self, aweme_id: str) -> Dict:
        """Fetch the ``aweme_detail`` record for ``aweme_id``.

        Returns:
            The ``data.aweme_detail`` object of the API response, or an empty
            dict when that object is missing or null.

        Raises:
            Exception: If the API reports a code other than 200.
        """
        # The query string is encoded here and embedded in the URL; the
        # original author's note says this avoids double encoding.
        url = f"{self.api_url}/api/douyin/web/fetch_one_video?{urlencode({'aweme_id': aweme_id})}"

        response = self._make_request(url)
        data = response.json()

        if data.get("code") != 200:
            raise Exception("获取视频信息失败")

        # `or {}` also covers keys that are present with a null value, which a
        # plain `.get(key, {})` default does not.
        payload = data.get("data") or {}
        return payload.get("aweme_detail") or {}

    @staticmethod
    def _first_url(media: Dict, key: str) -> str:
        """Return the first entry of ``media[key]["url_list"]``, or ``""``.

        Tolerates a missing or null ``media[key]`` and a missing, null or
        empty ``url_list``.
        """
        url_list = (media.get(key) or {}).get("url_list") or []
        return url_list[0] if url_list else ""

    def _extract_data(self, video_info: Dict) -> Dict:
        """Extract cover, video URL, title and description from a detail record.

        Args:
            video_info: The ``aweme_detail`` object returned by
                ``_fetch_video_info``.

        Returns:
            The result of ``self._normalize_response(cover, video_url, title,
            description)``.

        Raises:
            Exception: If extraction fails; the message is prefixed with
                "数据提取失败" and the original error is chained as the cause.
        """
        try:
            video = video_info.get("video") or {}

            # Cover image: first URL of the original-scale cover.
            cover = self._first_url(video, "cover_original_scale")

            # Playable video: first URL of the play address.
            video_url = self._first_url(video, "play_addr")

            # The video description doubles as the title.
            title = video_info.get("desc", "")

            # The author's nickname (plus signature, when present) is used as
            # the description.
            author = video_info.get("author") or {}
            author_name = author.get("nickname", "")
            author_signature = author.get("signature", "")
            description = f"作者: {author_name}"
            if author_signature:
                description += f" | {author_signature}"

            return self._normalize_response(cover, video_url, title, description)
        except Exception as e:
            raise Exception(f"数据提取失败: {str(e)}") from e
|
||
|
||
|
||
class DouyinDownloadParser(BaseParser):
    """Douyin parser that returns a direct-download link.

    Instead of looking up video details, it points the caller at the API's
    ``/api/download`` endpoint with the original URL as a query parameter.
    Relies on ``self.api_url`` and ``self._normalize_response``, which are not
    defined in this class (presumably provided by ``BaseParser``).
    """

    def parse(self, video_url: str) -> Dict:
        """Build a download-endpoint response for ``video_url``.

        Args:
            video_url: The Douyin video URL to hand to the download endpoint.

        Returns:
            The result of ``self._normalize_response`` with an empty cover, the
            download link as the video URL, and fixed title/description text.

        Raises:
            Exception: If building the response fails; the message is prefixed
                with "抖音下载解析失败".
        """
        try:
            endpoint = f"{self.api_url}/api/download"

            # The query string is encoded here and appended to the endpoint;
            # the original author's note says this avoids double encoding.
            query = urlencode({'url': video_url})
            link = f"{endpoint}?{query}"

            return self._normalize_response(
                cover="",
                video_url=link,
                title="抖音视频",
                description="通过下载接口获取",
            )
        except Exception as e:
            raise Exception(f"抖音下载解析失败: {str(e)}")
|