Files
JieXi/parsers/douyin.py
2025-11-30 19:49:25 +08:00

90 lines
3.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from parsers.base import BaseParser
from typing import Dict
from urllib.parse import urlencode
class DouyinParser(BaseParser):
    """Douyin (抖音) parser backed by the remote parsing API at ``self.api_url``.

    Parsing runs in three steps: resolve the aweme (video) ID from the URL,
    fetch the video detail record, then map that record onto the response
    built by ``self._normalize_response``.
    """

    def parse(self, video_url: str) -> Dict:
        """Parse a Douyin video URL.

        Args:
            video_url: URL of the Douyin video to resolve.

        Returns:
            The value produced by ``self._normalize_response`` for the
            extracted cover, play URL, title, description and author name.

        Raises:
            Exception: If any step fails. The message is prefixed with
                "抖音解析失败" and the underlying error is chained as the cause.
        """
        try:
            # Step 1: resolve the video ID.
            aweme_id = self._get_aweme_id(video_url)
            # Step 2: fetch the detailed video record.
            video_info = self._fetch_video_info(aweme_id)
            # Step 3: extract and normalize the fields of interest.
            return self._extract_data(video_info)
        except Exception as e:
            raise Exception(f"抖音解析失败: {str(e)}") from e

    def _get_aweme_id(self, video_url: str) -> str:
        """Resolve the aweme ID for ``video_url`` via the ``get_aweme_id`` endpoint.

        Raises:
            Exception: If the API response code is not 200, or if the response
                carries no ID in its ``data`` field.
        """
        # The query string is encoded here and embedded in the URL directly
        # (original author's note: build the URL by hand to avoid double encoding).
        url = f"{self.api_url}/api/douyin/web/get_aweme_id?{urlencode({'url': video_url})}"
        response = self._make_request(url)
        data = response.json()
        if data.get("code") != 200:
            raise Exception(f"获取视频ID失败: {data.get('msg', '未知错误')}")
        aweme_id = data.get("data")
        if not aweme_id:
            # Fail here rather than sending the literal string "None" as the
            # aweme_id of the follow-up request.
            raise Exception("获取视频ID失败: 响应中缺少视频ID")
        return aweme_id

    def _fetch_video_info(self, aweme_id: str) -> Dict:
        """Fetch the ``aweme_detail`` record for ``aweme_id``.

        Returns:
            The ``data.aweme_detail`` mapping of the response, or an empty
            dict when it is missing or null.

        Raises:
            Exception: If the API response code is not 200.
        """
        # Query string is encoded here and embedded in the URL directly
        # (original author's note: avoids double encoding).
        url = f"{self.api_url}/api/douyin/web/fetch_one_video?{urlencode({'aweme_id': aweme_id})}"
        response = self._make_request(url)
        data = response.json()
        if data.get("code") != 200:
            raise Exception("获取视频信息失败")
        # ``or {}`` also covers keys that are present but null, which a
        # ``.get(key, {})`` default does not.
        return (data.get("data") or {}).get("aweme_detail") or {}

    @staticmethod
    def _first_url(media: Dict, key: str) -> str:
        """Return the first entry of ``media[key]["url_list"]``, or "" if unavailable.

        Missing keys, null values and an empty ``url_list`` all yield "".
        """
        url_list = ((media or {}).get(key) or {}).get("url_list") or []
        return (url_list[0] or "") if url_list else ""

    def _extract_data(self, video_info: Dict) -> Dict:
        """Extract and normalize the fields of a video detail record.

        Args:
            video_info: The detail mapping returned by ``_fetch_video_info``.

        Returns:
            The value of ``self._normalize_response(cover, video_url, title,
            description, author_name)``.

        Raises:
            Exception: If extraction fails. The message is prefixed with
                "数据提取失败" and the underlying error is chained as the cause.
        """
        try:
            video = video_info.get("video") or {}
            # Cover image: first URL of the original-scale cover.
            cover = self._first_url(video, "cover_original_scale")
            # Playable video URL: first URL of the play address.
            video_url = self._first_url(video, "play_addr")
            # The title is taken from the video description text.
            title = video_info.get("desc", "")
            # Author details.
            author_info = video_info.get("author") or {}
            author_name = author_info.get("nickname", "")
            # The description field is filled with the author's signature.
            description = author_info.get("signature", "") or ""
            return self._normalize_response(cover, video_url, title, description, author_name)
        except Exception as e:
            raise Exception(f"数据提取失败: {str(e)}") from e
class DouyinDownloadParser(BaseParser):
    """Douyin download parser (direct download).

    Makes no request itself: it only builds a link to the ``/api/download``
    endpoint under ``self.api_url`` and returns it as the video URL.
    """

    def parse(self, video_url: str) -> Dict:
        """Parse a Douyin video URL using the download approach.

        Args:
            video_url: URL of the Douyin video; passed on as the ``url``
                query parameter of the download link.

        Returns:
            The value of ``self._normalize_response`` called with an empty
            cover, the download link as ``video_url``, and fixed title and
            description strings.

        Raises:
            Exception: If building the response fails; the message is
                prefixed with "抖音下载解析失败".
        """
        try:
            endpoint = f"{self.api_url}/api/download"
            # Encode the query by hand and append it to the endpoint
            # (original author's note: avoids double encoding).
            query = urlencode({'url': video_url})
            direct_link = f"{endpoint}?{query}"
            return self._normalize_response(
                cover="",
                video_url=direct_link,
                title="抖音视频",
                description="通过下载接口获取",
            )
        except Exception as e:
            raise Exception(f"抖音下载解析失败: {e!s}")