71 lines
2.6 KiB
Python
71 lines
2.6 KiB
Python
from parsers.base import BaseParser
|
||
from typing import Dict
|
||
from urllib.parse import urlencode
|
||
|
||
class TikTokParser(BaseParser):
    """Parser that resolves a TikTok video URL into normalized video metadata.

    The class relies on ``self.api_url``, ``self._make_request`` and
    ``self._normalize_response``, none of which are defined here; they are
    presumably provided by ``BaseParser`` (confirm against ``parsers.base``).
    """

    def parse(self, video_url: str) -> Dict:
        """Parse a TikTok video URL.

        Args:
            video_url: URL of the TikTok video to resolve.

        Returns:
            The result of ``self._normalize_response`` for the extracted
            cover URL, play URL, title and description.

        Raises:
            Exception: If any step fails. The message is prefixed with
                "TikTok解析失败" and the original error is chained as the cause.
        """
        try:
            # Step 1: resolve the video ID from the URL.
            aweme_id = self._get_aweme_id(video_url)

            # Step 2: fetch the detailed video record.
            video_info = self._fetch_video_info(aweme_id)

            # Step 3: extract and normalize the fields we expose.
            return self._extract_data(video_info)
        except Exception as e:
            # Chain explicitly so the root cause stays visible in tracebacks.
            raise Exception(f"TikTok解析失败: {str(e)}") from e

    def _get_aweme_id(self, video_url: str) -> str:
        """Look up the video ID (``aweme_id``) for a video URL.

        Args:
            video_url: URL of the TikTok video.

        Returns:
            The value of the ``data`` field of the API response.

        Raises:
            Exception: If the response ``code`` is not 200, or if the response
                carries no video ID.
        """
        # Build the query string by hand so the video URL is percent-encoded
        # exactly once (the original author's note: avoids double encoding).
        url = f"{self.api_url}/api/tiktok/web/get_aweme_id?{urlencode({'url': video_url})}"

        response = self._make_request(url)
        data = response.json()

        aweme_id = data.get("data")
        # A 200 response without an ID must also fail here; otherwise the next
        # request would be sent with the literal query "aweme_id=None".
        if data.get("code") != 200 or not aweme_id:
            raise Exception(f"获取视频ID失败: {data.get('msg', '未知错误')}")

        return aweme_id

    def _fetch_video_info(self, aweme_id: str) -> Dict:
        """Fetch the detailed record for one video.

        Args:
            aweme_id: Video ID as returned by ``_get_aweme_id``.

        Returns:
            The ``data`` field of the API response, or an empty dict when that
            field is missing or null.

        Raises:
            Exception: If the response ``code`` is not 200.
        """
        # Build the query string by hand so the ID is percent-encoded exactly
        # once (the original author's note: avoids double encoding).
        url = f"{self.api_url}/api/tiktok/app/fetch_one_video?{urlencode({'aweme_id': aweme_id})}"

        response = self._make_request(url)
        data = response.json()

        if data.get("code") != 200:
            raise Exception("获取视频信息失败")

        # The TikTok API returns the video record directly under "data";
        # there is no intermediate "aweme_detail" level.
        # `or {}` also covers an explicit null, which `.get("data", {})` would
        # pass through as None.
        return data.get("data") or {}

    @staticmethod
    def _first_url(video: Dict, key: str) -> str:
        """Return the first entry of ``video[key]["url_list"]``, or ``""``.

        Any missing, null or empty level yields ``""`` instead of raising.
        """
        url_list = (video.get(key) or {}).get("url_list") or [""]
        return url_list[0] or ""

    def _extract_data(self, video_info: Dict) -> Dict:
        """Extract and normalize the fields of a video record.

        Args:
            video_info: Video record as returned by ``_fetch_video_info``.

        Returns:
            The result of ``self._normalize_response(cover, video_url, title,
            description)``.

        Raises:
            Exception: If extraction fails. The message is prefixed with
                "数据提取失败" and the original error is chained as the cause.
        """
        try:
            # The TikTok record layout is similar to Douyin's.
            video = video_info.get("video") or {}

            # Prefer the original-scale cover; fall back to the regular cover
            # when it is absent, null, or has an empty URL list.
            cover = self._first_url(video, "cover_original_scale") or self._first_url(video, "cover")

            video_url = self._first_url(video, "play_addr")

            title = video_info.get("desc", "")

            author = video_info.get("author") or {}
            author_name = author.get("nickname", "")
            author_signature = author.get("signature", "")
            description = f"Author: {author_name}"
            if author_signature:
                description += f" | {author_signature}"

            return self._normalize_response(cover, video_url, title, description)
        except Exception as e:
            raise Exception(f"数据提取失败: {str(e)}") from e
|