Files
JieXi/parsers/tiktok.py
2025-11-30 19:49:25 +08:00

72 lines
2.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from parsers.base import BaseParser
from typing import Dict
from urllib.parse import urlencode
class TikTokParser(BaseParser):
    """Parser for TikTok videos.

    Resolves a video URL to an aweme ID through the API at ``self.api_url``,
    fetches the detail record for that ID, and passes the extracted fields to
    ``self._normalize_response``.

    NOTE(review): ``api_url``, ``_make_request`` and ``_normalize_response``
    are not defined in this class; presumably they come from ``BaseParser``.
    """

    def parse(self, video_url: str) -> Dict:
        """Parse a TikTok video URL into a normalized result.

        Args:
            video_url: The TikTok video URL to resolve.

        Returns:
            Whatever ``self._normalize_response`` returns for the extracted
            cover, video URL, title, description and author name.

        Raises:
            Exception: If any step fails; the message is prefixed with
                "TikTok解析失败: " and the original error is chained.
        """
        try:
            # Step 1: resolve the video ID.
            aweme_id = self._get_aweme_id(video_url)
            # Step 2: fetch the detailed video record.
            video_info = self._fetch_video_info(aweme_id)
            # Step 3: extract and normalize the fields.
            return self._extract_data(video_info)
        except Exception as e:
            raise Exception(f"TikTok解析失败: {str(e)}") from e

    def _get_aweme_id(self, video_url: str) -> str:
        """Resolve the video ID for ``video_url`` through the API.

        Args:
            video_url: The TikTok video URL to resolve.

        Returns:
            The value of the response's ``data`` field.

        Raises:
            Exception: If the response ``code`` is not 200, or if the
                response carries no video ID.
        """
        # Build the query string manually to avoid double encoding.
        query = urlencode({"url": video_url})
        url = f"{self.api_url}/api/tiktok/web/get_aweme_id?{query}"
        response = self._make_request(url)
        data = response.json()
        if data.get("code") != 200:
            raise Exception(f"获取视频ID失败: {data.get('msg', '未知错误')}")
        aweme_id = data.get("data")
        if not aweme_id:
            # Without this guard a missing ID would be URL-encoded as the
            # literal text "None" in the follow-up detail request.
            raise Exception("获取视频ID失败: 响应中缺少视频ID")
        return aweme_id

    def _fetch_video_info(self, aweme_id: str) -> Dict:
        """Fetch the detail record for a video ID.

        Args:
            aweme_id: The video ID returned by ``_get_aweme_id``.

        Returns:
            The response's ``data`` field, or an empty dict when the key is
            absent.

        Raises:
            Exception: If the response ``code`` is not 200.
        """
        # Build the query string manually to avoid double encoding.
        query = urlencode({"aweme_id": aweme_id})
        url = f"{self.api_url}/api/tiktok/app/fetch_one_video?{query}"
        response = self._make_request(url)
        data = response.json()
        if data.get("code") != 200:
            raise Exception("获取视频信息失败")
        # The TikTok API returns the payload directly under "data"; there is
        # no "aweme_detail" level.
        return data.get("data", {})

    def _extract_data(self, video_info: Dict) -> Dict:
        """Extract the fields of interest and normalize them.

        Missing, null or empty nested values yield empty strings rather than
        an error, so an empty ``cover_original_scale`` URL list still falls
        back to ``cover``.

        Args:
            video_info: The detail record returned by ``_fetch_video_info``.

        Returns:
            The result of ``self._normalize_response(cover, video_url, title,
            description, author_name)``.

        Raises:
            Exception: If extraction fails; the message is prefixed with
                "数据提取失败: " and the original error is chained.
        """

        def first_url(container: Dict, key: str) -> str:
            """Return the first entry of ``container[key]["url_list"]`` or ""."""
            url_list = (container.get(key) or {}).get("url_list") or []
            return url_list[0] if url_list else ""

        try:
            # TikTok's data structure is similar to Douyin's.
            video = video_info.get("video") or {}
            cover = first_url(video, "cover_original_scale")
            if not cover:
                cover = first_url(video, "cover")
            video_url = first_url(video, "play_addr")
            title = video_info.get("desc", "")

            # Author details.
            author_info = video_info.get("author") or {}
            author_name = author_info.get("nickname", "")
            author_signature = author_info.get("signature", "")

            # The description uses the author's signature.
            description = author_signature or ""

            return self._normalize_response(
                cover, video_url, title, description, author_name
            )
        except Exception as e:
            raise Exception(f"数据提取失败: {str(e)}") from e