Files
abot/douyin_parser/main.py

186 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import logging
import re
import time
import tomllib
import traceback
import requests
from typing import Dict, Any
from wcferry import WxMsg, Wcf
from robot_cmd.robot_command import GroupBotManager, Feature, PermissionStatus
class DouyinParserError(Exception):
    """Base class for errors raised by the Douyin parser plugin."""
class DouyinParser:
description = "抖音无水印解析插件"
author = "姜不吃先生"
version = "1.0.2"
def __init__(self, wcf: Wcf, gbm: GroupBotManager):
self.url_pattern = re.compile(r'https?://v\.douyin\.com/\w+/?')
self.LOG = logging.getLogger(__name__)
self.wcf = wcf
self.gbm = gbm
with open("douyin_parser/config.toml", "rb") as f:
plugin_config = tomllib.load(f)
config = plugin_config["Douyin"]
self.enable = config.get("enable", True)
self.http_proxy = config.get("http_proxy", None)
self.LOG.info("[抖音] 插件初始化完成,代理设置: %s", self.http_proxy)
def _clean_response_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
if not data:
return data
data[
'cover'] = "https://is1-ssl.mzstatic.com/image/thumb/Purple221/v4/7c/49/e1/7c49e1af-ce92-d1c4-9a93-0a316e47ba94/AppIcon_TikTok-0-0-1x_U007epad-0-1-0-0-85-220.png/512x512bb.jpg"
return data
def _clean_url(self, url: str) -> str:
cleaned_url = url.strip().replace(';', '').replace('\n', '').replace('\r', '')
self.LOG.debug("[抖音] 清理后的URL: %s", cleaned_url)
return cleaned_url
def _get_real_video_url(self, video_url: str) -> str:
"""获取真实视频链接"""
max_retries = 3 # 最大重试次数
retry_delay = 2 # 重试延迟秒数
max_redirects = 10 # 最大重定向次数,防止死循环
proxies = {"http": self.http_proxy, "https": self.http_proxy} if self.http_proxy else None
redirect_history = []
for retry in range(max_retries):
try:
self.LOG.info("[抖音] 开始获取真实视频链接: %s (第%d次尝试)", video_url, retry + 1)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
'Range': 'bytes=0-'
}
# 默认使用 allow_redirects=True 获取历史记录
response = requests.get(video_url, headers=headers, proxies=proxies, allow_redirects=True, timeout=60)
if response.history:
redirect_history = [resp.url for resp in response.history]
real_url = response.url
else:
# response.history 为空,手动解析重定向
current_url = video_url
for _ in range(max_redirects): # 限制最大重定向次数
resp = requests.get(current_url, headers=headers, proxies=proxies, allow_redirects=False,
timeout=60)
new_url = resp.headers.get('Location')
if not new_url:
break # 没有新的 Location停止
if not new_url.startswith("http"):
from urllib.parse import urljoin
new_url = urljoin(current_url, new_url) # 处理相对路径重定向
if new_url in redirect_history:
self.LOG.info("[抖音] 检测到循环重定向: %s", new_url)
break # 避免死循环
redirect_history.append(new_url)
self.LOG.info("[抖音] 发现重定向: %s -> %s", current_url, new_url)
current_url = new_url
real_url = current_url
if redirect_history:
self.LOG.info("[抖音] 重定向历史: %s", redirect_history)
if real_url != video_url and ('v3-' in real_url.lower() or 'douyinvod.com' in real_url.lower()):
self.LOG.info("[抖音] 成功获取真实链接: %s", real_url)
return real_url
else:
self.LOG.info("[抖音] 未能获取到符合预期的视频链接,准备重试")
if retry < max_retries - 1:
time.sleep(retry_delay)
continue
return video_url
except Exception as e:
self.LOG.error("[抖音] 获取真实链接失败: %s (第%d次尝试)", str(e), retry + 1)
if retry < max_retries - 1:
time.sleep(retry_delay)
continue
return video_url
self.LOG.error("[抖音] 获取真实链接失败,已达到最大重试次数")
return video_url
def _parse_douyin(self, url: str) -> Dict[str, Any]:
try:
api_url = "https://apih.kfcgw50.me/api/douyin"
clean_url = self._clean_url(url)
params = {'url': clean_url, 'type': 'json'}
self.LOG.debug("[抖音] 请求API: %s, 参数: %s", api_url, repr(params))
proxy = {"http": self.http_proxy, "https": self.http_proxy} if self.http_proxy else None
response = requests.get(api_url, params=params, timeout=30, proxies=proxy)
if response.status_code != 200:
raise DouyinParserError(f"API请求失败状态码: {response.status_code}")
data = response.json()
self.LOG.debug("[抖音] API响应数据: %s", data)
if data.get("code") == 200:
result = data.get("data", {})
if result.get('video'):
result['video'] = self._get_real_video_url(result['video'])
return self._clean_response_data(result)
else:
raise DouyinParserError(data.get("message", "未知错误"))
except Exception as e:
self.LOG.error("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
raise DouyinParserError(f"未知错误: {str(e)}")
def handle_douyin_links(self, message: WxMsg):
if not self.enable:
return
# 如果触发了指令,但是没有权限,则返回权限不足
if self.gbm.get_group_permission(message.roomid, Feature.DOUYIN_PARSER) == PermissionStatus.DISABLED:
return
try:
match = self.url_pattern.search(message.content)
if not match:
return
original_url = self._clean_url(match.group(0))
self.LOG.info("发现抖音链接: %s", original_url)
self.LOG.info("检测到抖音分享链接,正在解析无水印视频...")
self.wcf.send_text(f"检测到抖音分享链接,正在解析无水印视频...",
(message.roomid if message.from_group() else message.sender), message.sender)
video_info = self._parse_douyin(original_url)
if not video_info:
raise DouyinParserError("无法获取视频信息")
video_url = video_info.get('video', '')
title = video_info.get('title', '无标题')
author = video_info.get('name', '未知作者')
cover = video_info.get('cover', '')
if not video_url:
raise DouyinParserError("无法获取视频地址")
self.wcf.send_rich_text("bot", "gh_11", title[:30], f"{title[:30]} - {author[:10]}", video_url, cover,
message.roomid)
except Exception as e:
self.LOG.error("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
raise DouyinParserError(f"未知错误: {str(e)}")
return