Files
abot/douyin_parser/main.py
2025-03-11 11:37:20 +08:00

167 lines
7.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import logging
import re
import time
import tomllib
import os
import traceback
import requests
from typing import Dict, Any
from wcferry import WxMsg, Wcf
from robot_cmd.robot_command import GroupBotManager, Feature, PermissionStatus
import lz4.block as lb
class DouyinParserError(Exception):
    """Base class for every error raised by the Douyin parser plugin."""
class DouyinParser:
description = "抖音无水印解析插件"
author = "姜不吃先生"
version = "1.0.2"
def __init__(self, wcf: Wcf, gbm: GroupBotManager):
self.url_pattern = re.compile(r'https?://v\.douyin\.com/\w+/?')
self.LOG = logging.getLogger(__name__)
self.wcf = wcf
self.gbm = gbm
with open("douyin_parser/config.toml", "rb") as f:
plugin_config = tomllib.load(f)
config = plugin_config["Douyin"]
self.enable = config.get("enable", True)
self.http_proxy = config.get("http_proxy", None)
self.LOG.info("[抖音] 插件初始化完成,代理设置: %s", self.http_proxy)
def _clean_response_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
if not data:
return data
data[
'cover'] = "https://is1-ssl.mzstatic.com/image/thumb/Purple221/v4/7c/49/e1/7c49e1af-ce92-d1c4-9a93-0a316e47ba94/AppIcon_TikTok-0-0-1x_U007epad-0-1-0-0-85-220.png/512x512bb.jpg"
return data
def _clean_url(self, url: str) -> str:
cleaned_url = url.strip().replace(';', '').replace('\n', '').replace('\r', '')
self.LOG.debug("[抖音] 清理后的URL: %s", cleaned_url)
return cleaned_url
def _get_real_video_url(self, video_url: str) -> str:
"""获取真实视频链接"""
max_retries = 3 # 最大重试次数
retry_delay = 2 # 重试延迟秒数
proxies = {"http": self.http_proxy, "https": self.http_proxy} if self.http_proxy else None
for retry in range(max_retries):
try:
self.LOG.info("[抖音] 开始获取真实视频链接: %s (第%d次尝试)", video_url, retry + 1)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
'Range': 'bytes=0-'
}
response = requests.get(video_url, headers=headers, proxies=proxies, allow_redirects=True, timeout=60)
if response.status_code in [200, 206]:
history = [resp.url for resp in response.history]
real_url = response.url
if history:
self.LOG.debug("[抖音] 重定向历史: %s", history)
if real_url != video_url and ('v3-' in real_url.lower() or 'douyinvod.com' in real_url.lower()):
self.LOG.info("[抖音] 成功获取真实链接: %s", real_url)
return real_url
else:
self.LOG.warning("[抖音] 未能获取到真实视频链接,准备重试")
if retry < max_retries - 1:
time.sleep(retry_delay)
continue
return video_url
else:
self.LOG.error("[抖音] 获取视频真实链接失败, 状态码: %d", response.status_code)
self.LOG.debug("[抖音] 响应头: %s", response.headers)
if retry < max_retries - 1:
time.sleep(retry_delay)
continue
return video_url
except Exception as e:
self.LOG.error("[抖音] 获取真实链接失败: %s (第%d次尝试)", str(e), retry + 1)
if retry < max_retries - 1:
time.sleep(retry_delay)
continue
return video_url
self.LOG.error("[抖音] 获取真实链接失败,已达到最大重试次数")
return video_url
def _parse_douyin(self, url: str) -> Dict[str, Any]:
try:
api_url = "https://apih.kfcgw50.me/api/douyin"
clean_url = self._clean_url(url)
params = {'url': clean_url, 'type': 'json'}
self.LOG.debug("[抖音] 请求API: %s, 参数: %s", api_url, repr(params))
proxy = {"http": self.http_proxy, "https": self.http_proxy} if self.http_proxy else None
response = requests.get(api_url, params=params, timeout=30, proxies=proxy)
if response.status_code != 200:
raise DouyinParserError(f"API请求失败状态码: {response.status_code}")
data = response.json()
self.LOG.debug("[抖音] API响应数据: %s", data)
if data.get("code") == 200:
result = data.get("data", {})
if result.get('video'):
result['video'] = self._get_real_video_url(result['video'])
return self._clean_response_data(result)
else:
raise DouyinParserError(data.get("message", "未知错误"))
except Exception as e:
self.LOG.error("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
raise DouyinParserError(f"未知错误: {str(e)}")
def handle_douyin_links(self, message: WxMsg):
if not self.enable:
return
# 如果触发了指令,但是没有权限,则返回权限不足
if self.gbm.get_group_permission(message.roomid, Feature.DOUYIN_PARSER) == PermissionStatus.DISABLED:
return
try:
match = self.url_pattern.search(message.content)
if not match:
return
original_url = self._clean_url(match.group(0))
self.LOG.info("发现抖音链接: %s", original_url)
self.LOG.info("检测到抖音分享链接,正在解析无水印视频...")
self.wcf.send_text(f"检测到抖音分享链接,正在解析无水印视频...",
(message.roomid if message.from_group() else message.sender), message.sender)
video_info = self._parse_douyin(original_url)
if not video_info:
raise DouyinParserError("无法获取视频信息")
video_url = video_info.get('video', '')
title = video_info.get('title', '无标题')
author = video_info.get('name', '未知作者')
cover = video_info.get('cover', '')
if not video_url:
raise DouyinParserError("无法获取视频地址")
# self.send_xml_video(message, title, author, video_url,cover)
self.wcf.send_rich_text("bot", "gh_11", title[:30], f"{title[:30]} - {author[:10]}", video_url, cover,
message.roomid)
except Exception as e:
self.LOG.error("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
raise DouyinParserError(f"未知错误: {str(e)}")
return