抖音视频-服务器无法提取,暂时不处理
This commit is contained in:
@@ -1,202 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import tomllib
|
||||
import traceback
|
||||
import requests
|
||||
from typing import Dict, Any
|
||||
|
||||
from wcferry import WxMsg, Wcf
|
||||
|
||||
from robot_cmd.robot_command import GroupBotManager, Feature, PermissionStatus
|
||||
|
||||
|
||||
class DouyinParserError(Exception):
    """Base class for every error raised by the Douyin parser."""
|
||||
|
||||
|
||||
def _clean_response_data(data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
if not data:
|
||||
return data
|
||||
data[
|
||||
'cover'] = "https://is1-ssl.mzstatic.com/image/thumb/Purple221/v4/7c/49/e1/7c49e1af-ce92-d1c4-9a93-0a316e47ba94/AppIcon_TikTok-0-0-1x_U007epad-0-1-0-0-85-220.png/512x512bb.jpg"
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def _clean_url(url: str) -> str:
|
||||
cleaned_url = url.strip().replace(';', '').replace('\n', '').replace('\r', '')
|
||||
print("[抖音] 清理后的URL: %s", cleaned_url)
|
||||
return cleaned_url
|
||||
|
||||
|
||||
def _get_real_video_url(video_url: str) -> str:
    """Follow the redirects of *video_url* and return the final media URL.

    Up to three attempts are made, two seconds apart. An attempt succeeds when
    the resolved URL differs from ``video_url`` and contains ``v3-`` or
    ``douyinvod.com`` (case-insensitive). When no attempt succeeds, or every
    attempt raises, ``video_url`` is returned unchanged.

    :param video_url: URL returned by the parsing API.
    :return: The resolved media URL, or ``video_url`` as a fallback.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    from urllib.parse import urljoin

    max_retries = 3  # maximum number of attempts
    retry_delay = 2  # seconds to wait between attempts
    max_redirects = 10  # cap on manually followed redirects
    proxies = None
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Range': 'bytes=0-'
    }

    for attempt in range(1, max_retries + 1):
        # Reset per attempt: a history shared across retries makes the manual
        # walk below report a false "loop" on the very first hop of a retry.
        redirect_history = []
        try:
            print(f"[抖音] 开始获取真实视频链接: {video_url} (第{attempt}次尝试)")

            # stream=True: only the final URL and headers are needed, so the
            # (potentially large) video body must not be downloaded here.
            with requests.get(video_url, headers=headers, proxies=proxies,
                              allow_redirects=True, timeout=60, stream=True) as response:
                redirect_history = [hop.url for hop in response.history]
                real_url = response.url

            if not redirect_history:
                # No recorded redirects: walk Location headers by hand.
                current_url = video_url
                for _ in range(max_redirects):
                    with requests.get(current_url, headers=headers, proxies=proxies,
                                      allow_redirects=False, timeout=60, stream=True) as resp:
                        new_url = resp.headers.get('Location')

                    if not new_url:
                        break  # no further redirect

                    # urljoin resolves relative targets and leaves absolute
                    # URLs untouched.
                    new_url = urljoin(current_url, new_url)

                    if new_url in redirect_history:
                        print(f"[抖音] 检测到循环重定向: {new_url}")
                        break  # avoid looping forever

                    redirect_history.append(new_url)
                    print(f"[抖音] 发现重定向: {current_url} -> {new_url}")
                    current_url = new_url

                real_url = current_url

            if redirect_history:
                print(f"[抖音] 重定向历史: {redirect_history}")

            lowered = real_url.lower()
            if real_url != video_url and ('v3-' in lowered or 'douyinvod.com' in lowered):
                print(f"[抖音] 成功获取真实链接: {real_url}")
                return real_url

            print("[抖音] 未能获取到符合预期的视频链接,准备重试")
        except Exception as e:
            # Deliberately broad: resolution is best-effort and the caller
            # falls back to the unresolved URL.
            print(f"[抖音] 获取真实链接失败: {e} (第{attempt}次尝试)")

        if attempt < max_retries:
            time.sleep(retry_delay)

    print("[抖音] 获取真实链接失败,已达到最大重试次数")
    return video_url
|
||||
|
||||
|
||||
def _parse_douyin(url: str) -> Dict[str, Any]:
    """Ask the parsing API for the video behind a Douyin share link.

    The URL is cleaned with ``_clean_url`` and sent to the API. On a
    ``code == 200`` reply the ``data`` object is returned after its ``video``
    entry (when present) has been resolved with ``_get_real_video_url`` and
    the payload has been passed through ``_clean_response_data``.

    :param url: Raw share URL.
    :return: The API's ``data`` dictionary (empty when the API sent none).
    :raises DouyinParserError: On a non-200 HTTP status, a non-200 API
        ``code``, or any unexpected failure (network, JSON decoding, ...).
    """
    api_url = "https://apih.kfcgw50.me/api/douyin"
    try:
        params = {'url': _clean_url(url), 'type': 'json'}

        # print() does not interpolate "%s" arguments; format explicitly.
        print(f"[抖音] 请求API: {api_url}, 参数: {params!r}")
        response = requests.get(api_url, params=params, timeout=30, proxies=None)

        if response.status_code != 200:
            raise DouyinParserError(f"API请求失败,状态码: {response.status_code}")

        data = response.json()
        print(f"[抖音] API响应数据: {data}")

        if data.get("code") != 200:
            raise DouyinParserError(data.get("message", "未知错误"))

        # "data" may be present but null; treat that like a missing payload.
        result = data.get("data") or {}
        if result.get('video'):
            result['video'] = _get_real_video_url(result['video'])
        return _clean_response_data(result)
    except DouyinParserError as e:
        # Already a descriptive parser error: report it, but do not relabel
        # it as an "unknown error".
        print(f"[抖音] 解析失败: {e}")
        raise
    except Exception as e:
        print(f"[抖音] 解析过程发生未知错误: {e}\n{traceback.format_exc()}")
        raise DouyinParserError(f"未知错误: {str(e)}") from e
|
||||
|
||||
|
||||
def handle_douyin_links(content):
    """Resolve a Douyin share link to a playable video URL.

    :param content: Text holding the share URL; it is cleaned with
        ``_clean_url`` before being parsed.
    :return: The ``video`` URL reported by ``_parse_douyin``.
    :raises DouyinParserError: When the payload is empty, has no video URL,
        or parsing fails for any other reason.
    """
    try:
        original_url = _clean_url(content)
        # print() does not interpolate "%s" arguments; format explicitly.
        print(f"发现抖音链接: {original_url}")
        print("检测到抖音分享链接,正在解析无水印视频...")

        video_info = _parse_douyin(original_url)
        if not video_info:
            raise DouyinParserError("无法获取视频信息")

        video_url = video_info.get('video', '')
        if not video_url:
            raise DouyinParserError("无法获取视频地址")
        print(video_url)
    except DouyinParserError as e:
        # Known parser failure: keep its message instead of wrapping it again.
        print(f"[抖音] 解析失败: {e}")
        raise
    except Exception as e:
        print(f"[抖音] 解析过程发生未知错误: {e}\n{traceback.format_exc()}")
        raise DouyinParserError(f"未知错误: {str(e)}") from e
    return video_url
|
||||
|
||||
def download_stream(url, save_path, timeout=60):
    """Stream the video at *url* into a local file.

    :param url: URL of the video stream.
    :param save_path: Destination path including the file name
        (for example ``"video.mp4"``); missing directories are created.
    :param timeout: Seconds to wait for the server before giving up.
    :return: Absolute path of the saved file, or ``None`` when the response
        is not a video or the download fails (the failure is printed).
    """
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Raises for 4xx/5xx status codes.
            response.raise_for_status()

            # Make sure the target directory exists.
            os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)

            # Refuse payloads that do not look like a video.
            content_type = response.headers.get("Content-Type", "").lower()
            if "video" not in content_type and "application/octet-stream" not in content_type:
                print(f"警告: 返回的可能不是视频流,Content-Type: {content_type}")
                # Read only the first bytes for the preview; response.text
                # would pull the entire body into memory.
                preview = next(response.iter_content(chunk_size=100), b"")
                print("响应内容预览:", preview.decode("utf-8", errors="replace"))
                return None

            # Write the body in 64 KiB chunks.
            with open(save_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    if chunk:  # skip keep-alive chunks
                        file.write(chunk)

        print(f"视频已下载到: {save_path}")
        return os.path.abspath(save_path)
    except requests.RequestException as e:
        print(f"请求失败: {e}")
    except IOError as e:
        print(f"文件写入失败: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")
    return None
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test against a sample share link. The variable is not
    # called ``str`` so the builtin type stays usable.
    share_url = """https://v.douyin.com/i5gm3gKs/"""

    url = handle_douyin_links(share_url)
    # download_stream(url,"douyin.mp4")
|
||||
|
||||
@@ -181,48 +181,48 @@ class DouyinParser:
|
||||
|
||||
self.wcf.send_rich_text("bot", "gh_11", title[:30], f"{title[:30]} - {author[:10]}", video_url, cover,
|
||||
message.roomid)
|
||||
self.LOG.info(f"video_url: {video_url}, title: {title}, author: {author}, cover: {cover}")
|
||||
mp4_path = self.download_stream(video_url, "douyin_parser/down_load_dir/douyin.mp4")
|
||||
self.LOG.info(f"发送抖音视频:{mp4_path}")
|
||||
self.wcf.send_file(mp4_path, message.roomid)
|
||||
# self.LOG.info(f"video_url: {video_url}, title: {title}, author: {author}, cover: {cover}")
|
||||
# mp4_path = self.download_stream(video_url, "douyin_parser/down_load_dir/douyin.mp4")
|
||||
# self.LOG.info(f"发送抖音视频:{mp4_path}")
|
||||
# self.wcf.send_file(mp4_path, message.roomid)
|
||||
except Exception as e:
|
||||
self.LOG.error("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
|
||||
raise DouyinParserError(f"未知错误: {str(e)}")
|
||||
return
|
||||
|
||||
def download_stream(self, url, save_path, timeout=60):
    """Stream the video at *url* into a local file.

    :param url: URL of the video stream.
    :param save_path: Destination path including the file name
        (for example ``"video.mp4"``); missing directories are created.
    :param timeout: Seconds to wait for the server before giving up.
    :return: Absolute path of the saved file, or ``None`` when the response
        is not a video or the download fails (the failure is logged).
    """
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Raises for 4xx/5xx status codes.
            response.raise_for_status()

            # Make sure the target directory exists.
            os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)

            # Refuse payloads that do not look like a video.
            content_type = response.headers.get("Content-Type", "").lower()
            if "video" not in content_type and "application/octet-stream" not in content_type:
                self.LOG.warning(f"警告: 返回的可能不是视频流,Content-Type: {content_type}")
                # Read only the first bytes for the preview; response.text
                # would pull the entire body into memory.
                preview = next(response.iter_content(chunk_size=100), b"")
                self.LOG.warning("响应内容预览: %s", preview.decode("utf-8", errors="replace"))
                return None

            # Write the body in 64 KiB chunks.
            with open(save_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    if chunk:  # skip keep-alive chunks
                        file.write(chunk)

        self.LOG.info(f"视频已下载到: {save_path}")
        return os.path.abspath(save_path)
    except requests.RequestException as e:
        self.LOG.error(f"请求失败: {e}")
    except IOError as e:
        self.LOG.error(f"文件写入失败: {e}")
    except Exception as e:
        self.LOG.error(f"发生未知错误: {e}")
    return None
|
||||
#
|
||||
# def download_stream(self, url, save_path):
|
||||
# """
|
||||
# 从指定URL读取视频流并保存到本地
|
||||
# :param url: 视频流的URL
|
||||
# :param save_path: 本地保存路径(包含文件名,例如 "video.mp4")
|
||||
# """
|
||||
# try:
|
||||
# # 发送GET请求,启用流式传输
|
||||
# response = requests.get(url, stream=True)
|
||||
#
|
||||
# # 检查请求是否成功
|
||||
# response.raise_for_status() # 如果状态码不是200,将抛出异常
|
||||
#
|
||||
# # 确保保存路径的目录存在
|
||||
# os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
|
||||
#
|
||||
# # 检查是否是视频流(可选,根据Content-Type判断)
|
||||
# content_type = response.headers.get("Content-Type", "").lower()
|
||||
# if "video" not in content_type and "application/octet-stream" not in content_type:
|
||||
# print(f"警告: 返回的可能不是视频流,Content-Type: {content_type}")
|
||||
# print("响应内容预览:", response.text[:100]) # 打印前100字符查看
|
||||
# return
|
||||
#
|
||||
# # 以二进制写入模式保存流数据
|
||||
# with open(save_path, "wb") as file:
|
||||
# for chunk in response.iter_content(chunk_size=1024): # 分块读取,每块1KB
|
||||
# if chunk: # 过滤空块
|
||||
# file.write(chunk)
|
||||
# print(f"视频已下载到: {save_path}")
|
||||
# return os.path.abspath(save_path)
|
||||
# except requests.RequestException as e:
|
||||
# print(f"请求失败: {e}")
|
||||
# except IOError as e:
|
||||
# print(f"文件写入失败: {e}")
|
||||
# except Exception as e:
|
||||
# print(f"发生未知错误: {e}")
|
||||
|
||||
Reference in New Issue
Block a user