From f1cc064b4db9bdd65bc62e87cd91b83b5ce5bdf4 Mon Sep 17 00:00:00 2001
From: liuwei
Date: Wed, 12 Mar 2025 09:49:39 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8A=A0=E5=85=A5=E4=BB=A3=E7=90=86=E8=BF=9B?=
 =?UTF-8?q?=E8=A1=8C=E6=B5=8B=E8=AF=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 douyin_parser/config.toml    |   2 +-
 douyin_parser/douyin_test.py | 235 +++++++++++++++++++++++++++++++++++
 douyin_parser/main.py        |   4 +-
 3 files changed, 238 insertions(+), 3 deletions(-)
 create mode 100644 douyin_parser/douyin_test.py

diff --git a/douyin_parser/config.toml b/douyin_parser/config.toml
index a2b8e6e..494d09f 100644
--- a/douyin_parser/config.toml
+++ b/douyin_parser/config.toml
@@ -4,4 +4,4 @@ enable = true
 # Http代理设置(用于获取真实链接发送卡片,如果家里有ipv6,可以设置为空)
 # 格式: http://用户名:密码@代理地址:代理端口
 # 例如:http://127.0.0.1:7890
-http_proxy = ""
\ No newline at end of file
+http_proxy = "http://192.168.2.1:7890"
\ No newline at end of file
diff --git a/douyin_parser/douyin_test.py b/douyin_parser/douyin_test.py
new file mode 100644
index 0000000..c40b7a7
--- /dev/null
+++ b/douyin_parser/douyin_test.py
@@ -0,0 +1,235 @@
+import logging
+import os
+import re
+import time
+import tomllib
+import traceback
+import requests
+from typing import Dict, Any
+from urllib.parse import urljoin
+
+from wcferry import WxMsg, Wcf
+
+from robot_cmd.robot_command import GroupBotManager, Feature, PermissionStatus
+
+LOG = logging.getLogger(__name__)
+
+# Fixed placeholder image used as the cover of every parsed result.
+DEFAULT_COVER_URL = "https://is1-ssl.mzstatic.com/image/thumb/Purple221/v4/7c/49/e1/7c49e1af-ce92-d1c4-9a93-0a316e47ba94/AppIcon_TikTok-0-0-1x_U007epad-0-1-0-0-85-220.png/512x512bb.jpg"
+
+# Browser-like headers sent when resolving the real video URL.
+REQUEST_HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+    'Range': 'bytes=0-'
+}
+
+
+class DouyinParserError(Exception):
+    """Base class for errors raised by the Douyin parser."""
+
+
+def _clean_response_data(data: Dict[str, Any]) -> Dict[str, Any]:
+    """Overwrite the ``cover`` entry of a parsed result with a placeholder icon.
+
+    Empty input is returned untouched; otherwise ``data`` is updated in place
+    and returned.
+    """
+    if not data:
+        return data
+    data['cover'] = DEFAULT_COVER_URL
+    return data
+
+
+def _clean_url(url: str) -> str:
+    """Strip surrounding whitespace and remove ';', CR and LF from ``url``."""
+    cleaned_url = url.strip().replace(';', '').replace('\n', '').replace('\r', '')
+    LOG.info("[抖音] 清理后的URL: %s", cleaned_url)
+    return cleaned_url
+
+
+def _follow_redirects_manually(start_url: str, proxies=None, max_redirects: int = 10):
+    """Follow ``Location`` headers one hop at a time.
+
+    Returns a ``(final_url, visited_urls)`` tuple. Stops when a response has no
+    ``Location`` header, when a URL repeats, or after ``max_redirects`` hops.
+    """
+    visited = []
+    current_url = start_url
+    for _ in range(max_redirects):
+        # stream=True: only the headers are needed, so never download the body.
+        with requests.get(current_url, headers=REQUEST_HEADERS, proxies=proxies,
+                          allow_redirects=False, stream=True, timeout=60) as resp:
+            location = resp.headers.get('Location')
+        if not location:
+            break
+        # urljoin resolves relative redirects and leaves absolute URLs as they are.
+        new_url = urljoin(current_url, location)
+        if new_url in visited:
+            LOG.info("[抖音] 检测到循环重定向: %s", new_url)
+            break
+        visited.append(new_url)
+        LOG.info("[抖音] 发现重定向: %s -> %s", current_url, new_url)
+        current_url = new_url
+    return current_url, visited
+
+
+def _get_real_video_url(video_url: str) -> str:
+    """Resolve ``video_url`` to the final CDN address by following redirects.
+
+    Makes up to three attempts, two seconds apart. A result is accepted only
+    when it differs from ``video_url`` and contains ``v3-`` or
+    ``douyinvod.com``; otherwise, or on any error, ``video_url`` is returned
+    unchanged.
+    """
+    max_retries = 3
+    retry_delay = 2  # seconds between attempts
+    proxies = None
+
+    for attempt in range(1, max_retries + 1):
+        LOG.info("[抖音] 开始获取真实视频链接: %s (第%d次尝试)", video_url, attempt)
+        try:
+            # stream=True keeps requests from downloading the whole video just
+            # to learn where the redirects end.
+            with requests.get(video_url, headers=REQUEST_HEADERS, proxies=proxies,
+                              allow_redirects=True, stream=True, timeout=60) as response:
+                redirect_history = [resp.url for resp in response.history]
+                real_url = response.url
+
+            if not redirect_history:
+                # No recorded history: walk the Location headers by hand.
+                real_url, redirect_history = _follow_redirects_manually(video_url, proxies)
+
+            if redirect_history:
+                LOG.info("[抖音] 重定向历史: %s", redirect_history)
+
+            lowered = real_url.lower()
+            if real_url != video_url and ('v3-' in lowered or 'douyinvod.com' in lowered):
+                LOG.info("[抖音] 成功获取真实链接: %s", real_url)
+                return real_url
+            LOG.info("[抖音] 未能获取到符合预期的视频链接,准备重试")
+        except Exception as e:  # best effort: any failure falls back to video_url
+            LOG.warning("[抖音] 获取真实链接失败: %s (第%d次尝试)", str(e), attempt)
+
+        if attempt < max_retries:
+            time.sleep(retry_delay)
+
+    LOG.warning("[抖音] 获取真实链接失败,已达到最大重试次数")
+    return video_url
+
+
+def _parse_douyin(url: str) -> Dict[str, Any]:
+    """Query the parsing API for ``url`` and return the cleaned result dict.
+
+    Raises:
+        DouyinParserError: if the request fails, the API reports an error, or
+            the response cannot be processed.
+    """
+    api_url = "https://apih.kfcgw50.me/api/douyin"
+    try:
+        params = {'url': _clean_url(url), 'type': 'json'}
+        LOG.info("[抖音] 请求API: %s, 参数: %s", api_url, repr(params))
+        response = requests.get(api_url, params=params, timeout=30, proxies=None)
+
+        if response.status_code != 200:
+            raise DouyinParserError(f"API请求失败,状态码: {response.status_code}")
+
+        data = response.json()
+        LOG.info("[抖音] API响应数据: %s", data)
+
+        if data.get("code") != 200:
+            raise DouyinParserError(data.get("message", "未知错误"))
+
+        result = data.get("data") or {}
+        if result.get('video'):
+            result['video'] = _get_real_video_url(result['video'])
+        return _clean_response_data(result)
+    except DouyinParserError:
+        # Already a meaningful parser error; do not wrap it a second time.
+        raise
+    except Exception as e:
+        LOG.error("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
+        raise DouyinParserError(f"未知错误: {str(e)}") from e
+
+
+def handle_douyin_links(content):
+    """Resolve a Douyin share link to a direct video URL.
+
+    Args:
+        content: Text holding the share link.
+
+    Returns:
+        The resolved video URL.
+
+    Raises:
+        DouyinParserError: if no video information or address is available.
+    """
+    try:
+        original_url = _clean_url(content)
+        LOG.info("发现抖音链接: %s", original_url)
+        LOG.info("检测到抖音分享链接,正在解析无水印视频...")
+        video_info = _parse_douyin(original_url)
+        if not video_info:
+            raise DouyinParserError("无法获取视频信息")
+
+        video_url = video_info.get('video', '')
+        if not video_url:
+            raise DouyinParserError("无法获取视频地址")
+    except DouyinParserError:
+        raise
+    except Exception as e:
+        LOG.error("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
+        raise DouyinParserError(f"未知错误: {str(e)}") from e
+
+    print(video_url)
+    return video_url
+
+
+def download_stream(url, save_path):
+    """Stream the video at ``url`` into ``save_path``.
+
+    Args:
+        url: URL of the video stream.
+        save_path: Destination path including the file name, e.g. "video.mp4".
+
+    Returns:
+        The absolute path of the saved file, or None when the response is not
+        a video or the download fails.
+    """
+    try:
+        # The context manager releases the connection on every exit path.
+        with requests.get(url, stream=True, timeout=60) as response:
+            response.raise_for_status()  # raises on 4xx/5xx responses
+
+            content_type = response.headers.get("Content-Type", "").lower()
+            if "video" not in content_type and "application/octet-stream" not in content_type:
+                print(f"警告: 返回的可能不是视频流,Content-Type: {content_type}")
+                # Read a short prefix only; response.text would fetch the whole body.
+                preview = next(response.iter_content(chunk_size=100), b"")
+                print("响应内容预览:", preview.decode(response.encoding or "utf-8", errors="replace"))
+                return None
+
+            os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
+            with open(save_path, "wb") as file:
+                for chunk in response.iter_content(chunk_size=64 * 1024):
+                    if chunk:  # skip empty chunks
+                        file.write(chunk)
+
+        print(f"视频已下载到: {save_path}")
+        return os.path.abspath(save_path)
+    except requests.RequestException as e:
+        print(f"请求失败: {e}")
+    except IOError as e:
+        print(f"文件写入失败: {e}")
+    except Exception as e:
+        print(f"发生未知错误: {e}")
+    return None
+
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
+    share_url = "https://v.douyin.com/i5gm3gKs/"
+
+    url = handle_douyin_links(share_url)
+    # download_stream(url, "douyin.mp4")
diff --git a/douyin_parser/main.py b/douyin_parser/main.py
index 26abf7d..32d2137 100644
--- a/douyin_parser/main.py
+++ b/douyin_parser/main.py
@@ -140,8 +140,8 @@ class DouyinParser:
             if data.get("code") == 200:
                 result = data.get("data", {})
                 self.LOG.info("[抖音] API响应数据result: %s", result)
-                # if result.get('video'):
-                #     result['video'] = self._get_real_video_url(result['video'])
+                if result.get('video'):
+                    result['video'] = self._get_real_video_url(result['video'])
                 return self._clean_response_data(result)
             else:
                 raise DouyinParserError(data.get("message", "未知错误"))