From 8e4e4116afc170d835272f6a614c2e942ba00ed4 Mon Sep 17 00:00:00 2001 From: liuwei Date: Tue, 11 Mar 2025 11:37:20 +0800 Subject: [PATCH] =?UTF-8?q?=E7=9C=9F=E5=AE=9E=E9=93=BE=E6=8E=A5=E6=B5=8B?= =?UTF-8?q?=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- douyin_parser/main.py | 110 +++++++++++++----------------------------- 1 file changed, 34 insertions(+), 76 deletions(-) diff --git a/douyin_parser/main.py b/douyin_parser/main.py index 1a593f7..d7478ba 100644 --- a/douyin_parser/main.py +++ b/douyin_parser/main.py @@ -1,5 +1,6 @@ import logging import re +import time import tomllib import os import traceback @@ -50,27 +51,51 @@ class DouyinParser: return cleaned_url def _get_real_video_url(self, video_url: str) -> str: - max_retries = 3 - retry_delay = 2 + """获取真实视频链接""" + max_retries = 3 # 最大重试次数 + retry_delay = 2 # 重试延迟秒数 + proxies = {"http": self.http_proxy, "https": self.http_proxy} if self.http_proxy else None for retry in range(max_retries): try: self.LOG.info("[抖音] 开始获取真实视频链接: %s (第%d次尝试)", video_url, retry + 1) - proxy = {"http": self.http_proxy, "https": self.http_proxy} if self.http_proxy else None - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)...' + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', + 'Range': 'bytes=0-' } - response = requests.get(video_url, headers=headers, proxies=proxy, allow_redirects=True, timeout=60) + response = requests.get(video_url, headers=headers, proxies=proxies, allow_redirects=True, timeout=60) if response.status_code in [200, 206]: + history = [resp.url for resp in response.history] real_url = response.url - self.LOG.info("[抖音] 成功获取真实链接: %s", real_url) - return real_url + + if history: + self.LOG.debug("[抖音] 重定向历史: %s", history) + + if real_url != video_url and ('v3-' in real_url.lower() or 'douyinvod.com' in real_url.lower()): + self.LOG.info("[抖音] 成功获取真实链接: %s", real_url) + return real_url + else: + self.LOG.warning("[抖音] 未能获取到真实视频链接,准备重试") + if retry < max_retries - 1: + time.sleep(retry_delay) + continue + return video_url else: self.LOG.error("[抖音] 获取视频真实链接失败, 状态码: %d", response.status_code) + self.LOG.debug("[抖音] 响应头: %s", response.headers) + if retry < max_retries - 1: + time.sleep(retry_delay) + continue + return video_url except Exception as e: - self.LOG.error("[抖音] 获取真实链接失败: %s", str(e)) + self.LOG.error("[抖音] 获取真实链接失败: %s (第%d次尝试)", str(e), retry + 1) + if retry < max_retries - 1: + time.sleep(retry_delay) + continue + return video_url self.LOG.error("[抖音] 获取真实链接失败,已达到最大重试次数") return video_url @@ -139,70 +164,3 @@ class DouyinParser: self.LOG.error("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc()) raise DouyinParserError(f"未知错误: {str(e)}") return - - def send_xml_video(self, message: WxMsg, title, author, video_url, cover): - - video_title = f"{title[:30]} - {author[:10]}" if author else title[:40], - xml_message = f""" - - - - {video_title} - 点击观看无水印视频 - view - 5 - 0 - - {video_url} - - - - - {cover} - - - - - - - - 0 - - - - - - - - 0 - - - - - - 0 - - - - Jyunere - 0 - - 1 - - - - - - """ - # 修改消息数据库里面的消息content 内容 - text_bytes = xml_message.encode('utf-8') - compressed_data = lb.compress(text_bytes, store_size=False).hex() - - data = self.wcf.query_sql('MSG0.db', "SELECT * FROM MSG where type = 49 limit 1") - self.wcf.query_sql('MSG0.db', - f"""UPDATE MSG SET CompressContent = x'{compressed_data}', BytesExtra=x'', type=49, SubType=3, - IsSender=0, TalkerId=2 WHERE MsgSvrID={data[0]['MsgSvrID']}""" - ) - - result = self.wcf.forward_msg(data[0]["MsgSvrID"], message.roomid) - print(f"视频链接:{result}")