From 36569f84cc1751d1cfdcc95c8c1163a620a6e871 Mon Sep 17 00:00:00 2001 From: liuwei Date: Tue, 11 Mar 2025 11:45:36 +0800 Subject: [PATCH] =?UTF-8?q?=E7=9C=9F=E5=AE=9E=E9=93=BE=E6=8E=A5=E6=B5=8B?= =?UTF-8?q?=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- douyin_parser/main.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/douyin_parser/main.py b/douyin_parser/main.py index c10b0a5..eca909b 100644 --- a/douyin_parser/main.py +++ b/douyin_parser/main.py @@ -55,6 +55,7 @@ class DouyinParser: max_retries = 3 # 最大重试次数 retry_delay = 2 # 重试延迟秒数 proxies = {"http": self.http_proxy, "https": self.http_proxy} if self.http_proxy else None + redirect_history = [] for retry in range(max_retries): try: @@ -66,30 +67,42 @@ class DouyinParser: 'Range': 'bytes=0-' } - response = requests.get(video_url, headers=headers, proxies=proxies, allow_redirects=True, timeout=60) - if response.status_code in [200, 206]: - history = [resp.url for resp in response.history] - real_url = response.url + current_url = video_url + while True: + response = requests.get(current_url, headers=headers, proxies=proxies, allow_redirects=False, + timeout=60) - if history: - self.LOG.info("[抖音] 重定向历史: %s", history) + if response.status_code in [301, 302, 303, 307, 308]: # 处理重定向 + new_url = response.headers.get('Location') + if not new_url: + self.LOG.info("[抖音] 发现重定向但没有 Location 头部,停止解析") + break + if not new_url.startswith("http"): + from urllib.parse import urljoin + new_url = urljoin(current_url, new_url) # 处理相对路径重定向 - if real_url != video_url and ('v3-' in real_url.lower() or 'douyinvod.com' in real_url.lower()): - self.LOG.info("[抖音] 成功获取真实链接: %s", real_url) - return real_url + redirect_history.append(new_url) + self.LOG.info("[抖音] 发现重定向: %s -> %s", current_url, new_url) + current_url = new_url else: - self.LOG.warning("[抖音] 未能获取到真实视频链接,准备重试") - if retry < max_retries - 1: - time.sleep(retry_delay) - continue - return video_url + break # 没有进一步重定向,停止循环 + + real_url = current_url + self.LOG.info("[抖音] 最终获取到的链接: %s", real_url) + + if redirect_history: + self.LOG.info("[抖音] 重定向历史: %s", redirect_history) + + if real_url != video_url and ('v3-' in real_url.lower() or 'douyinvod.com' in real_url.lower()): + self.LOG.info("[抖音] 成功获取真实链接: %s", real_url) + return real_url else: - self.LOG.error("[抖音] 获取视频真实链接失败, 状态码: %d", response.status_code) - self.LOG.debug("[抖音] 响应头: %s", response.headers) + self.LOG.info("[抖音] 未能获取到符合预期的视频链接,准备重试") if retry < max_retries - 1: time.sleep(retry_delay) continue return video_url + except Exception as e: self.LOG.error("[抖音] 获取真实链接失败: %s (第%d次尝试)", str(e), retry + 1) if retry < max_retries - 1: