From 105c45e90ba671e0e9f1a94f21f9e8feb397da7f Mon Sep 17 00:00:00 2001 From: liuwei Date: Tue, 11 Mar 2025 12:00:25 +0800 Subject: [PATCH] =?UTF-8?q?=E7=9C=9F=E5=AE=9E=E9=93=BE=E6=8E=A5=E6=B5=8B?= =?UTF-8?q?=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- douyin_parser/main.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/douyin_parser/main.py b/douyin_parser/main.py index eca909b..558740a 100644 --- a/douyin_parser/main.py +++ b/douyin_parser/main.py @@ -54,6 +54,7 @@ class DouyinParser: """获取真实视频链接""" max_retries = 3 # 最大重试次数 retry_delay = 2 # 重试延迟秒数 + max_redirects = 10 # 最大重定向次数,防止死循环 proxies = {"http": self.http_proxy, "https": self.http_proxy} if self.http_proxy else None redirect_history = [] @@ -67,28 +68,36 @@ class DouyinParser: 'Range': 'bytes=0-' } - current_url = video_url - while True: - response = requests.get(current_url, headers=headers, proxies=proxies, allow_redirects=False, - timeout=60) + # 默认使用 allow_redirects=True 获取历史记录 + response = requests.get(video_url, headers=headers, proxies=proxies, allow_redirects=True, timeout=60) + + if response.history: + redirect_history = [resp.url for resp in response.history] + real_url = response.url + else: + # response.history 为空,手动解析重定向 + current_url = video_url + for _ in range(max_redirects): # 限制最大重定向次数 + resp = requests.get(current_url, headers=headers, proxies=proxies, allow_redirects=False, + timeout=60) + new_url = resp.headers.get('Location') - if response.status_code in [301, 302, 303, 307, 308]: # 处理重定向 - new_url = response.headers.get('Location') if not new_url: - self.LOG.info("[抖音] 发现重定向但没有 Location 头部,停止解析") - break + break # 没有新的 Location,停止 + if not new_url.startswith("http"): from urllib.parse import urljoin new_url = urljoin(current_url, new_url) # 处理相对路径重定向 + if new_url in redirect_history: + self.LOG.info("[抖音] 检测到循环重定向: %s", new_url) + break # 避免死循环 + redirect_history.append(new_url) self.LOG.info("[抖音] 发现重定向: %s -> %s", current_url, new_url) current_url = new_url - else: - break # 没有进一步重定向,停止循环 - real_url = current_url - self.LOG.info("[抖音] 最终获取到的链接: %s", real_url) + real_url = current_url if redirect_history: self.LOG.info("[抖音] 重定向历史: %s", redirect_history)