抖音视频-服务器无法提取，暂时不处理

2025-03-12 10:19:22 +08:00
parent f1cc064b4d
commit d69d6cde98
2 changed files with 41 additions and 243 deletions
--- a/douyin_parser/douyin_test.py
+++ b/douyin_parser/douyin_test.py
@@ -1,202 +0,0 @@
-import logging
-import os
-import re
-import time
-import tomllib
-import traceback
-import requests
-from typing import Dict, Any
-
-from wcferry import WxMsg, Wcf
-
-from robot_cmd.robot_command import GroupBotManager, Feature, PermissionStatus
-
-
-class DouyinParserError(Exception):
-    """抖音解析器自定义异常基类"""
-    pass
-
-
-def _clean_response_data(data: Dict[str, Any]) -> Dict[str, Any]:
-    if not data:
-        return data
-    data[
-        'cover'] = "https://is1-ssl.mzstatic.com/image/thumb/Purple221/v4/7c/49/e1/7c49e1af-ce92-d1c4-9a93-0a316e47ba94/AppIcon_TikTok-0-0-1x_U007epad-0-1-0-0-85-220.png/512x512bb.jpg"
-
-    return data
-
-
-def _clean_url(url: str) -> str:
-    cleaned_url = url.strip().replace(';', '').replace('\n', '').replace('\r', '')
-    print("[抖音] 清理后的URL: %s", cleaned_url)
-    return cleaned_url
-
-
-def _get_real_video_url(video_url: str) -> str:
-    """获取真实视频链接"""
-    max_retries = 3  # 最大重试次数
-    retry_delay = 2  # 重试延迟秒数
-    max_redirects = 10  # 最大重定向次数，防止死循环
-    proxies = None
-    redirect_history = []
-
-    for retry in range(max_retries):
-        try:
-            print("[抖音] 开始获取真实视频链接: %s (第%d次尝试)", video_url, retry + 1)
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
-                'Range': 'bytes=0-'
-            }
-
-            # 默认使用 allow_redirects=True 获取历史记录
-            response = requests.get(video_url, headers=headers, proxies=proxies, allow_redirects=True, timeout=60)
-
-            if response.history:
-                redirect_history = [resp.url for resp in response.history]
-                real_url = response.url
-            else:
-                # response.history 为空，手动解析重定向
-                current_url = video_url
-                for _ in range(max_redirects):  # 限制最大重定向次数
-                    resp = requests.get(current_url, headers=headers, proxies=proxies, allow_redirects=False,
-                                        timeout=60)
-                    new_url = resp.headers.get('Location')
-
-                    if not new_url:
-                        break  # 没有新的 Location，停止
-
-                    if not new_url.startswith("http"):
-                        from urllib.parse import urljoin
-                        new_url = urljoin(current_url, new_url)  # 处理相对路径重定向
-
-                    if new_url in redirect_history:
-                        print("[抖音] 检测到循环重定向: %s", new_url)
-                        break  # 避免死循环
-
-                    redirect_history.append(new_url)
-                    print("[抖音] 发现重定向: %s -> %s", current_url, new_url)
-                    current_url = new_url
-
-                real_url = current_url
-
-            if redirect_history:
-                print("[抖音] 重定向历史: %s", redirect_history)
-
-            if real_url != video_url and ('v3-' in real_url.lower() or 'douyinvod.com' in real_url.lower()):
-                print("[抖音] 成功获取真实链接: %s", real_url)
-                return real_url
-            else:
-                print("[抖音] 未能获取到符合预期的视频链接，准备重试")
-                if retry < max_retries - 1:
-                    time.sleep(retry_delay)
-                    continue
-                return video_url
-
-        except Exception as e:
-            print("[抖音] 获取真实链接失败: %s (第%d次尝试)", str(e), retry + 1)
-            if retry < max_retries - 1:
-                time.sleep(retry_delay)
-                continue
-            return video_url
-
-    print("[抖音] 获取真实链接失败，已达到最大重试次数")
-    return video_url
-
-
-def _parse_douyin(url: str) -> Dict[str, Any]:
-    try:
-        api_url = "https://apih.kfcgw50.me/api/douyin"
-        clean_url = _clean_url(url)
-        params = {'url': clean_url, 'type': 'json'}
-
-        print("[抖音] 请求API: %s, 参数: %s", api_url, repr(params))
-        proxy = None
-        response = requests.get(api_url, params=params, timeout=30, proxies=proxy)
-
-        if response.status_code != 200:
-            raise DouyinParserError(f"API请求失败，状态码: {response.status_code}")
-
-        data = response.json()
-        print("[抖音] API响应数据: %s", data)
-
-        if data.get("code") == 200:
-            result = data.get("data", {})
-            if result.get('video'):
-                result['video'] = _get_real_video_url(result['video'])
-            return _clean_response_data(result)
-        else:
-            raise DouyinParserError(data.get("message", "未知错误"))
-    except Exception as e:
-        print("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
-        raise DouyinParserError(f"未知错误: {str(e)}")
-
-
-def handle_douyin_links(content):
-    try:
-
-        original_url = _clean_url(content)
-        print("发现抖音链接: %s", original_url)
-        print("检测到抖音分享链接，正在解析无水印视频...")
-        video_info = _parse_douyin(original_url)
-        if not video_info:
-            raise DouyinParserError("无法获取视频信息")
-
-        video_url = video_info.get('video', '')
-        title = video_info.get('title', '无标题')
-        author = video_info.get('name', '未知作者')
-        cover = video_info.get('cover', '')
-
-        if not video_url:
-            raise DouyinParserError("无法获取视频地址")
-        print(video_url)
-    except Exception as e:
-        print("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
-        raise DouyinParserError(f"未知错误: {str(e)}")
-    return video_url
-
-def download_stream(url, save_path):
-    """
-    从指定URL读取视频流并保存到本地
-    :param url: 视频流的URL
-    :param save_path: 本地保存路径（包含文件名，例如 "video.mp4"）
-    """
-    try:
-        # 发送GET请求，启用流式传输
-        response = requests.get(url, stream=True)
-
-        # 检查请求是否成功
-        response.raise_for_status()  # 如果状态码不是200，将抛出异常
-
-        # 确保保存路径的目录存在
-        os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
-
-        # 检查是否是视频流（可选，根据Content-Type判断）
-        content_type = response.headers.get("Content-Type", "").lower()
-        if "video" not in content_type and "application/octet-stream" not in content_type:
-            print(f"警告: 返回的可能不是视频流，Content-Type: {content_type}")
-            print("响应内容预览:", response.text[:100])  # 打印前100字符查看
-            return
-
-        # 以二进制写入模式保存流数据
-        with open(save_path, "wb") as file:
-            for chunk in response.iter_content(chunk_size=1024):  # 分块读取，每块1KB
-                if chunk:  # 过滤空块
-                    file.write(chunk)
-        print(f"视频已下载到: {save_path}")
-        return os.path.abspath(save_path)
-    except requests.RequestException as e:
-        print(f"请求失败: {e}")
-    except IOError as e:
-        print(f"文件写入失败: {e}")
-    except Exception as e:
-        print(f"发生未知错误: {e}")
-
-
-if __name__ == '__main__':
-    str = """https://v.douyin.com/i5gm3gKs/"""
-
-    url = handle_douyin_links(str)
-    # download_stream(url,"douyin.mp4")
-
--- a/douyin_parser/main.py
+++ b/douyin_parser/main.py
@@ -181,48 +181,48 @@ class DouyinParser:

            self.wcf.send_rich_text("bot", "gh_11", title[:30], f"{title[:30]} - {author[:10]}", video_url, cover,
                                    message.roomid)
-            self.LOG.info(f"video_url: {video_url}, title: {title}, author: {author}, cover: {cover}")
-            mp4_path = self.download_stream(video_url, "douyin_parser/down_load_dir/douyin.mp4")
-            self.LOG.info(f"发送抖音视频：{mp4_path}")
-            self.wcf.send_file(mp4_path, message.roomid)
+            # self.LOG.info(f"video_url: {video_url}, title: {title}, author: {author}, cover: {cover}")
+            # mp4_path = self.download_stream(video_url, "douyin_parser/down_load_dir/douyin.mp4")
+            # self.LOG.info(f"发送抖音视频：{mp4_path}")
+            # self.wcf.send_file(mp4_path, message.roomid)
        except Exception as e:
            self.LOG.error("[抖音] 解析过程发生未知错误: %s\n%s", str(e), traceback.format_exc())
            raise DouyinParserError(f"未知错误: {str(e)}")
        return
-
-    def download_stream(self, url, save_path):
-        """
-        从指定URL读取视频流并保存到本地
-        :param url: 视频流的URL
-        :param save_path: 本地保存路径（包含文件名，例如 "video.mp4"）
-        """
-        try:
-            # 发送GET请求，启用流式传输
-            response = requests.get(url, stream=True)
-
-            # 检查请求是否成功
-            response.raise_for_status()  # 如果状态码不是200，将抛出异常
-
-            # 确保保存路径的目录存在
-            os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
-
-            # 检查是否是视频流（可选，根据Content-Type判断）
-            content_type = response.headers.get("Content-Type", "").lower()
-            if "video" not in content_type and "application/octet-stream" not in content_type:
-                print(f"警告: 返回的可能不是视频流，Content-Type: {content_type}")
-                print("响应内容预览:", response.text[:100])  # 打印前100字符查看
-                return
-
-            # 以二进制写入模式保存流数据
-            with open(save_path, "wb") as file:
-                for chunk in response.iter_content(chunk_size=1024):  # 分块读取，每块1KB
-                    if chunk:  # 过滤空块
-                        file.write(chunk)
-            print(f"视频已下载到: {save_path}")
-            return os.path.abspath(save_path)
-        except requests.RequestException as e:
-            print(f"请求失败: {e}")
-        except IOError as e:
-            print(f"文件写入失败: {e}")
-        except Exception as e:
-            print(f"发生未知错误: {e}")
+    #
+    # def download_stream(self, url, save_path):
+    #     """
+    #     从指定URL读取视频流并保存到本地
+    #     :param url: 视频流的URL
+    #     :param save_path: 本地保存路径（包含文件名，例如 "video.mp4"）
+    #     """
+    #     try:
+    #         # 发送GET请求，启用流式传输
+    #         response = requests.get(url, stream=True)
+    #
+    #         # 检查请求是否成功
+    #         response.raise_for_status()  # 如果状态码不是200，将抛出异常
+    #
+    #         # 确保保存路径的目录存在
+    #         os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
+    #
+    #         # 检查是否是视频流（可选，根据Content-Type判断）
+    #         content_type = response.headers.get("Content-Type", "").lower()
+    #         if "video" not in content_type and "application/octet-stream" not in content_type:
+    #             print(f"警告: 返回的可能不是视频流，Content-Type: {content_type}")
+    #             print("响应内容预览:", response.text[:100])  # 打印前100字符查看
+    #             return
+    #
+    #         # 以二进制写入模式保存流数据
+    #         with open(save_path, "wb") as file:
+    #             for chunk in response.iter_content(chunk_size=1024):  # 分块读取，每块1KB
+    #                 if chunk:  # 过滤空块
+    #                     file.write(chunk)
+    #         print(f"视频已下载到: {save_path}")
+    #         return os.path.abspath(save_path)
+    #     except requests.RequestException as e:
+    #         print(f"请求失败: {e}")
+    #     except IOError as e:
+    #         print(f"文件写入失败: {e}")
+    #     except Exception as e:
+    #         print(f"发生未知错误: {e}")