移除抖音解析中的yt-dlp链路并保留可用接口解析

变更项：
- 删除抖音插件内所有 yt-dlp 解析与标准化实现代码，避免 Fresh cookies 问题
- 解析链路简化为：本地业务接口（内网）-> 外部接口兜底
- 移除 requirements 中 yt-dlp 依赖，减少无效依赖和运行噪音
- 使用你提供的链接完成本地验证，确认可正常获取视频地址
2026-04-23 15:53:02 +08:00
parent 80829dbb21
commit 64a2253813
2 changed files with 0 additions and 184 deletions
--- a/plugins/douyin_parser/main.py
+++ b/plugins/douyin_parser/main.py
@@ -1,9 +1,6 @@
 import os
 import re
 import time
-import json
-import shutil
-import subprocess
 import traceback
 import requests
 import io
@@ -250,13 +247,6 @@ class DouyinParserPlugin(MessagePluginInterface):
            if secondary and (secondary.get('url') or secondary.get('images')):
                return self._clean_response_data(secondary)

-            # 第三优先级：本机兜底提取（yt-dlp）。
-            # 说明：
-            # - 该方案受 Cookie 新鲜度影响较大；
-            # - 放在最后可避免在“本地业务解析已成功”时仍输出 Fresh cookies 警告。
-            local_fallback = self._parse_from_local_extractor(clean_url)
-            if local_fallback and (local_fallback.get('url') or local_fallback.get('images')):
-                return self._clean_response_data(local_fallback)
            raise DouyinParserError("未获取到有效媒资数据")
        except Exception as e:
            self.LOG.error(f"[抖音] 解析过程发生未知错误: {str(e)}\n{traceback.format_exc()}")
@@ -543,179 +533,6 @@ class DouyinParserPlugin(MessagePluginInterface):
        except Exception:
            return None

-    def _parse_from_local_extractor(self, clean_url: str) -> Optional[Dict[str, Any]]:
-        """
-        本地提取后备方案（接口不可用时启用）。
-
-        设计目标：
-        1) 不依赖你自建/第三方解析接口，避免单点故障；
-        2) 优先使用 Python 方式，减少进程开销；
-        3) 若 Python 库未安装，自动降级到命令行，最大化可用性。
-        """
-        try:
-            info = self._extract_with_yt_dlp_python(clean_url)
-            if not info:
-                info = self._extract_with_yt_dlp_cli(clean_url)
-            if not info:
-                return None
-            return self._normalize_yt_dlp_info(info)
-        except Exception as e:
-            self.LOG.warning(f"[抖音] 本地提取后备失败: {e}")
-            return None
-
-    def _extract_with_yt_dlp_python(self, clean_url: str) -> Optional[Dict[str, Any]]:
-        """
-        使用 yt_dlp Python 库提取信息。
-
-        注意：
-        - skip_download=True 只提取元数据和直链，不下载文件；
-        - 优先选取“含视频轨道且协议为http/https”的格式，降低后续发送失败概率。
-        """
-        try:
-            import yt_dlp  # type: ignore
-        except Exception:
-            return None
-
-        ydl_opts = {
-            "quiet": True,
-            "no_warnings": True,
-            "skip_download": True,
-            "proxy": self.http_proxy or None,
-            "nocheckcertificate": True,
-        }
-        # Cookie 注入策略：
-        # - 优先使用 cookie_file（yt-dlp 官方支持的 cookies 文件，兼容性更高）；
-        # - 否则回退到手工 Cookie 请求头。
-        if self.cookie_file and os.path.exists(self.cookie_file):
-            ydl_opts["cookiefile"] = self.cookie_file
-        elif self.cookie:
-            ydl_opts["http_headers"] = {"Cookie": self.cookie}
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(clean_url, download=False)
-            if isinstance(info, dict):
-                return info
-            return None
-
-    def _extract_with_yt_dlp_cli(self, clean_url: str) -> Optional[Dict[str, Any]]:
-        """
-        使用 yt-dlp 命令行提取信息。
-
-        适用场景：
-        - 运行环境未安装 yt_dlp Python 包，但系统可执行文件已存在。
-        """
-        yt_dlp_bin = shutil.which("yt-dlp")
-        if not yt_dlp_bin:
-            return None
-        cmd = [yt_dlp_bin, "-J", "--no-warnings", "--skip-download", clean_url]
-        if self.http_proxy:
-            cmd.extend(["--proxy", self.http_proxy])
-        # 命令行模式下同样注入 Cookie，确保与 Python 模式行为一致。
-        if self.cookie_file and os.path.exists(self.cookie_file):
-            cmd.extend(["--cookies", self.cookie_file])
-        elif self.cookie:
-            cmd.extend(["--add-header", f"Cookie: {self.cookie}"])
-
-        result = subprocess.run(cmd, capture_output=True, text=True, timeout=25)
-        if result.returncode != 0:
-            err_msg = (result.stderr or "").strip().replace("\n", " ")
-            if "Fresh cookies" in err_msg:
-                # 该错误在抖音场景出现频率较高，且当前链路已是“最后兜底”，降为 info 避免误导。
-                self.LOG.info("[抖音] yt-dlp 兜底提取失败：Cookie 需要刷新（Fresh cookies needed）")
-            else:
-                self.LOG.warning(f"[抖音] yt-dlp 命令行提取失败: code={result.returncode}, err={err_msg[:200]}")
-            return None
-        try:
-            data = json.loads(result.stdout or "{}")
-            return data if isinstance(data, dict) else None
-        except Exception:
-            return None
-
-    def _normalize_yt_dlp_info(self, info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-        """
-        将 yt-dlp 的原始结构统一映射为插件内部 media_info 结构。
-
-        目标结构：
-        - 视频：{"type":"video","url","title","author","cover"}
-        - 图集：{"type":"image","images":[],"title","author","cover"}
-        """
-        # 统一提取作者与标题，尽量优先更稳定字段，保证卡片/文本信息完整。
-        title = str(info.get("description") or info.get("title") or "无标题")
-        author = str(info.get("uploader") or info.get("creator") or info.get("channel") or "未知作者")
-
-        # 统一提取封面：
-        # 1) thumbnail 字段；
-        # 2) thumbnails 数组最后一项（通常分辨率更高）。
-        cover = str(info.get("thumbnail") or "")
-        if not cover:
-            thumbs = info.get("thumbnails") or []
-            if isinstance(thumbs, list) and thumbs:
-                last = thumbs[-1] if isinstance(thumbs[-1], dict) else {}
-                cover = str(last.get("url") or "")
-
-        # 图集场景：yt-dlp 可能返回 playlist/entries，每项通常是图片或片段资源。
-        if info.get("_type") == "playlist":
-            entries = info.get("entries") or []
-            image_urls: List[str] = []
-            if isinstance(entries, list):
-                for item in entries:
-                    if not isinstance(item, dict):
-                        continue
-                    # 优先取原始URL，其次取页面URL，再次取thumbnail。
-                    candidate = str(item.get("url") or item.get("webpage_url") or item.get("thumbnail") or "")
-                    if candidate and candidate.startswith("http"):
-                        image_urls.append(candidate)
-            if image_urls:
-                return {
-                    "type": "image",
-                    "images": image_urls,
-                    "title": title,
-                    "author": author,
-                    "cover": image_urls[0],
-                }
-
-        # 视频场景：优先从 formats 里选“有视频轨道”的直链，避免选到纯音频。
-        best_url = ""
-        formats = info.get("formats") or []
-        scored_candidates: List[Tuple[int, str]] = []
-        if isinstance(formats, list):
-            for fmt in formats:
-                if not isinstance(fmt, dict):
-                    continue
-                fmt_url = str(fmt.get("url") or "")
-                if not fmt_url or not fmt_url.startswith("http"):
-                    continue
-                # 必须含视频轨道（vcodec != none），并且协议优先 http/https。
-                vcodec = str(fmt.get("vcodec") or "")
-                protocol = str(fmt.get("protocol") or "")
-                if vcodec.lower() == "none":
-                    continue
-                score = 0
-                if protocol in ("https", "http"):
-                    score += 50
-                # 优先高分辨率与高码率。
-                score += int(fmt.get("height") or 0)
-                score += int(fmt.get("tbr") or 0) // 10
-                scored_candidates.append((score, fmt_url))
-        if scored_candidates:
-            scored_candidates.sort(key=lambda x: x[0], reverse=True)
-            best_url = scored_candidates[0][1]
-
-        # 部分站点会直接在顶层给 url 字段，作为兜底读取。
-        if not best_url:
-            fallback_url = str(info.get("url") or "")
-            if fallback_url.startswith("http"):
-                best_url = fallback_url
-
-        if best_url:
-            return {
-                "type": "video",
-                "url": best_url,
-                "title": title,
-                "author": author,
-                "cover": cover,
-            }
-        return None
-
    def _append_title_to_image(self, image_bytes: bytes, title: str) -> bytes:
        """
        将标题绘制到图片顶部，返回新的图片二进制数据。