移除抖音解析中的yt-dlp链路并保留可用接口解析
变更项: - 删除抖音插件内所有 yt-dlp 解析与标准化实现代码,避免 Fresh cookies 问题 - 解析链路简化为:本地业务接口(内网)-> 外部接口兜底 - 移除 requirements 中的 yt-dlp 依赖,减少无效依赖和运行噪音 - 使用提供的链接完成本地验证,确认可正常获取视频地址
This commit is contained in:
@@ -1,9 +1,6 @@
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
import traceback
|
||||
import requests
|
||||
import io
|
||||
@@ -250,13 +247,6 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
if secondary and (secondary.get('url') or secondary.get('images')):
|
||||
return self._clean_response_data(secondary)
|
||||
|
||||
# 第三优先级:本机兜底提取(yt-dlp)。
|
||||
# 说明:
|
||||
# - 该方案受 Cookie 新鲜度影响较大;
|
||||
# - 放在最后可避免在“本地业务解析已成功”时仍输出 Fresh cookies 警告。
|
||||
local_fallback = self._parse_from_local_extractor(clean_url)
|
||||
if local_fallback and (local_fallback.get('url') or local_fallback.get('images')):
|
||||
return self._clean_response_data(local_fallback)
|
||||
raise DouyinParserError("未获取到有效媒资数据")
|
||||
except Exception as e:
|
||||
self.LOG.error(f"[抖音] 解析过程发生未知错误: {str(e)}\n{traceback.format_exc()}")
|
||||
@@ -543,179 +533,6 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _parse_from_local_extractor(self, clean_url: str) -> Optional[Dict[str, Any]]:
    """
    Local yt-dlp based fallback used after the interface-based parsers.

    The yt_dlp Python package is tried first; when it yields nothing the
    ``yt-dlp`` command-line tool is tried next. The first non-empty result
    is handed to ``_normalize_yt_dlp_info``.

    Args:
        clean_url: URL of the post to extract.

    Returns:
        Whatever ``_normalize_yt_dlp_info`` returns for the extracted data,
        or None when neither extractor produced data or any step raised.
    """
    try:
        # `or` keeps the original order: the CLI is only invoked when the
        # Python-package path returned a falsy value.
        raw_info = (
            self._extract_with_yt_dlp_python(clean_url)
            or self._extract_with_yt_dlp_cli(clean_url)
        )
        if raw_info:
            return self._normalize_yt_dlp_info(raw_info)
        return None
    except Exception as e:
        # Best-effort fallback: report "no result" instead of propagating.
        self.LOG.warning(f"[抖音] 本地提取后备失败: {e}")
        return None
|
||||
|
||||
def _extract_with_yt_dlp_python(self, clean_url: str) -> Optional[Dict[str, Any]]:
    """
    Extract metadata for ``clean_url`` through the yt_dlp Python package.

    Nothing is downloaded (``skip_download=True`` / ``download=False``).
    The raw info object is returned untouched; no format selection is done
    in this method.

    Args:
        clean_url: URL of the post to extract.

    Returns:
        The info dict produced by yt_dlp, or None when the package cannot
        be imported or the result is not a dict.
    """
    try:
        import yt_dlp  # type: ignore
    except Exception:
        # Any import problem means this path is unavailable.
        return None

    # Cookie injection: prefer a cookies file when it exists on disk,
    # otherwise fall back to a raw Cookie request header.
    if self.cookie_file and os.path.exists(self.cookie_file):
        cookie_opts = {"cookiefile": self.cookie_file}
    elif self.cookie:
        cookie_opts = {"http_headers": {"Cookie": self.cookie}}
    else:
        cookie_opts = {}

    ydl_opts = {
        "quiet": True,
        "no_warnings": True,
        "skip_download": True,
        "proxy": self.http_proxy or None,
        # NOTE(review): certificate verification is switched off here.
        "nocheckcertificate": True,
        **cookie_opts,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        extracted = ydl.extract_info(clean_url, download=False)
    return extracted if isinstance(extracted, dict) else None
|
||||
|
||||
def _extract_with_yt_dlp_cli(self, clean_url: str) -> Optional[Dict[str, Any]]:
    """
    Extract metadata for ``clean_url`` by running the ``yt-dlp`` executable.

    Intended for environments where the yt_dlp Python package is missing
    but a ``yt-dlp`` binary is available on PATH. The tool is run with
    ``-J`` so it prints a single JSON document and downloads nothing.

    Args:
        clean_url: URL of the post to extract. It is passed after a ``--``
            separator so it can never be interpreted as a yt-dlp option.

    Returns:
        The parsed JSON object as a dict, or None when the binary is not
        found, cannot be run, times out (25 s), exits non-zero, or prints
        something that is not a JSON object.
    """
    yt_dlp_bin = shutil.which("yt-dlp")
    if not yt_dlp_bin:
        return None

    cmd = [yt_dlp_bin, "-J", "--no-warnings", "--skip-download"]
    if self.http_proxy:
        cmd.extend(["--proxy", self.http_proxy])
    # Same cookie strategy as the Python-package path: an existing cookies
    # file wins, otherwise a raw Cookie header is added.
    if self.cookie_file and os.path.exists(self.cookie_file):
        cmd.extend(["--cookies", self.cookie_file])
    elif self.cookie:
        # NOTE(review): the cookie value becomes part of the process
        # command line; prefer cookie_file where possible.
        cmd.extend(["--add-header", f"Cookie: {self.cookie}"])
    # "--" ends option parsing, so a URL beginning with "-" stays a URL.
    cmd.extend(["--", clean_url])

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors="replace",  # undecodable stderr bytes must not abort the fallback
            timeout=25,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        self.LOG.warning(f"[抖音] yt-dlp 命令行提取超时或无法执行: {e}")
        return None

    if result.returncode != 0:
        err_msg = (result.stderr or "").strip().replace("\n", " ")
        if "Fresh cookies" in err_msg:
            # Frequent for Douyin and this is already the last fallback,
            # so it is logged at info level to avoid misleading noise.
            self.LOG.info("[抖音] yt-dlp 兜底提取失败:Cookie 需要刷新(Fresh cookies needed)")
        else:
            self.LOG.warning(f"[抖音] yt-dlp 命令行提取失败: code={result.returncode}, err={err_msg[:200]}")
        return None

    try:
        data = json.loads(result.stdout or "{}")
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        return None
    return data if isinstance(data, dict) else None
|
||||
|
||||
def _normalize_yt_dlp_info(self, info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Map a raw yt-dlp info dict onto the plugin's media_info structure.

    Result shapes:
        - video: {"type": "video", "url", "title", "author", "cover"}
        - image set: {"type": "image", "images": [...], "title", "author", "cover"}

    Args:
        info: Info dict as produced by yt-dlp.

    Returns:
        A media_info dict, or None when neither image URLs nor a video URL
        could be found.
    """
    # Title prefers "description" over "title"; author tries uploader,
    # creator, then channel. Placeholders are used when all are empty.
    title = str(info.get("description") or info.get("title") or "无标题")
    author = str(info.get("uploader") or info.get("creator") or info.get("channel") or "未知作者")

    # Cover: the "thumbnail" field, else the url of the last "thumbnails" entry.
    cover = str(info.get("thumbnail") or "")
    if not cover:
        thumbnail_list = info.get("thumbnails") or []
        if isinstance(thumbnail_list, list) and thumbnail_list:
            tail = thumbnail_list[-1]
            if isinstance(tail, dict):
                cover = str(tail.get("url") or "")

    # Image-set case: a playlist whose entries carry http(s) URLs.
    if info.get("_type") == "playlist":
        entries = info.get("entries") or []
        image_urls = []
        if isinstance(entries, list):
            # Per entry: url, then webpage_url, then thumbnail.
            picked = (
                str(entry.get("url") or entry.get("webpage_url") or entry.get("thumbnail") or "")
                for entry in entries
                if isinstance(entry, dict)
            )
            image_urls = [link for link in picked if link.startswith("http")]
        if image_urls:
            return {
                "type": "image",
                "images": image_urls,
                "title": title,
                "author": author,
                "cover": image_urls[0],
            }

    # Video case: score every format that has an http(s) URL and a video
    # track, and keep the highest score (the earliest format wins a tie).
    best_url = ""
    top_score = None
    formats = info.get("formats") or []
    if isinstance(formats, list):
        for fmt in formats:
            if not isinstance(fmt, dict):
                continue
            fmt_url = str(fmt.get("url") or "")
            if not fmt_url.startswith("http"):
                continue
            # vcodec "none" marks an audio-only format.
            if str(fmt.get("vcodec") or "").lower() == "none":
                continue
            # +50 for plain http/https, plus height, plus tbr // 10.
            bonus = 50 if str(fmt.get("protocol") or "") in ("https", "http") else 0
            score = bonus + int(fmt.get("height") or 0) + int(fmt.get("tbr") or 0) // 10
            if top_score is None or score > top_score:
                top_score = score
                best_url = fmt_url

    # Last resort: a top-level "url" field.
    if not best_url:
        top_level_url = str(info.get("url") or "")
        if top_level_url.startswith("http"):
            best_url = top_level_url

    if not best_url:
        return None
    return {
        "type": "video",
        "url": best_url,
        "title": title,
        "author": author,
        "cover": cover,
    }
|
||||
|
||||
def _append_title_to_image(self, image_bytes: bytes, title: str) -> bytes:
|
||||
"""
|
||||
将标题绘制到图片顶部,返回新的图片二进制数据。
|
||||
|
||||
Reference in New Issue
Block a user