支持抖音live实况图文按视频发送
- 修复抖音分享页 videoInfoRes 在新版 note 页面下的提取逻辑
- 为图文页补充 note_pages 结构，并识别 image.video 下的 live 实况视频地址
- 命中 live 实况时优先按视频发送，失败再回退静态图发送
This commit is contained in:
@@ -156,6 +156,37 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
|
||||
media_type = media_info.get('type', 'video')
|
||||
if media_type == 'image':
|
||||
target_id = roomid if roomid else sender
|
||||
|
||||
# 图文作品改回“文本与图片分离发送”:
|
||||
# 1. 文本单独发送,可读性更强,也方便用户直接复制文案;
|
||||
# 2. 图片数量较少时保留原始逐张展示,避免小图文被强行拼成长图;
|
||||
# 3. 图片较多时再合并,兼顾刷屏控制与浏览体验。
|
||||
note_text = self._build_note_text(media_info)
|
||||
if note_text:
|
||||
await bot.send_text_message(target_id, note_text)
|
||||
|
||||
note_pages = media_info.get('note_pages') or []
|
||||
# live 实况图文会把动态部分塞进 image.video。
|
||||
# 命中后优先按视频发送,发不出再回退到静态图,避免继续丢失动态内容。
|
||||
if note_pages and any(page.get("media_type") == "video" for page in note_pages):
|
||||
sent_count = 0
|
||||
for page in note_pages:
|
||||
if page.get("media_type") == "video":
|
||||
video_bytes = self._download_first_available_video_bytes(page.get("video_candidates") or [])
|
||||
cover_bytes = self._download_first_available_image_bytes(page.get("cover_candidates") or [])
|
||||
if video_bytes:
|
||||
await bot.send_video_message(target_id, video_bytes, cover_bytes if cover_bytes else None)
|
||||
sent_count += 1
|
||||
continue
|
||||
image_bytes = self._download_first_available_image_bytes(page.get("image_candidates") or [])
|
||||
if image_bytes:
|
||||
await bot.send_image_message(target_id, image_bytes)
|
||||
sent_count += 1
|
||||
if sent_count:
|
||||
return True, f"发送图文/实况成功({sent_count}页)"
|
||||
return False, "下载图文内容失败"
|
||||
|
||||
image_candidates = media_info.get('image_candidates') or []
|
||||
if not image_candidates:
|
||||
raw_images = media_info.get('images') or []
|
||||
@@ -171,15 +202,6 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
img_bytes_list.append(b)
|
||||
if not img_bytes_list:
|
||||
return False, "下载图片失败"
|
||||
target_id = roomid if roomid else sender
|
||||
|
||||
# 图文作品改回“文本与图片分离发送”:
|
||||
# 1. 文本单独发送,可读性更强,也方便用户直接复制文案;
|
||||
# 2. 图片数量较少时保留原始逐张展示,避免小图文被强行拼成长图;
|
||||
# 3. 图片较多时再合并,兼顾刷屏控制与浏览体验。
|
||||
note_text = self._build_note_text(media_info)
|
||||
if note_text:
|
||||
await bot.send_text_message(target_id, note_text)
|
||||
|
||||
if len(img_bytes_list) > 3:
|
||||
merged_pages = self._merge_images_vertical_paged(img_bytes_list, 1242, 65000)
|
||||
@@ -408,12 +430,21 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
for page_data in loader_data.values():
|
||||
if not isinstance(page_data, dict):
|
||||
continue
|
||||
video_info = page_data.get("videoInfoRes")
|
||||
if not isinstance(video_info, dict):
|
||||
continue
|
||||
item_list = video_info.get("item_list")
|
||||
if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict):
|
||||
return item_list[0]
|
||||
# 新版 note 页的 videoInfoRes 直接挂在当前节点,不再额外包一层 page dict。
|
||||
direct_video_info = page_data.get("videoInfoRes")
|
||||
if isinstance(direct_video_info, dict):
|
||||
item_list = direct_video_info.get("item_list")
|
||||
if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict):
|
||||
return item_list[0]
|
||||
for nested_page in page_data.values():
|
||||
if not isinstance(nested_page, dict):
|
||||
continue
|
||||
video_info = nested_page.get("videoInfoRes")
|
||||
if not isinstance(video_info, dict):
|
||||
continue
|
||||
item_list = video_info.get("item_list")
|
||||
if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict):
|
||||
return item_list[0]
|
||||
return None
|
||||
|
||||
def _parse_note_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
@@ -422,7 +453,8 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
|
||||
这里保留每张图的候选 URL 列表,后续下载阶段可以逐个重试,提升图文成功率。
|
||||
"""
|
||||
image_url_groups = self._pick_image_url_groups(item)
|
||||
note_pages = self._build_note_pages(item.get("images") or item.get("image_infos") or [])
|
||||
image_url_groups = [page.get("image_candidates") or [] for page in note_pages if page.get("image_candidates")]
|
||||
if not image_url_groups:
|
||||
return None
|
||||
|
||||
@@ -432,6 +464,7 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
"author": self._clean_text((item.get("author") or {}).get("nickname")),
|
||||
"images": [group[0] for group in image_url_groups if group],
|
||||
"image_candidates": image_url_groups,
|
||||
"note_pages": note_pages,
|
||||
"cover": image_url_groups[0][0] if image_url_groups and image_url_groups[0] else "",
|
||||
}
|
||||
|
||||
@@ -458,6 +491,78 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
seen_groups.add(group_key)
|
||||
return image_url_groups
|
||||
|
||||
def _build_note_pages(self, image_infos: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""把抖音图文页规整成统一的逐页发送描述。
|
||||
|
||||
设计说明:
|
||||
1. 普通图文页只会带静态图候选地址;
|
||||
2. live 实况页会额外在 image.video 里挂短视频;
|
||||
3. 发送阶段只认这份结构,就能按页决定“发图片还是发视频”。
|
||||
"""
|
||||
pages: List[Dict[str, Any]] = []
|
||||
for image_info in image_infos or []:
|
||||
if not isinstance(image_info, dict):
|
||||
continue
|
||||
image_candidates = self._dedupe_http_urls(
|
||||
list(image_info.get("url_list") or []) + list(image_info.get("download_url_list") or [])
|
||||
)
|
||||
if not image_candidates:
|
||||
continue
|
||||
video_candidates = self._extract_live_photo_video_candidates(image_info)
|
||||
cover_candidates = self._dedupe_http_urls(
|
||||
list((((image_info.get("video") or {}).get("cover") or {}).get("url_list")) or []) + image_candidates
|
||||
)
|
||||
pages.append({
|
||||
"media_type": "video" if video_candidates else "image",
|
||||
"image_candidates": image_candidates,
|
||||
"video_candidates": video_candidates,
|
||||
"cover_candidates": cover_candidates,
|
||||
})
|
||||
return pages
|
||||
|
||||
def _extract_live_photo_video_candidates(self, image_info: Dict[str, Any]) -> List[str]:
|
||||
"""从 live 实况图的 image.video 里提取可发送视频地址。"""
|
||||
video_info = image_info.get("video") or {}
|
||||
if not isinstance(video_info, dict):
|
||||
return []
|
||||
|
||||
ordered_groups: List[List[str]] = []
|
||||
bit_rate_rows = sorted(
|
||||
[row for row in (video_info.get("bit_rate") or []) if isinstance(row, dict)],
|
||||
key=lambda row: row.get("bit_rate") or 0,
|
||||
reverse=True,
|
||||
)
|
||||
for row in bit_rate_rows:
|
||||
ordered_groups.append(list(((row.get("play_addr") or {}).get("url_list")) or []))
|
||||
ordered_groups.extend([
|
||||
list(((video_info.get("download_addr") or {}).get("url_list")) or []),
|
||||
list(((video_info.get("play_addr_h264") or {}).get("url_list")) or []),
|
||||
list(((video_info.get("play_addr") or {}).get("url_list")) or []),
|
||||
list(((video_info.get("play_addr_lowbr") or {}).get("url_list")) or []),
|
||||
list(((video_info.get("download_suffix_logo_addr") or {}).get("url_list")) or []),
|
||||
])
|
||||
|
||||
candidates: List[str] = []
|
||||
for url_group in ordered_groups:
|
||||
chosen = self._pick_video_url(url_group)
|
||||
if chosen:
|
||||
candidates.append(chosen)
|
||||
return self._dedupe_http_urls(candidates)
|
||||
|
||||
def _dedupe_http_urls(self, urls: List[str]) -> List[str]:
|
||||
"""去重并规整 URL 列表,避免对同一地址重复下载。"""
|
||||
cleaned_urls: List[str] = []
|
||||
seen_urls = set()
|
||||
for url in urls or []:
|
||||
if not isinstance(url, str):
|
||||
continue
|
||||
decoded_url = self._decode_text(url).strip()
|
||||
if not decoded_url.startswith("http") or decoded_url in seen_urls:
|
||||
continue
|
||||
cleaned_urls.append(decoded_url)
|
||||
seen_urls.add(decoded_url)
|
||||
return cleaned_urls
|
||||
|
||||
def _parse_video_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""从作品数据中解析视频作品,并优先挑选无水印播放地址。"""
|
||||
video = item.get("video")
|
||||
@@ -668,16 +773,12 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
author = (data.get("author") or {})
|
||||
nickname = author.get("nickname") or author.get("unique_id") or "未知作者"
|
||||
if aweme_type == 68 or (data.get("images") or data.get("image_list")):
|
||||
images_field = data.get("images") or []
|
||||
images: List[str] = []
|
||||
for img in images_field:
|
||||
ulist = img.get("url_list") or img.get("download_url_list") or []
|
||||
chosen = self._prefer_image_url(ulist)
|
||||
if chosen:
|
||||
images.append(chosen)
|
||||
images_field = data.get("images") or data.get("image_list") or []
|
||||
note_pages = self._build_note_pages(images_field)
|
||||
images = [page.get("image_candidates", [""])[0] for page in note_pages if page.get("image_candidates")]
|
||||
desc = data.get("desc") or data.get("caption") or ""
|
||||
result = {"type": "image", "images": images, "title": desc, "author": nickname,
|
||||
"cover": images[0] if images else ""}
|
||||
"note_pages": note_pages, "cover": images[0] if images else ""}
|
||||
if images:
|
||||
return result
|
||||
return None
|
||||
@@ -838,6 +939,24 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _download_video_bytes(self, url: str) -> Optional[bytes]:
    """Download a short video as bytes so a live-photo page can be sent as video.

    Args:
        url: Address of the video to fetch.

    Returns:
        The response body when the server answers 200 with a Content-Type
        containing ``video`` or ``application/octet-stream``; ``None`` for
        any other response or when any exception is raised.
    """
    accepted_markers = ("video", "application/octet-stream")
    try:
        response = requests.get(
            url,
            headers=self._build_request_headers(),
            timeout=20,
            proxies=self._build_proxies(),
        )
        if response.status_code == 200:
            declared_type = (response.headers.get("Content-Type") or "").lower()
            # Reject non-video payloads (e.g. an HTML error page served with 200).
            if any(marker in declared_type for marker in accepted_markers):
                return response.content
        return None
    except Exception:
        # Best-effort download: every failure is reported to the caller as None.
        return None
|
||||
|
||||
def _merge_images_vertical(self, images: List[bytes], target_width: int = 1242) -> Optional[bytes]:
|
||||
try:
|
||||
pil_images: List[Image.Image] = []
|
||||
@@ -924,6 +1043,17 @@ class DouyinParserPlugin(MessagePluginInterface):
|
||||
return image_bytes
|
||||
return None
|
||||
|
||||
def _download_first_available_video_bytes(self, candidates: List[str]) -> Optional[bytes]:
|
||||
"""按候选列表顺序下载第一段可用视频。"""
|
||||
for candidate in candidates or []:
|
||||
clean_candidate = self._clean_url(str(candidate or ""))
|
||||
if not clean_candidate:
|
||||
continue
|
||||
video_bytes = self._download_video_bytes(clean_candidate)
|
||||
if video_bytes:
|
||||
return video_bytes
|
||||
return None
|
||||
|
||||
def _build_note_text(self, media_info: Dict[str, Any]) -> str:
|
||||
"""
|
||||
构建图文作品的单独文本说明。
|
||||
|
||||
Reference in New Issue
Block a user