支持抖音live实况图文按视频发送

- 修复抖音分享页 videoInfoRes 在新版 note 页面下的提取逻辑
- 为图文页补充 note_pages 结构并识别 image.video 下的 live 实况视频地址
- 命中 live 实况时优先按视频发送，失败再回退静态图发送
This commit is contained in:
liuwei
2026-05-06 13:48:56 +08:00
parent a97e2fc092
commit 53b6ddc05c

View File

@@ -156,6 +156,37 @@ class DouyinParserPlugin(MessagePluginInterface):
media_type = media_info.get('type', 'video') media_type = media_info.get('type', 'video')
if media_type == 'image': if media_type == 'image':
target_id = roomid if roomid else sender
# 图文作品改回“文本与图片分离发送”:
# 1. 文本单独发送,可读性更强,也方便用户直接复制文案;
# 2. 图片数量较少时保留原始逐张展示,避免小图文被强行拼成长图;
# 3. 图片较多时再合并,兼顾刷屏控制与浏览体验。
note_text = self._build_note_text(media_info)
if note_text:
await bot.send_text_message(target_id, note_text)
note_pages = media_info.get('note_pages') or []
# live 实况图文会把动态部分塞进 image.video。
# 命中后优先按视频发送,发不出再回退到静态图,避免继续丢失动态内容。
if note_pages and any(page.get("media_type") == "video" for page in note_pages):
sent_count = 0
for page in note_pages:
if page.get("media_type") == "video":
video_bytes = self._download_first_available_video_bytes(page.get("video_candidates") or [])
cover_bytes = self._download_first_available_image_bytes(page.get("cover_candidates") or [])
if video_bytes:
await bot.send_video_message(target_id, video_bytes, cover_bytes if cover_bytes else None)
sent_count += 1
continue
image_bytes = self._download_first_available_image_bytes(page.get("image_candidates") or [])
if image_bytes:
await bot.send_image_message(target_id, image_bytes)
sent_count += 1
if sent_count:
return True, f"发送图文/实况成功({sent_count}页)"
return False, "下载图文内容失败"
image_candidates = media_info.get('image_candidates') or [] image_candidates = media_info.get('image_candidates') or []
if not image_candidates: if not image_candidates:
raw_images = media_info.get('images') or [] raw_images = media_info.get('images') or []
@@ -171,15 +202,6 @@ class DouyinParserPlugin(MessagePluginInterface):
img_bytes_list.append(b) img_bytes_list.append(b)
if not img_bytes_list: if not img_bytes_list:
return False, "下载图片失败" return False, "下载图片失败"
target_id = roomid if roomid else sender
# 图文作品改回“文本与图片分离发送”:
# 1. 文本单独发送,可读性更强,也方便用户直接复制文案;
# 2. 图片数量较少时保留原始逐张展示,避免小图文被强行拼成长图;
# 3. 图片较多时再合并,兼顾刷屏控制与浏览体验。
note_text = self._build_note_text(media_info)
if note_text:
await bot.send_text_message(target_id, note_text)
if len(img_bytes_list) > 3: if len(img_bytes_list) > 3:
merged_pages = self._merge_images_vertical_paged(img_bytes_list, 1242, 65000) merged_pages = self._merge_images_vertical_paged(img_bytes_list, 1242, 65000)
@@ -408,12 +430,21 @@ class DouyinParserPlugin(MessagePluginInterface):
for page_data in loader_data.values(): for page_data in loader_data.values():
if not isinstance(page_data, dict): if not isinstance(page_data, dict):
continue continue
video_info = page_data.get("videoInfoRes") # 新版 note 页的 videoInfoRes 直接挂在当前节点,不再额外包一层 page dict。
if not isinstance(video_info, dict): direct_video_info = page_data.get("videoInfoRes")
continue if isinstance(direct_video_info, dict):
item_list = video_info.get("item_list") item_list = direct_video_info.get("item_list")
if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict): if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict):
return item_list[0] return item_list[0]
for nested_page in page_data.values():
if not isinstance(nested_page, dict):
continue
video_info = nested_page.get("videoInfoRes")
if not isinstance(video_info, dict):
continue
item_list = video_info.get("item_list")
if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict):
return item_list[0]
return None return None
def _parse_note_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]: def _parse_note_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]:
@@ -422,7 +453,8 @@ class DouyinParserPlugin(MessagePluginInterface):
这里保留每张图的候选 URL 列表,后续下载阶段可以逐个重试,提升图文成功率。 这里保留每张图的候选 URL 列表,后续下载阶段可以逐个重试,提升图文成功率。
""" """
image_url_groups = self._pick_image_url_groups(item) note_pages = self._build_note_pages(item.get("images") or item.get("image_infos") or [])
image_url_groups = [page.get("image_candidates") or [] for page in note_pages if page.get("image_candidates")]
if not image_url_groups: if not image_url_groups:
return None return None
@@ -432,6 +464,7 @@ class DouyinParserPlugin(MessagePluginInterface):
"author": self._clean_text((item.get("author") or {}).get("nickname")), "author": self._clean_text((item.get("author") or {}).get("nickname")),
"images": [group[0] for group in image_url_groups if group], "images": [group[0] for group in image_url_groups if group],
"image_candidates": image_url_groups, "image_candidates": image_url_groups,
"note_pages": note_pages,
"cover": image_url_groups[0][0] if image_url_groups and image_url_groups[0] else "", "cover": image_url_groups[0][0] if image_url_groups and image_url_groups[0] else "",
} }
@@ -458,6 +491,78 @@ class DouyinParserPlugin(MessagePluginInterface):
seen_groups.add(group_key) seen_groups.add(group_key)
return image_url_groups return image_url_groups
def _build_note_pages(self, image_infos: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Normalize Douyin image-note entries into one send descriptor per page.

    Design notes:
    1. A plain image page only carries static image candidate URLs.
    2. A "live photo" page additionally carries a short clip under
       ``image["video"]``.
    3. The sending stage reads only this structure to decide, page by page,
       whether to send an image or a video.

    Args:
        image_infos: Raw per-image dicts from the item payload. ``None``,
            non-dict entries, and entries without any usable image URL are
            skipped.

    Returns:
        A list of dicts with the keys ``media_type`` (``"video"`` when video
        candidates were found, otherwise ``"image"``), ``image_candidates``,
        ``video_candidates`` and ``cover_candidates``.
    """

    def as_list(value: Any) -> List[Any]:
        # The payload is untrusted JSON: anything that is not a real
        # list/tuple is treated as "no URLs" instead of raising.
        return list(value) if isinstance(value, (list, tuple)) else []

    pages: List[Dict[str, Any]] = []
    for image_info in image_infos or []:
        if not isinstance(image_info, dict):
            continue

        raw_image_urls = as_list(image_info.get("url_list")) + as_list(
            image_info.get("download_url_list")
        )
        if not raw_image_urls:
            continue
        image_candidates = self._dedupe_http_urls(raw_image_urls)
        if not image_candidates:
            continue

        video_candidates = self._extract_live_photo_video_candidates(image_info)

        # Guard every nesting level: a truthy non-dict "video" or "cover"
        # must not raise AttributeError while looking up the cover URLs.
        video_info = image_info.get("video")
        cover_info = video_info.get("cover") if isinstance(video_info, dict) else None
        cover_urls = as_list(cover_info.get("url_list")) if isinstance(cover_info, dict) else []
        # The static image URLs double as fallback covers.
        cover_candidates = self._dedupe_http_urls(cover_urls + image_candidates)

        pages.append({
            "media_type": "video" if video_candidates else "image",
            "image_candidates": image_candidates,
            "video_candidates": video_candidates,
            "cover_candidates": cover_candidates,
        })
    return pages
def _extract_live_photo_video_candidates(self, image_info: Dict[str, Any]) -> List[str]:
    """Collect sendable video URLs from a live photo's ``image["video"]`` node.

    URL groups are visited in this order: every ``bit_rate`` row's
    ``play_addr`` (highest ``bit_rate`` first), then ``download_addr``,
    ``play_addr_h264``, ``play_addr``, ``play_addr_lowbr`` and
    ``download_suffix_logo_addr``. One URL per non-empty group is chosen via
    ``self._pick_video_url`` and the result is de-duplicated with
    ``self._dedupe_http_urls``.

    Args:
        image_info: One raw image entry from the item payload.

    Returns:
        Ordered, de-duplicated candidate URLs; ``[]`` when the entry has no
        usable ``video`` dict or no group yields a URL.
    """
    video_info = image_info.get("video") if isinstance(image_info, dict) else None
    if not isinstance(video_info, dict):
        return []

    def urls_of(address: Any) -> List[Any]:
        # Malformed payloads (non-dict address, non-list url_list) yield no
        # URLs instead of raising AttributeError/TypeError.
        if not isinstance(address, dict):
            return []
        url_list = address.get("url_list")
        return list(url_list) if isinstance(url_list, (list, tuple)) else []

    def bit_rate_of(row: Dict[str, Any]) -> float:
        # Non-numeric bit rates sort as 0 so mixed types cannot break sorting.
        value = row.get("bit_rate")
        return value if isinstance(value, (int, float)) else 0

    raw_rows = video_info.get("bit_rate")
    bit_rate_rows = (
        [row for row in raw_rows if isinstance(row, dict)]
        if isinstance(raw_rows, (list, tuple))
        else []
    )
    # Stable sort: rows with equal bit rates keep their payload order.
    bit_rate_rows.sort(key=bit_rate_of, reverse=True)

    ordered_groups: List[List[Any]] = [urls_of(row.get("play_addr")) for row in bit_rate_rows]
    ordered_groups.extend(
        urls_of(video_info.get(key))
        for key in (
            "download_addr",
            "play_addr_h264",
            "play_addr",
            "play_addr_lowbr",
            "download_suffix_logo_addr",
        )
    )

    candidates: List[str] = []
    for url_group in ordered_groups:
        if not url_group:
            # Nothing to pick from; skip the helper call entirely.
            continue
        chosen = self._pick_video_url(url_group)
        if chosen:
            candidates.append(chosen)
    if not candidates:
        return []
    return self._dedupe_http_urls(candidates)
def _dedupe_http_urls(self, urls: List[str]) -> List[str]:
    """Normalize a URL list and drop duplicates so no address is fetched twice.

    Non-string entries are ignored. Each string is passed through
    ``self._decode_text`` and stripped; only results starting with ``"http"``
    are kept, in first-seen order.
    """
    # A dict preserves insertion order, giving ordered uniqueness in one pass.
    unique: Dict[str, None] = {}
    for raw in (urls or []):
        if isinstance(raw, str):
            normalized = self._decode_text(raw).strip()
            if normalized.startswith("http"):
                unique.setdefault(normalized, None)
    return list(unique)
def _parse_video_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]: def _parse_video_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""从作品数据中解析视频作品,并优先挑选无水印播放地址。""" """从作品数据中解析视频作品,并优先挑选无水印播放地址。"""
video = item.get("video") video = item.get("video")
@@ -668,16 +773,12 @@ class DouyinParserPlugin(MessagePluginInterface):
author = (data.get("author") or {}) author = (data.get("author") or {})
nickname = author.get("nickname") or author.get("unique_id") or "未知作者" nickname = author.get("nickname") or author.get("unique_id") or "未知作者"
if aweme_type == 68 or (data.get("images") or data.get("image_list")): if aweme_type == 68 or (data.get("images") or data.get("image_list")):
images_field = data.get("images") or [] images_field = data.get("images") or data.get("image_list") or []
images: List[str] = [] note_pages = self._build_note_pages(images_field)
for img in images_field: images = [page.get("image_candidates", [""])[0] for page in note_pages if page.get("image_candidates")]
ulist = img.get("url_list") or img.get("download_url_list") or []
chosen = self._prefer_image_url(ulist)
if chosen:
images.append(chosen)
desc = data.get("desc") or data.get("caption") or "" desc = data.get("desc") or data.get("caption") or ""
result = {"type": "image", "images": images, "title": desc, "author": nickname, result = {"type": "image", "images": images, "title": desc, "author": nickname,
"cover": images[0] if images else ""} "note_pages": note_pages, "cover": images[0] if images else ""}
if images: if images:
return result return result
return None return None
@@ -838,6 +939,24 @@ class DouyinParserPlugin(MessagePluginInterface):
except Exception: except Exception:
return None return None
def _download_video_bytes(self, url: str) -> Optional[bytes]:
    """Download a short video as bytes so a live-photo page can be sent as video.

    Returns ``None`` when the response status is not 200, when the
    ``Content-Type`` header contains neither ``video`` nor
    ``application/octet-stream``, or when any exception is raised.
    """
    accepted_markers = ("video", "application/octet-stream")
    try:
        response = requests.get(
            url,
            headers=self._build_request_headers(),
            timeout=20,
            proxies=self._build_proxies(),
        )
        if response.status_code == 200:
            declared_type = (response.headers.get("Content-Type") or "").lower()
            # Only accept bodies the server labels as video or raw binary.
            if any(marker in declared_type for marker in accepted_markers):
                return response.content
        return None
    except Exception:
        # Best-effort download: any failure is reported as "no video".
        return None
def _merge_images_vertical(self, images: List[bytes], target_width: int = 1242) -> Optional[bytes]: def _merge_images_vertical(self, images: List[bytes], target_width: int = 1242) -> Optional[bytes]:
try: try:
pil_images: List[Image.Image] = [] pil_images: List[Image.Image] = []
@@ -924,6 +1043,17 @@ class DouyinParserPlugin(MessagePluginInterface):
return image_bytes return image_bytes
return None return None
def _download_first_available_video_bytes(self, candidates: List[str]) -> Optional[bytes]:
    """Return the bytes of the first candidate URL that downloads successfully.

    Candidates are tried in order. Each is stringified and passed through
    ``self._clean_url``; URLs that clean to an empty value are skipped.
    Returns ``None`` when no candidate yields data.
    """
    for raw in (candidates or ()):
        target = self._clean_url(str(raw or ""))
        payload = self._download_video_bytes(target) if target else None
        if payload:
            return payload
    return None
def _build_note_text(self, media_info: Dict[str, Any]) -> str: def _build_note_text(self, media_info: Dict[str, Any]) -> str:
""" """
构建图文作品的单独文本说明。 构建图文作品的单独文本说明。