支持抖音 live 实况图文按视频发送

- 修复抖音分享页 videoInfoRes 在新版 note 页面下的提取逻辑
- 为图文页补充 note_pages 结构并识别 image.video 下的 live 实况视频地址
- 命中 live 实况时优先按视频发送，失败再回退静态图发送
2026-05-06 13:48:56 +08:00
parent a97e2fc092
commit 53b6ddc05c
1 changed files with 154 additions and 24 deletions
--- a/plugins/douyin_parser/main.py
+++ b/plugins/douyin_parser/main.py
@@ -156,6 +156,37 @@ class DouyinParserPlugin(MessagePluginInterface):

            media_type = media_info.get('type', 'video')
            if media_type == 'image':
+                target_id = roomid if roomid else sender
+
+                # 图文作品改回“文本与图片分离发送”：
+                # 1. 文本单独发送，可读性更强，也方便用户直接复制文案；
+                # 2. 图片数量较少时保留原始逐张展示，避免小图文被强行拼成长图；
+                # 3. 图片较多时再合并，兼顾刷屏控制与浏览体验。
+                note_text = self._build_note_text(media_info)
+                if note_text:
+                    await bot.send_text_message(target_id, note_text)
+
+                note_pages = media_info.get('note_pages') or []
+                # live 实况图文会把动态部分塞进 image.video。
+                # 命中后优先按视频发送，发不出再回退到静态图，避免继续丢失动态内容。
+                if note_pages and any(page.get("media_type") == "video" for page in note_pages):
+                    sent_count = 0
+                    for page in note_pages:
+                        if page.get("media_type") == "video":
+                            video_bytes = self._download_first_available_video_bytes(page.get("video_candidates") or [])
+                            cover_bytes = self._download_first_available_image_bytes(page.get("cover_candidates") or [])
+                            if video_bytes:
+                                await bot.send_video_message(target_id, video_bytes, cover_bytes if cover_bytes else None)
+                                sent_count += 1
+                                continue
+                        image_bytes = self._download_first_available_image_bytes(page.get("image_candidates") or [])
+                        if image_bytes:
+                            await bot.send_image_message(target_id, image_bytes)
+                            sent_count += 1
+                    if sent_count:
+                        return True, f"发送图文/实况成功（{sent_count}页）"
+                    return False, "下载图文内容失败"
+
                image_candidates = media_info.get('image_candidates') or []
                if not image_candidates:
                    raw_images = media_info.get('images') or []
@@ -171,15 +202,6 @@ class DouyinParserPlugin(MessagePluginInterface):
                        img_bytes_list.append(b)
                if not img_bytes_list:
                    return False, "下载图片失败"
-                target_id = roomid if roomid else sender
-
-                # 图文作品改回“文本与图片分离发送”：
-                # 1. 文本单独发送，可读性更强，也方便用户直接复制文案；
-                # 2. 图片数量较少时保留原始逐张展示，避免小图文被强行拼成长图；
-                # 3. 图片较多时再合并，兼顾刷屏控制与浏览体验。
-                note_text = self._build_note_text(media_info)
-                if note_text:
-                    await bot.send_text_message(target_id, note_text)

                if len(img_bytes_list) > 3:
                    merged_pages = self._merge_images_vertical_paged(img_bytes_list, 1242, 65000)
@@ -408,12 +430,21 @@ class DouyinParserPlugin(MessagePluginInterface):
        for page_data in loader_data.values():
            if not isinstance(page_data, dict):
                continue
-            video_info = page_data.get("videoInfoRes")
-            if not isinstance(video_info, dict):
-                continue
-            item_list = video_info.get("item_list")
-            if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict):
-                return item_list[0]
+            # 新版 note 页的 videoInfoRes 直接挂在当前节点，不再额外包一层 page dict。
+            direct_video_info = page_data.get("videoInfoRes")
+            if isinstance(direct_video_info, dict):
+                item_list = direct_video_info.get("item_list")
+                if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict):
+                    return item_list[0]
+            for nested_page in page_data.values():
+                if not isinstance(nested_page, dict):
+                    continue
+                video_info = nested_page.get("videoInfoRes")
+                if not isinstance(video_info, dict):
+                    continue
+                item_list = video_info.get("item_list")
+                if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict):
+                    return item_list[0]
        return None

    def _parse_note_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]:
@@ -422,7 +453,8 @@ class DouyinParserPlugin(MessagePluginInterface):

        这里保留每张图的候选 URL 列表，后续下载阶段可以逐个重试，提升图文成功率。
        """
-        image_url_groups = self._pick_image_url_groups(item)
+        note_pages = self._build_note_pages(item.get("images") or item.get("image_infos") or [])
+        image_url_groups = [page.get("image_candidates") or [] for page in note_pages if page.get("image_candidates")]
        if not image_url_groups:
            return None

@@ -432,6 +464,7 @@ class DouyinParserPlugin(MessagePluginInterface):
            "author": self._clean_text((item.get("author") or {}).get("nickname")),
            "images": [group[0] for group in image_url_groups if group],
            "image_candidates": image_url_groups,
+            "note_pages": note_pages,
            "cover": image_url_groups[0][0] if image_url_groups and image_url_groups[0] else "",
        }

@@ -458,6 +491,78 @@ class DouyinParserPlugin(MessagePluginInterface):
                seen_groups.add(group_key)
        return image_url_groups

+    def _build_note_pages(self, image_infos: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """把抖音图文页规整成统一的逐页发送描述。
+
+        设计说明：
+        1. 普通图文页只会带静态图候选地址；
+        2. live 实况页会额外在 image.video 里挂短视频；
+        3. 发送阶段只认这份结构，就能按页决定“发图片还是发视频”。
+        """
+        pages: List[Dict[str, Any]] = []
+        for image_info in image_infos or []:
+            if not isinstance(image_info, dict):
+                continue
+            image_candidates = self._dedupe_http_urls(
+                list(image_info.get("url_list") or []) + list(image_info.get("download_url_list") or [])
+            )
+            if not image_candidates:
+                continue
+            video_candidates = self._extract_live_photo_video_candidates(image_info)
+            cover_candidates = self._dedupe_http_urls(
+                list((((image_info.get("video") or {}).get("cover") or {}).get("url_list")) or []) + image_candidates
+            )
+            pages.append({
+                "media_type": "video" if video_candidates else "image",
+                "image_candidates": image_candidates,
+                "video_candidates": video_candidates,
+                "cover_candidates": cover_candidates,
+            })
+        return pages
+
+    def _extract_live_photo_video_candidates(self, image_info: Dict[str, Any]) -> List[str]:
+        """从 live 实况图的 image.video 里提取可发送视频地址。"""
+        video_info = image_info.get("video") or {}
+        if not isinstance(video_info, dict):
+            return []
+
+        ordered_groups: List[List[str]] = []
+        bit_rate_rows = sorted(
+            [row for row in (video_info.get("bit_rate") or []) if isinstance(row, dict)],
+            key=lambda row: row.get("bit_rate") or 0,
+            reverse=True,
+        )
+        for row in bit_rate_rows:
+            ordered_groups.append(list(((row.get("play_addr") or {}).get("url_list")) or []))
+        ordered_groups.extend([
+            list(((video_info.get("download_addr") or {}).get("url_list")) or []),
+            list(((video_info.get("play_addr_h264") or {}).get("url_list")) or []),
+            list(((video_info.get("play_addr") or {}).get("url_list")) or []),
+            list(((video_info.get("play_addr_lowbr") or {}).get("url_list")) or []),
+            list(((video_info.get("download_suffix_logo_addr") or {}).get("url_list")) or []),
+        ])
+
+        candidates: List[str] = []
+        for url_group in ordered_groups:
+            chosen = self._pick_video_url(url_group)
+            if chosen:
+                candidates.append(chosen)
+        return self._dedupe_http_urls(candidates)
+
+    def _dedupe_http_urls(self, urls: List[str]) -> List[str]:
+        """去重并规整 URL 列表，避免对同一地址重复下载。"""
+        cleaned_urls: List[str] = []
+        seen_urls = set()
+        for url in urls or []:
+            if not isinstance(url, str):
+                continue
+            decoded_url = self._decode_text(url).strip()
+            if not decoded_url.startswith("http") or decoded_url in seen_urls:
+                continue
+            cleaned_urls.append(decoded_url)
+            seen_urls.add(decoded_url)
+        return cleaned_urls
+
    def _parse_video_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """从作品数据中解析视频作品，并优先挑选无水印播放地址。"""
        video = item.get("video")
@@ -668,16 +773,12 @@ class DouyinParserPlugin(MessagePluginInterface):
            author = (data.get("author") or {})
            nickname = author.get("nickname") or author.get("unique_id") or "未知作者"
            if aweme_type == 68 or (data.get("images") or data.get("image_list")):
-                images_field = data.get("images") or []
-                images: List[str] = []
-                for img in images_field:
-                    ulist = img.get("url_list") or img.get("download_url_list") or []
-                    chosen = self._prefer_image_url(ulist)
-                    if chosen:
-                        images.append(chosen)
+                images_field = data.get("images") or data.get("image_list") or []
+                note_pages = self._build_note_pages(images_field)
+                images = [page.get("image_candidates", [""])[0] for page in note_pages if page.get("image_candidates")]
                desc = data.get("desc") or data.get("caption") or ""
                result = {"type": "image", "images": images, "title": desc, "author": nickname,
-                          "cover": images[0] if images else ""}
+                          "note_pages": note_pages, "cover": images[0] if images else ""}
                if images:
                    return result
                return None
@@ -838,6 +939,24 @@ class DouyinParserPlugin(MessagePluginInterface):
        except Exception:
            return None

+    def _download_video_bytes(self, url: str) -> Optional[bytes]:
+        """下载短视频 bytes，供 live 实况页直接按视频发送。"""
+        try:
+            resp = requests.get(
+                url,
+                headers=self._build_request_headers(),
+                timeout=20,
+                proxies=self._build_proxies(),
+            )
+            if resp.status_code != 200:
+                return None
+            content_type = (resp.headers.get("Content-Type") or "").lower()
+            if "video" not in content_type and "application/octet-stream" not in content_type:
+                return None
+            return resp.content
+        except Exception:
+            return None
+
    def _merge_images_vertical(self, images: List[bytes], target_width: int = 1242) -> Optional[bytes]:
        try:
            pil_images: List[Image.Image] = []
@@ -924,6 +1043,17 @@ class DouyinParserPlugin(MessagePluginInterface):
                return image_bytes
        return None

+    def _download_first_available_video_bytes(self, candidates: List[str]) -> Optional[bytes]:
+        """按候选列表顺序下载第一段可用视频。"""
+        for candidate in candidates or []:
+            clean_candidate = self._clean_url(str(candidate or ""))
+            if not clean_candidate:
+                continue
+            video_bytes = self._download_video_bytes(clean_candidate)
+            if video_bytes:
+                return video_bytes
+        return None
+
    def _build_note_text(self, media_info: Dict[str, Any]) -> str:
        """
        构建图文作品的单独文本说明。