From 53b6ddc05ce1b30ec7e1b89a013ba18197bec332 Mon Sep 17 00:00:00 2001 From: liuwei Date: Wed, 6 May 2026 13:48:56 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=8A=96=E9=9F=B3live?= =?UTF-8?q?=E5=AE=9E=E5=86=B5=E5=9B=BE=E6=96=87=E6=8C=89=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E5=8F=91=E9=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 修复抖音分享页 videoInfoRes 在新版 note 页面下的提取逻辑\n- 为图文页补充 note_pages 结构并识别 image.video 下的 live 实况视频地址\n- 命中 live 实况时优先按视频发送,失败再回退静态图发送 --- plugins/douyin_parser/main.py | 178 +++++++++++++++++++++++++++++----- 1 file changed, 154 insertions(+), 24 deletions(-) diff --git a/plugins/douyin_parser/main.py b/plugins/douyin_parser/main.py index e4771f5..3e33208 100644 --- a/plugins/douyin_parser/main.py +++ b/plugins/douyin_parser/main.py @@ -156,6 +156,37 @@ class DouyinParserPlugin(MessagePluginInterface): media_type = media_info.get('type', 'video') if media_type == 'image': + target_id = roomid if roomid else sender + + # 图文作品改回“文本与图片分离发送”: + # 1. 文本单独发送,可读性更强,也方便用户直接复制文案; + # 2. 图片数量较少时保留原始逐张展示,避免小图文被强行拼成长图; + # 3. 图片较多时再合并,兼顾刷屏控制与浏览体验。 + note_text = self._build_note_text(media_info) + if note_text: + await bot.send_text_message(target_id, note_text) + + note_pages = media_info.get('note_pages') or [] + # live 实况图文会把动态部分塞进 image.video。 + # 命中后优先按视频发送,发不出再回退到静态图,避免继续丢失动态内容。 + if note_pages and any(page.get("media_type") == "video" for page in note_pages): + sent_count = 0 + for page in note_pages: + if page.get("media_type") == "video": + video_bytes = self._download_first_available_video_bytes(page.get("video_candidates") or []) + cover_bytes = self._download_first_available_image_bytes(page.get("cover_candidates") or []) + if video_bytes: + await bot.send_video_message(target_id, video_bytes, cover_bytes if cover_bytes else None) + sent_count += 1 + continue + image_bytes = self._download_first_available_image_bytes(page.get("image_candidates") or []) + if image_bytes: + await bot.send_image_message(target_id, image_bytes) + sent_count += 1 + if sent_count: + return True, f"发送图文/实况成功({sent_count}页)" + return False, "下载图文内容失败" + image_candidates = media_info.get('image_candidates') or [] if not image_candidates: raw_images = media_info.get('images') or [] @@ -171,15 +202,6 @@ class DouyinParserPlugin(MessagePluginInterface): img_bytes_list.append(b) if not img_bytes_list: return False, "下载图片失败" - target_id = roomid if roomid else sender - - # 图文作品改回“文本与图片分离发送”: - # 1. 文本单独发送,可读性更强,也方便用户直接复制文案; - # 2. 图片数量较少时保留原始逐张展示,避免小图文被强行拼成长图; - # 3. 图片较多时再合并,兼顾刷屏控制与浏览体验。 - note_text = self._build_note_text(media_info) - if note_text: - await bot.send_text_message(target_id, note_text) if len(img_bytes_list) > 3: merged_pages = self._merge_images_vertical_paged(img_bytes_list, 1242, 65000) @@ -408,12 +430,21 @@ class DouyinParserPlugin(MessagePluginInterface): for page_data in loader_data.values(): if not isinstance(page_data, dict): continue - video_info = page_data.get("videoInfoRes") - if not isinstance(video_info, dict): - continue - item_list = video_info.get("item_list") - if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict): - return item_list[0] + # 新版 note 页的 videoInfoRes 直接挂在当前节点,不再额外包一层 page dict。 + direct_video_info = page_data.get("videoInfoRes") + if isinstance(direct_video_info, dict): + item_list = direct_video_info.get("item_list") + if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict): + return item_list[0] + for nested_page in page_data.values(): + if not isinstance(nested_page, dict): + continue + video_info = nested_page.get("videoInfoRes") + if not isinstance(video_info, dict): + continue + item_list = video_info.get("item_list") + if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict): + return item_list[0] return None def _parse_note_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]: @@ -422,7 +453,8 @@ class DouyinParserPlugin(MessagePluginInterface): 这里保留每张图的候选 URL 列表,后续下载阶段可以逐个重试,提升图文成功率。 """ - image_url_groups = self._pick_image_url_groups(item) + note_pages = self._build_note_pages(item.get("images") or item.get("image_infos") or []) + image_url_groups = [page.get("image_candidates") or [] for page in note_pages if page.get("image_candidates")] if not image_url_groups: return None @@ -432,6 +464,7 @@ class DouyinParserPlugin(MessagePluginInterface): "author": self._clean_text((item.get("author") or {}).get("nickname")), "images": [group[0] for group in image_url_groups if group], "image_candidates": image_url_groups, + "note_pages": note_pages, "cover": image_url_groups[0][0] if image_url_groups and image_url_groups[0] else "", } @@ -458,6 +491,78 @@ class DouyinParserPlugin(MessagePluginInterface): seen_groups.add(group_key) return image_url_groups + def _build_note_pages(self, image_infos: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """把抖音图文页规整成统一的逐页发送描述。 + + 设计说明: + 1. 普通图文页只会带静态图候选地址; + 2. live 实况页会额外在 image.video 里挂短视频; + 3. 发送阶段只认这份结构,就能按页决定“发图片还是发视频”。 + """ + pages: List[Dict[str, Any]] = [] + for image_info in image_infos or []: + if not isinstance(image_info, dict): + continue + image_candidates = self._dedupe_http_urls( + list(image_info.get("url_list") or []) + list(image_info.get("download_url_list") or []) + ) + if not image_candidates: + continue + video_candidates = self._extract_live_photo_video_candidates(image_info) + cover_candidates = self._dedupe_http_urls( + list((((image_info.get("video") or {}).get("cover") or {}).get("url_list")) or []) + image_candidates + ) + pages.append({ + "media_type": "video" if video_candidates else "image", + "image_candidates": image_candidates, + "video_candidates": video_candidates, + "cover_candidates": cover_candidates, + }) + return pages + + def _extract_live_photo_video_candidates(self, image_info: Dict[str, Any]) -> List[str]: + """从 live 实况图的 image.video 里提取可发送视频地址。""" + video_info = image_info.get("video") or {} + if not isinstance(video_info, dict): + return [] + + ordered_groups: List[List[str]] = [] + bit_rate_rows = sorted( + [row for row in (video_info.get("bit_rate") or []) if isinstance(row, dict)], + key=lambda row: row.get("bit_rate") or 0, + reverse=True, + ) + for row in bit_rate_rows: + ordered_groups.append(list(((row.get("play_addr") or {}).get("url_list")) or [])) + ordered_groups.extend([ + list(((video_info.get("download_addr") or {}).get("url_list")) or []), + list(((video_info.get("play_addr_h264") or {}).get("url_list")) or []), + list(((video_info.get("play_addr") or {}).get("url_list")) or []), + list(((video_info.get("play_addr_lowbr") or {}).get("url_list")) or []), + list(((video_info.get("download_suffix_logo_addr") or {}).get("url_list")) or []), + ]) + + candidates: List[str] = [] + for url_group in ordered_groups: + chosen = self._pick_video_url(url_group) + if chosen: + candidates.append(chosen) + return self._dedupe_http_urls(candidates) + + def _dedupe_http_urls(self, urls: List[str]) -> List[str]: + """去重并规整 URL 列表,避免对同一地址重复下载。""" + cleaned_urls: List[str] = [] + seen_urls = set() + for url in urls or []: + if not isinstance(url, str): + continue + decoded_url = self._decode_text(url).strip() + if not decoded_url.startswith("http") or decoded_url in seen_urls: + continue + cleaned_urls.append(decoded_url) + seen_urls.add(decoded_url) + return cleaned_urls + def _parse_video_item(self, item: Dict[str, Any]) -> Optional[Dict[str, Any]]: """从作品数据中解析视频作品,并优先挑选无水印播放地址。""" video = item.get("video") @@ -668,16 +773,12 @@ class DouyinParserPlugin(MessagePluginInterface): author = (data.get("author") or {}) nickname = author.get("nickname") or author.get("unique_id") or "未知作者" if aweme_type == 68 or (data.get("images") or data.get("image_list")): - images_field = data.get("images") or [] - images: List[str] = [] - for img in images_field: - ulist = img.get("url_list") or img.get("download_url_list") or [] - chosen = self._prefer_image_url(ulist) - if chosen: - images.append(chosen) + images_field = data.get("images") or data.get("image_list") or [] + note_pages = self._build_note_pages(images_field) + images = [page.get("image_candidates", [""])[0] for page in note_pages if page.get("image_candidates")] desc = data.get("desc") or data.get("caption") or "" result = {"type": "image", "images": images, "title": desc, "author": nickname, - "cover": images[0] if images else ""} + "note_pages": note_pages, "cover": images[0] if images else ""} if images: return result return None @@ -838,6 +939,24 @@ class DouyinParserPlugin(MessagePluginInterface): except Exception: return None + def _download_video_bytes(self, url: str) -> Optional[bytes]: + """下载短视频 bytes,供 live 实况页直接按视频发送。""" + try: + resp = requests.get( + url, + headers=self._build_request_headers(), + timeout=20, + proxies=self._build_proxies(), + ) + if resp.status_code != 200: + return None + content_type = (resp.headers.get("Content-Type") or "").lower() + if "video" not in content_type and "application/octet-stream" not in content_type: + return None + return resp.content + except Exception: + return None + def _merge_images_vertical(self, images: List[bytes], target_width: int = 1242) -> Optional[bytes]: try: pil_images: List[Image.Image] = [] @@ -924,6 +1043,17 @@ class DouyinParserPlugin(MessagePluginInterface): return image_bytes return None + def _download_first_available_video_bytes(self, candidates: List[str]) -> Optional[bytes]: + """按候选列表顺序下载第一段可用视频。""" + for candidate in candidates or []: + clean_candidate = self._clean_url(str(candidate or "")) + if not clean_candidate: + continue + video_bytes = self._download_video_bytes(clean_candidate) + if video_bytes: + return video_bytes + return None + def _build_note_text(self, media_info: Dict[str, Any]) -> str: """ 构建图文作品的单独文本说明。