From 50b01da658ad69f8ba1f8078eedea96e46b38893 Mon Sep 17 00:00:00 2001 From: liuwei Date: Mon, 14 Apr 2025 16:25:23 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4=E8=8E=B7=E5=8F=96=E7=9A=84?= =?UTF-8?q?=E9=80=BB=E8=BE=91=EF=BC=8C=E5=85=BC=E5=AE=B9=E9=83=A8=E5=88=86?= =?UTF-8?q?=E6=9C=89=E6=A0=87=E9=A2=98=E7=9A=84=E6=96=B0=E9=97=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugins/global_news/news_crawler.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/plugins/global_news/news_crawler.py b/plugins/global_news/news_crawler.py index af9863e..2e1486c 100644 --- a/plugins/global_news/news_crawler.py +++ b/plugins/global_news/news_crawler.py @@ -186,13 +186,14 @@ def abc(): continue html = etree.HTML(response.text) - title = html.xpath('//div[@data-testid="prism-headline"]/h1/text()') + # 修改 xpath 以支持新的标题结构 + title = html.xpath('//div[@data-testid="prism-headline"]/h1/text() | //div[@data-testid="prism-headline"]//span/text()') if not title: logger.warning(f'跳过视频或其他类型新闻: {full_url}') continue - title = title[0] + title = title[0].strip() msg += f'Title: {title}. Link: {full_url}\n' count += 1 sleep(0.1) @@ -234,13 +235,14 @@ def fox(): continue html = etree.HTML(response.text) - title = html.xpath('//h1[@itemprop="headline"]/text()') + # 修改 xpath 以支持更多标题结构 + title = html.xpath('//h1[@itemprop="headline"]/text() | //h1[@class="headline speakable"]/text() | //h1[@class="headline"]/text()') if not title: logger.warning(f'跳过视频或其他类型新闻: {url}') continue - title = title[0] + title = title[0].strip() msg += f'Title: {title}. Link: {url}\n' count += 1 sleep(0.1) @@ -304,4 +306,7 @@ def bbc(): except Exception as e: logger.error(f"获取BBC新闻失败: {str(e)}") - return "获取新闻失败,请查看日志了解详情" \ No newline at end of file + return "获取新闻失败,请查看日志了解详情" + +if __name__ == '__main__': + fox() \ No newline at end of file