调整标题获取的逻辑，兼容部分标题结构不同的新闻

This commit is contained in:
liuwei
2025-04-14 16:25:23 +08:00
parent 8afd0f49d0
commit 50b01da658

View File

@@ -186,13 +186,14 @@ def abc():
continue
html = etree.HTML(response.text)
title = html.xpath('//div[@data-testid="prism-headline"]/h1/text()')
# 修改 xpath 以支持新的标题结构
title = html.xpath('//div[@data-testid="prism-headline"]/h1/text() | //div[@data-testid="prism-headline"]//span/text()')
if not title:
logger.warning(f'跳过视频或其他类型新闻: {full_url}')
continue
title = title[0]
title = title[0].strip()
msg += f'Title: {title}. Link: {full_url}\n'
count += 1
sleep(0.1)
@@ -234,13 +235,14 @@ def fox():
continue
html = etree.HTML(response.text)
title = html.xpath('//h1[@itemprop="headline"]/text()')
# 修改 xpath 以支持更多标题结构
title = html.xpath('//h1[@itemprop="headline"]/text() | //h1[@class="headline speakable"]/text() | //h1[@class="headline"]/text()')
if not title:
logger.warning(f'跳过视频或其他类型新闻: {url}')
continue
title = title[0]
title = title[0].strip()
msg += f'Title: {title}. Link: {url}\n'
count += 1
sleep(0.1)
@@ -304,4 +306,7 @@ def bbc():
except Exception as e:
logger.error(f"获取BBC新闻失败: {str(e)}")
return "获取新闻失败,请查看日志了解详情"
return "获取新闻失败,请查看日志了解详情"
# Entry guard: call fox() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    fox()