Files
abot/base/func_news.py
2025-06-04 11:03:56 +08:00

115 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from loguru import logger
import time
from datetime import datetime
import requests
from lxml import etree
from base import func_english_news
class News:
    """Fetch and format news digests from several public sources.

    Provides three entry points:

    * ``get_important_news`` - the overnight global headlines telegram
      returned by the cls.cn search API.
    * ``get_baidu_news`` - the Baidu realtime hot-search board.
    * ``get_eng_news`` - English headlines, delegated to
      ``func_english_news``.
    """

    # Timeout (seconds) applied to every outbound HTTP request so that an
    # unresponsive server cannot block the caller forever.
    _REQUEST_TIMEOUT = 10

    # Site keys accepted by ``get_eng_news``; each one is also the name of the
    # fetcher function called on ``func_english_news``.
    _ENG_NEWS_SITES = ("nbc", "cnn", "abc", "fox", "bbc")

    # Long-form Chinese weekday names indexed by ``datetime.weekday()``
    # (0 = Monday ... 6 = Sunday).
    _CHINESE_WEEKDAYS = ("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日")

    # Item numbering such as "1、"; used to start every item on its own line.
    _ITEM_NUMBER_RE = re.compile(r"(\d{1,2}、)")

    # Title prefix "周X你需要知道的". Inside a character class ``|`` is a
    # literal character, so the weekday characters are listed without it.
    _TITLE_PREFIX_RE = re.compile(r"周[一二三四五六日]你需要知道的")

    def __init__(self) -> None:
        """Set up the logger, weekday labels and default HTTP headers."""
        self.LOG = logger
        # Short Chinese weekday labels indexed by ``datetime.weekday()``.
        self.week = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"}
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}

    def get_important_news(self):
        """Return today's overnight global headlines as plain text.

        Queries the cls.cn search API for the most recent telegram matching
        the fixed keyword and strips its HTML markup.

        Returns:
            A string made of the formatted date, the short weekday label and
            the cleaned headline text, or ``""`` when the request fails, the
            response cannot be parsed, the item was not published today, or
            the item has no text.
        """
        url = "https://www.cls.cn/api/sw?app=CailianpressWeb&os=web&sv=7.7.5"
        payload = {"type": "telegram", "keyword": "你需要知道的隔夜全球要闻", "page": 0,
                   "rn": 1, "os": "web", "sv": "7.7.5", "app": "CailianpressWeb"}
        try:
            rsp = requests.post(url=url, headers=self.headers, data=payload,
                                timeout=self._REQUEST_TIMEOUT)
            item = json.loads(rsp.text)["data"]["telegram"]["data"][0]
            descr = item["descr"]
            # Local-time datetime, equivalent to time.localtime(item["time"]).
            published = datetime.fromtimestamp(item["time"])
        except Exception as e:
            # Best-effort fetch: any network or schema problem means "no news".
            self.LOG.error(e)
            return ""

        # Compare full calendar dates rather than weekdays only, otherwise an
        # item that is exactly one week old would be reported as fresh.
        # (Original author's note: the digest was observed to be published
        # Tuesday through Saturday at around 06:30.)
        if published.date() != datetime.now().date():
            return ""

        if not descr:
            # Nothing to format; also avoids handing an empty document to lxml.
            return ""

        fmt_time = published.strftime("%Y年%m月%d")
        numbered = self._ITEM_NUMBER_RE.sub(r"\n\1", descr)
        root = etree.HTML(numbered)
        if root is None:
            # NOTE(review): lxml appears to return None when the input has no
            # parsable content - treat that as "no news" instead of crashing.
            return ""
        fmt_news = "".join(root.xpath(" // text()"))
        fmt_news = self._TITLE_PREFIX_RE.sub(r"", fmt_news)
        return f"{fmt_time} {self.week[published.weekday()]}\n{fmt_news}"

    def get_baidu_news(self):
        """Return the Baidu realtime hot-search board as numbered lines.

        Returns:
            A header line with the current date and Chinese weekday followed
            by one ``"<index> :#<title>"`` line per board entry (spaces in a
            title are replaced with underscores). On a non-200 response or any
            exception a human-readable error message is returned instead.
        """
        url = "https://top.baidu.com/api/board?platform=wise&tab=realtime"
        now = datetime.now()
        current_date = now.strftime("%Y年%m月%d")
        current_weekday_chinese = self._CHINESE_WEEKDAYS[now.weekday()]
        header = f"当前日期:{current_date} {current_weekday_chinese}\n\n"

        try:
            response = requests.get(url, headers=self.headers, timeout=self._REQUEST_TIMEOUT)
            response.raise_for_status()  # 4xx / 5xx are handled by the except below
            if response.status_code != 200:
                self.LOG.error(f"获取百度新闻失败,状态码: {response.status_code}")
                return "获取百度新闻失败,请稍后再试"

            cards = response.json().get('data', {}).get('cards', [])
            news_items = cards[0].get('content', []) if cards else []

            lines = []
            # Enumerate every entry so the numbering matches the board position
            # even when a malformed entry is skipped.
            for index, article in enumerate(news_items, start=1):
                if isinstance(article, dict) and 'word' in article:
                    title = article['word'].replace(" ", "_")
                    lines.append(f"{index} :#{title}\n")
            return header + "".join(lines)
        except Exception as e:
            self.LOG.error(f"获取百度新闻时出错: {e}")
            return f"获取百度新闻时出错: {e}"

    def get_eng_news(self, website):
        """Return English headlines for the given site key.

        Args:
            website: One of ``'nbc'``, ``'cnn'``, ``'abc'``, ``'fox'`` or
                ``'bbc'`` (case-sensitive).

        Returns:
            Whatever the matching ``func_english_news`` fetcher returns, or
            ``None`` when ``website`` is not a supported key.
        """
        if website not in self._ENG_NEWS_SITES:
            return None
        # Each supported key is also the name of its fetcher function.
        return getattr(func_english_news, website)()
if __name__ == "__main__":
    # Manual smoke-test entry point: build the client, then uncomment one of
    # the scratch snippets below to exercise it by hand.
    news = News()
    # Scratch check for stripping a leading "@nickname" mention from a chat
    # message before matching the command text:
    # # msg = "@<nickname> <command text>"
    # # q = re.sub(r"@.*?[\u2005|\s]", "", msg).replace(" ", "")
    # # print(q)
    # print(news.get_eng_news('nbc'))