#! /usr/bin/env python3
# -*- coding: utf-8 -*-
"""Fetch news text from cls.cn, the Baidu realtime board and English news helpers.

The English-language sources are delegated to ``base.func_english_news``.
"""

import json
import logging
import re
import time
from datetime import datetime

import requests
from lxml import etree

from base import func_english_news

# Seconds to wait for an HTTP response before giving up; without a timeout
# ``requests`` may block indefinitely on an unresponsive server.
_REQUEST_TIMEOUT = 10

# Site keys accepted by ``News.get_eng_news``; each one is also the name of the
# function in ``base.func_english_news`` that is called for it.
_ENGLISH_NEWS_SITES = ("nbc", "cnn", "abc", "fox", "bbc")


class News:
    """Download news from several web sources and format it as plain text."""

    def __init__(self) -> None:
        self.LOG = logging.getLogger(__name__)
        # Maps ``datetime.weekday()`` (0 = Monday) to its short Chinese name.
        self.week = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"}
        # Browser-like User-Agent sent with every request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) "
                          "Gecko/20100101 Firefox/110.0"
        }

    def get_important_news(self):
        """Return today's overnight-news digest from cls.cn.

        Returns:
            A string made of the publication date, the Chinese weekday name and
            the digest text with every numbered item on its own line. An empty
            string is returned when the request or parsing fails, when the
            newest digest was not published today (local time), or when the
            digest has no text.
        """
        url = "https://www.cls.cn/api/sw?app=CailianpressWeb&os=web&sv=7.7.5"
        payload = {
            "type": "telegram",
            "keyword": "你需要知道的隔夜全球要闻",
            "page": 0,
            "rn": 1,
            "os": "web",
            "sv": "7.7.5",
            "app": "CailianpressWeb",
        }
        try:
            rsp = requests.post(
                url=url, headers=self.headers, data=payload, timeout=_REQUEST_TIMEOUT
            )
            rsp.raise_for_status()
            item = json.loads(rsp.text)["data"]["telegram"]["data"][0]
            news = item["descr"]
            published = time.localtime(item["time"])
            published_at = datetime(*published[:6])
        except Exception as e:
            # Best-effort boundary: any network, HTTP, JSON or schema problem
            # simply means "no news available".
            self.LOG.error(e)
            return ""

        # Reject stale items (the digest has been observed to appear around
        # 06:30 from Tuesday to Saturday). The full calendar date is compared:
        # matching on the weekday alone would accept an item that is exactly
        # one or more weeks old.
        if published_at.date() != datetime.now().date():
            return ""
        if not news:
            return ""

        fmt_time = time.strftime("%Y年%m月%d日", published)
        # Start every numbered item ("1、", "12、", ...) on a new line.
        news = re.sub(r"(\d{1,2}、)", r"\n\1", news)
        root = etree.HTML(news)
        if root is None:
            return ""
        # Keep only the text nodes, dropping any HTML markup in the digest.
        fmt_news = "".join(root.xpath("//text()"))
        # Remove the "<weekday> you need to know" title prefix. Inside a
        # character class "|" is a literal, so it must not be used as a separator.
        fmt_news = re.sub(r"周[一二三四五六日]你需要知道的", "", fmt_news)
        return f"{fmt_time} {self.week[published_at.weekday()]}\n{fmt_news}"

    def get_baidu_news(self):
        """Return the Baidu realtime board as a numbered plain-text list.

        Returns:
            A header line with the current date and Chinese weekday followed by
            one ``"<index> :#<title>"`` line per board entry that has a
            ``word`` field (spaces in the title are replaced by underscores).
            If the response status is not 200 a fixed failure message is
            returned; if an exception occurs the returned message contains the
            error text.
        """
        url = "https://top.baidu.com/api/board?platform=wise&tab=realtime"
        chinese_weekdays = ("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日")

        now = datetime.now()
        current_date = now.strftime("%Y年%m月%d日")
        # ``weekday()`` is 0 for Monday through 6 for Sunday.
        current_weekday_chinese = chinese_weekdays[now.weekday()]
        header = f"当前日期:{current_date} {current_weekday_chinese}\n\n"

        try:
            response = requests.get(url, headers=self.headers, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
            if response.status_code != 200:
                # raise_for_status() only rejects 4xx/5xx; other codes end up here.
                self.LOG.error("获取百度新闻失败,状态码: %s", response.status_code)
                return "获取百度新闻失败,请稍后再试"

            cards = response.json().get("data", {}).get("cards", [])
            news_items = cards[0].get("content", []) if cards else []

            lines = []
            # The index counts every entry, including skipped ones, so the
            # numbering follows the position on the board.
            for index, article in enumerate(news_items, start=1):
                if isinstance(article, dict) and "word" in article:
                    title = article["word"].replace(" ", "_")
                    lines.append(f"{index} :#{title}\n")
            return header + "".join(lines)
        except Exception as e:
            # Best-effort boundary: report the problem to the caller as text.
            self.LOG.error("获取百度新闻时出错: %s", e)
            return f"获取百度新闻时出错: {e}"

    def get_eng_news(self, website):
        """Return the result of the ``base.func_english_news`` function for a site.

        Args:
            website: One of ``"nbc"``, ``"cnn"``, ``"abc"``, ``"fox"`` or ``"bbc"``.

        Returns:
            Whatever the matching ``func_english_news`` function returns, or
            ``None`` (after logging a warning) when ``website`` is not supported.
        """
        if website not in _ENGLISH_NEWS_SITES:
            self.LOG.warning("Unsupported English news site: %r", website)
            return None
        # Look the function up lazily so only the requested one is touched.
        return getattr(func_english_news, website)()


if __name__ == "__main__":
    news = News()
    print(news.get_baidu_news())