#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import logging
import re
import time
from datetime import datetime

import requests
from lxml import etree

from base import func_english_news


class News(object):
    """Fetch news digests from several sources and format them as text.

    Every ``get_*`` method returns a ready-to-send string. Network and
    payload errors are logged through ``self.LOG`` rather than raised.
    """

    # Seconds to wait on any remote endpoint before giving up; without a
    # timeout a stalled server would block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    # Chinese weekday names indexed by ``datetime.weekday()`` (0 = Monday).
    _WEEKDAYS_CN = ("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日")

    # Site keys accepted by ``get_eng_news``; each names a function of the
    # same name in ``func_english_news``.
    _ENGLISH_SOURCES = ("nbc", "cnn", "abc", "fox", "bbc")

    def __init__(self) -> None:
        self.LOG = logging.getLogger(__name__)
        # Short Chinese weekday labels indexed by ``datetime.weekday()``.
        self.week = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"}
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}

    def _date_header(self):
        """Return the "current date + weekday" header line shared by the digests."""
        now = datetime.now()
        current_date = now.strftime("%Y年%m月%d日")
        return f"当前日期:{current_date} {self._WEEKDAYS_CN[now.weekday()]}\n\n"

    def _fetch_json(self, url):
        """GET ``url`` and return the decoded JSON body.

        Returns ``None`` (after logging) when the request fails, the status
        code is not 200, or the body is not valid JSON.
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                self.LOG.error("GET %s returned HTTP %s", url, response.status_code)
                return None
            return response.json()
        except (requests.RequestException, ValueError) as e:
            self.LOG.error("GET %s failed: %s", url, e)
            return None

    def get_important_news(self):
        """Return today's overnight global headlines from cls.cn.

        Returns:
            The formatted digest (date, weekday, numbered items), or ``""``
            when the request fails, the payload is malformed, or the latest
            item was not published today.
        """
        url = "https://www.cls.cn/api/sw?app=CailianpressWeb&os=web&sv=7.7.5"
        payload = {"type": "telegram", "keyword": "你需要知道的隔夜全球要闻", "page": 0, "rn": 1,
                   "os": "web", "sv": "7.7.5", "app": "CailianpressWeb"}
        try:
            rsp = requests.post(url=url, headers=self.headers, data=payload,
                                timeout=self.REQUEST_TIMEOUT)
            item = json.loads(rsp.text)["data"]["telegram"]["data"][0]
            news = item["descr"]
            ts = time.localtime(item["time"])
            published = datetime(*ts[:6])
        except Exception as e:
            # Best-effort boundary: any network or schema problem means "no news".
            self.LOG.error(e)
            return ""

        # Stale item. Compare full calendar dates, not just the weekday,
        # otherwise an item exactly N weeks old would pass as fresh.
        # (Observed publication time: Tuesday to Saturday, around 06:30.)
        if published.date() != datetime.now().date():
            return ""

        fmt_time = time.strftime("%Y年%m月%d日", ts)
        # Put every numbered item ("1、", "2、", ...) on its own line.
        news = re.sub(r"(\d{1,2}、)", r"\n\1", news)
        # Strip any HTML markup, keeping only the text nodes.
        fmt_news = "".join(etree.HTML(news).xpath("//text()"))
        # Drop the "<weekday> you need to know" title prefix. Inside a
        # character class "|" is a literal, so the weekdays are listed bare.
        fmt_news = re.sub(r"周[一二三四五六日]你需要知道的", r"", fmt_news)

        return f"{fmt_time} {self.week[published.weekday()]}\n{fmt_news}"

    def get_36kr_news(self):
        """Return the 36kr daily list as numbered "title / URL" entries.

        Returns:
            The date header followed by one entry per article. Only the
            header is returned when the request fails or the payload does
            not have the expected shape.
        """
        url = "https://orz.ai/dailynews/?platform=36kr"
        header = self._date_header()

        post = self._fetch_json(url)
        if post is None:
            return header

        try:
            entries = [
                f"{index}. 标题: {article['title']}\n URL: {article['url']}\n"
                for index, article in enumerate(post['data'], start=1)
            ]
        except (KeyError, TypeError) as e:
            self.LOG.error("Unexpected 36kr payload: %r", e)
            return header

        return header + "".join(entries)

    def get_baidu_news(self):
        """Return the Baidu realtime hot-search board as a numbered topic list.

        Spaces inside a topic are replaced by underscores before the topic
        is emitted after ``#``.

        Returns:
            The date header followed by one line per topic. Only the header
            is returned when the request fails or the payload does not have
            the expected shape.
        """
        url = "https://top.baidu.com/api/board?platform=wise&tab=realtime"
        header = self._date_header()

        post = self._fetch_json(url)
        if post is None:
            return header

        try:
            # The topic list lives in the first card's "content" array.
            topics = post['data']['cards'][0]['content']
            entries = [
                f"{index} :#{article['word'].replace(' ', '_')}\n"
                for index, article in enumerate(topics, start=1)
            ]
        except (KeyError, IndexError, TypeError, AttributeError) as e:
            self.LOG.error("Unexpected Baidu payload: %r", e)
            return header

        return header + "".join(entries)

    def get_eng_news(self, website):
        """Return English news from the given site via ``func_english_news``.

        Args:
            website: One of ``'nbc'``, ``'cnn'``, ``'abc'``, ``'fox'``, ``'bbc'``.

        Returns:
            Whatever the matching ``func_english_news`` function returns, or
            ``None`` when ``website`` is not a supported key.
        """
        if website not in self._ENGLISH_SOURCES:
            return None
        # The allow-list above keeps getattr from reaching arbitrary attributes.
        return getattr(func_english_news, website)()


if __name__ == "__main__":
    # Manual smoke test: print the Baidu hot-search digest.
    news = News()
    print(news.get_baidu_news())