Files
abot/base/func_news.py
2025-02-05 11:12:20 +08:00

131 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
import logging
import time
from datetime import datetime
import requests
from lxml import etree
from base import func_english_news
class News(object):
    """Collect news digests from several public endpoints and render them as text.

    Every ``get_*`` method issues HTTP requests and returns a value meant to be
    sent to a chat as-is.  Network failures and unexpected payloads are logged
    instead of raised, so a broken upstream never takes the caller down.
    """

    # Seconds to wait on an upstream endpoint before giving up.  ``requests``
    # has no default timeout, so without this a stalled connection blocks forever.
    REQUEST_TIMEOUT = 10

    # Site keys accepted by get_eng_news(); each one is also the name of the
    # function called on ``func_english_news``.
    ENG_NEWS_SITES = ("nbc", "cnn", "abc", "fox", "bbc")

    # Long weekday names indexed by ``datetime.weekday()`` (0 = Monday).
    _CHINESE_WEEKDAYS = ("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日")

    def __init__(self) -> None:
        self.LOG = logging.getLogger(__name__)
        # Short weekday names keyed by ``datetime.weekday()`` (0 = Monday).
        self.week = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"}
        # Browser-like User-Agent sent with the CLS request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _format_date(moment):
        """Render *moment* as ``YYYY年MM月DD`` (the format the digests already use).

        Only ASCII directives are handed to ``strftime``; the CJK separators are
        added by the f-string so the result does not depend on the C locale.
        """
        return f"{moment:%Y}年{moment:%m}月{moment:%d}"

    @classmethod
    def _date_header(cls, now=None):
        """Return the "current date + weekday" header line followed by a blank line.

        Args:
            now: ``datetime`` to render; defaults to ``datetime.now()``.
        """
        now = datetime.now() if now is None else now
        weekday = cls._CHINESE_WEEKDAYS[now.weekday()]
        return f"当前日期:{cls._format_date(now)} {weekday}\n\n"

    @staticmethod
    def _format_36kr(articles):
        """Render 36kr articles (dicts with ``title`` and ``url``) as a numbered list."""
        return "".join(
            f"{index}. 标题: {article['title']}\n URL: {article['url']}\n"
            for index, article in enumerate(articles, start=1)
        )

    @staticmethod
    def _format_baidu(entries):
        """Render Baidu hot-search entries (dicts with ``word``) as a numbered list."""
        return "".join(
            f"{index}. : {entry['word']}\n"
            for index, entry in enumerate(entries, start=1)
        )

    @staticmethod
    def _clean_headlines(descr):
        """Convert a CLS telegram description into plain text, one item per line."""
        # Start every numbered item ("1、", "12、", ...) on its own line.
        numbered = re.sub(r"(\d{1,2}、)", r"\n\1", descr)
        # Parse as HTML and keep only the text nodes, dropping any markup.
        text = "".join(etree.HTML(numbered).xpath("//text()"))
        # Drop the "周X你需要知道的" title prefix.  The character class lists the
        # weekday characters only (no stray "|" members).
        return re.sub(r"周[一二三四五六日]你需要知道的", "", text)

    def _fetch_json(self, url):
        """GET *url* and return the decoded JSON body.

        Returns:
            The decoded JSON value, or ``None`` when the request fails, the
            status is not 200, or the body is not valid JSON (all are logged).
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            self.LOG.error("Request to %s failed: %s", url, e)
            return None
        if response.status_code != 200:
            self.LOG.error("Request to %s returned HTTP %s", url, response.status_code)
            return None
        try:
            return response.json()
        except ValueError as e:
            self.LOG.error("Response from %s is not valid JSON: %s", url, e)
            return None

    # --------------------------------------------------------------- public API

    def get_important_news(self):
        """Return today's CLS "overnight global headlines" telegram as plain text.

        Returns:
            A string made of the publication date, the short weekday name, a
            newline and the cleaned headlines.  An empty string is returned
            when the request or parsing fails, or when the newest telegram was
            not published today (local time).
        """
        url = "https://www.cls.cn/api/sw?app=CailianpressWeb&os=web&sv=7.7.5"
        payload = {"type": "telegram", "keyword": "你需要知道的隔夜全球要闻", "page": 0,
                   "rn": 1, "os": "web", "sv": "7.7.5", "app": "CailianpressWeb"}
        try:
            rsp = requests.post(url=url, headers=self.headers, data=payload,
                                timeout=self.REQUEST_TIMEOUT)
            item = json.loads(rsp.text)["data"]["telegram"]["data"][0]
            published = datetime.fromtimestamp(item["time"])
            headlines = self._clean_headlines(item["descr"])
        except Exception as e:
            # Deliberate best-effort boundary: any network, payload-shape or
            # parsing problem means "no news", never a crash in the caller.
            self.LOG.error("Failed to fetch CLS overnight news: %s", e)
            return ""

        # Stale item: compare full dates, not just weekdays, so a telegram that
        # is exactly one or more weeks old is not mistaken for today's.
        if published.date() != datetime.now().date():
            return ""

        return f"{self._format_date(published)} {self.week[published.weekday()]}\n{headlines}"

    def get_36kr_news(self):
        """Return the 36kr daily list as a date header plus numbered title/URL pairs.

        Returns:
            The header followed by one entry per article.  Only the header is
            returned when the endpoint cannot be reached, does not answer with
            HTTP 200, or returns a payload of an unexpected shape.
        """
        output = self._date_header()
        post = self._fetch_json("https://orz.ai/dailynews/?platform=36kr")
        if post is None:
            return output
        try:
            return output + self._format_36kr(post["data"])
        except (KeyError, IndexError, TypeError) as e:
            self.LOG.error("Unexpected 36kr payload: %r", e)
            return output

    def get_baidu_news(self):
        """Return the Baidu realtime hot-search board as a date header plus a numbered list.

        Returns:
            The header followed by one line per entry.  Only the header is
            returned when the endpoint cannot be reached, does not answer with
            HTTP 200, or returns a payload of an unexpected shape.
        """
        output = self._date_header()
        post = self._fetch_json("https://top.baidu.com/api/board?platform=wise&tab=realtime")
        if post is None:
            return output
        try:
            # The entries live in the first card's "content" list.
            return output + self._format_baidu(post["data"]["cards"][0]["content"])
        except (KeyError, IndexError, TypeError) as e:
            self.LOG.error("Unexpected Baidu payload: %r", e)
            return output

    def get_eng_news(self, website):
        """Return English news from *website* via ``func_english_news``.

        Args:
            website: One of the keys in ``ENG_NEWS_SITES``.

        Returns:
            Whatever the matching ``func_english_news`` function returns, or
            ``None`` when *website* is not a supported key.
        """
        if website not in self.ENG_NEWS_SITES:
            self.LOG.warning("Unsupported English news site: %r", website)
            return None
        # Each supported key is also the name of the fetcher function.
        return getattr(func_english_news, website)()
if __name__ == "__main__":
    # Manual smoke test: fetch the Baidu hot-search digest and print it.
    # Swap in another getter (for example get_eng_news('nbc')) to check a
    # different source by hand.
    print(News().get_baidu_news())