清理无效代码
This commit is contained in:
@@ -1,329 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Program: English Daily News Downloader
|
||||
Author: MrCrawL
|
||||
Created Date: 2024-01-21
|
||||
Last Modified: 2024-03-24
|
||||
Modified by: MrCrawL
|
||||
"""
|
||||
from utils.markdown_to_image import convert_md_str_to_image
|
||||
|
||||
'''Existing problem: text with hyperlink won't be saved'''
|
||||
|
||||
import requests
|
||||
from time import localtime, sleep
|
||||
from lxml import etree
|
||||
from loguru import logger
|
||||
|
||||
# Request configuration shared by the scraper functions in this module.
HEADERS = {
    # Browser-like User-Agent sent with every request.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}
TIMEOUT = 10  # value handed to requests as the per-request timeout
MAX_RETRIES = 3  # number of retries allowed after the first failed attempt
NEWS_LIMIT = 30  # maximum number of front-page links followed per site
|
||||
|
||||
|
||||
def get_time():
    """Return the current local date formatted as ``YYYY-MM-DD``.

    The clock is sampled exactly once so that year, month and day always
    describe the same instant, even when the call straddles midnight.
    """
    now = localtime()
    return f'{now.tm_year:04d}-{now.tm_mon:02d}-{now.tm_mday:02d}'
|
||||
|
||||
|
||||
def title_tidy(title_list):
    """Collapse runs of identical neighbouring entries, in place.

    Only *adjacent* duplicates are dropped; equal items separated by a
    different item are all kept.  The list passed in is mutated and the same
    list object is returned.
    """
    survivors = [
        item
        for position, item in enumerate(title_list)
        if position == 0 or item != title_list[position - 1]
    ]
    # Slice assignment keeps the caller's list object, as the original did.
    title_list[:] = survivors
    return title_list
|
||||
|
||||
|
||||
def text_tidy(p_text):
    """Normalise apostrophes and re-join text split around punctuation.

    A typographic apostrophe becomes a plain ``'``.  A blank-line break
    (``\\n\\n``) that touches a space, comma, semicolon, colon or quote is
    replaced by a single space.  Breaks between ordinary words are left alone.
    """
    # Applied strictly in this order; later rules see earlier results.
    substitutions = (
        ('’', "'"),
        (' \n\n', ' '),
        ('\n\n ', ' '),
        ('\n\n,', ' ,'),
        (',\n\n', ', '),
        (';\n\n', '; '),
        ('\n\n;', ' ;'),
        (':\n\n', ': '),
        ('\n\n:', ' :'),
        ('"\n\n', '" '),
        ('\n\n"', ' "'),
        ("'\n\n", "' "),
        ("\n\n'", " '"),
    )
    tidied = p_text
    for needle, replacement in substitutions:
        tidied = tidied.replace(needle, replacement)
    return tidied
|
||||
|
||||
|
||||
def safe_request(url, retry_count=0):
    """GET ``url`` with the module headers/timeout, retrying on failure.

    Any ``requests.RequestException`` (including a non-2xx status raised by
    ``raise_for_status``) triggers a retry after a one second pause, until
    ``MAX_RETRIES`` retries have been used.

    :param url: address to fetch.
    :param retry_count: number of retries already consumed.
    :return: the response object, or ``None`` once retries are exhausted.
    """
    attempts_used = retry_count
    while True:
        try:
            reply = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
            reply.raise_for_status()
            return reply
        except requests.RequestException as exc:
            if attempts_used >= MAX_RETRIES:
                logger.error(f"请求失败: {url}, 错误: {str(exc)}")
                return None
            logger.warning(f"请求失败,正在进行第{attempts_used + 1}次重试: {url}")
            sleep(1)
            attempts_used += 1
|
||||
|
||||
|
||||
def nbc():
    """Collect headline titles and links from the NBC News front page.

    Follows at most ``NEWS_LIMIT`` of the ``<h2><a href>`` links found on the
    front page and reads the first ``<h1>`` text of each article.

    :return: one ``Title: ... Link: ...`` line per article, or a failure
        message string when the front page cannot be fetched or an
        unexpected error occurs.
    """
    logger.info("开始获取NBC新闻")
    try:
        front_page = safe_request('https://www.nbcnews.com/')
        if not front_page:
            return "获取NBC新闻失败"

        links = title_tidy(etree.HTML(front_page.text).xpath('//h2/a/@href'))

        lines = []
        for link in links[:NEWS_LIMIT]:
            try:
                page = safe_request(link)
                if not page:
                    continue

                headings = etree.HTML(page.text).xpath('//h1/text()')
                if not headings:
                    # No <h1> text: a video page or another non-article type.
                    logger.warning(f'跳过视频或其他类型新闻: {link}')
                    continue

                lines.append(f'Title: {headings[0]}. Link: {link}\n')
                sleep(0.1)  # short pause between article requests

            except Exception as exc:
                logger.error(f"处理新闻失败: {link}, 错误: {str(exc)}")
                continue

        logger.info(f"NBC新闻获取完成,共获取{len(lines)}条")
        return ''.join(lines)

    except Exception as exc:
        logger.error(f"获取NBC新闻失败: {str(exc)}")
        return "获取新闻失败,请查看日志了解详情"
|
||||
|
||||
|
||||
def cnn():
    """Collect headline titles and links from the CNN front page.

    Follows at most ``NEWS_LIMIT`` links marked ``data-link-type="article"``
    and reads each article's ``headlineText`` ``<h1>``.

    Links are resolved against the site root with ``urljoin`` so that
    relative, protocol-relative and already-absolute hrefs all produce a
    valid URL (plain string concatenation corrupted the latter two).

    :return: one ``Title: ... Link: ...`` line per article, or a failure
        message string when the front page cannot be fetched or an
        unexpected error occurs.
    """
    from urllib.parse import urljoin

    logger.info("开始获取CNN新闻")
    try:
        head = 'https://www.cnn.com'
        response = safe_request(head + '/')
        if not response:
            return "获取CNN新闻失败"

        html = etree.HTML(response.text)
        href = title_tidy(html.xpath('//a[@data-link-type="article"]/@href'))

        lines = []
        for url in href[:NEWS_LIMIT]:
            # Bound before the try block so the error log below can always
            # name the link being processed.
            full_url = url
            try:
                full_url = urljoin(head + '/', url)
                response = safe_request(full_url)
                if not response:
                    continue

                html = etree.HTML(response.text)
                title = html.xpath('//h1[@data-editable="headlineText"]/text()')
                if not title:
                    # No headline element: a video page or other non-article.
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue

                lines.append(f'Title: {title[0].strip()}. Link: {full_url}\n')
                sleep(0.1)  # short pause between article requests

            except Exception as e:
                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
                continue

        logger.info(f"CNN新闻获取完成,共获取{len(lines)}条")
        return ''.join(lines)

    except Exception as e:
        logger.error(f"获取CNN新闻失败: {str(e)}")
        return "获取新闻失败,请查看日志了解详情"
|
||||
|
||||
|
||||
def abc():
    """Collect headline titles and links from the ABC News front page.

    Gathers hrefs from four groups of front-page anchors, follows at most
    ``NEWS_LIMIT`` of them and reads each article's ``prism-headline``
    ``<h1>``.

    :return: one ``Title: ... Link: ...`` line per article, or a failure
        message string when the front page cannot be fetched or an
        unexpected error occurs.
    """
    logger.info("开始获取ABC新闻")
    try:
        head = 'https://abcnews.go.com'  # no trailing slash
        front_page = safe_request(head)
        if not front_page:
            return "获取ABC新闻失败"

        tree = etree.HTML(front_page.text)
        link_queries = (
            '//div[@class="HeadlinesTrio"]/a/@href',
            '//div[@class="title card"]/a[@class="AnchorLink"]/@href | //div[@class="title"]/a[@class="AnchorLink"]/@href',
            '//a[@target="_self"]/@href',
            '//a[@class="AnchorLink VideoTile"]/@href',
        )
        collected = []
        for query in link_queries:
            collected = collected + tree.xpath(query)
        links = title_tidy(collected)

        lines = []
        for link in links[:NEWS_LIMIT]:
            try:
                # Normalise the href into an absolute URL.
                if link.startswith('http'):
                    article_url = link
                elif link.startswith('//'):
                    article_url = 'https:' + link
                else:
                    separator = '' if link.startswith('/') else '/'
                    article_url = head + separator + link

                page = safe_request(article_url)
                if not page:
                    continue

                headings = etree.HTML(page.text).xpath(
                    '//div[@data-testid="prism-headline"]/h1/text()')
                if not headings:
                    # No headline element: a video page or other non-article.
                    logger.warning(f'跳过视频或其他类型新闻: {article_url}')
                    continue

                lines.append(f'Title: {headings[0]}. Link: {article_url}\n')
                sleep(0.1)  # short pause between article requests

            except Exception as exc:
                logger.error(f"处理新闻失败: {article_url}, 错误: {str(exc)}")
                continue

        logger.info(f"ABC新闻获取完成,共获取{len(lines)}条")
        return ''.join(lines)

    except Exception as exc:
        logger.error(f"获取ABC新闻失败: {str(exc)}")
        return "获取新闻失败,请查看日志了解详情"
|
||||
|
||||
|
||||
def fox():
    """Collect headline titles and links from the Fox News front page.

    Follows at most ``NEWS_LIMIT`` of the ``<h3 class="title"><a href>``
    links and reads each article's ``<h1 itemprop="headline">``.

    Links are resolved against the site root with ``urljoin``: absolute and
    protocol-relative hrefs behave as before, while root-relative hrefs no
    longer become the invalid ``https:/path`` form.

    :return: one ``Title: ... Link: ...`` line per article, or a failure
        message string when the front page cannot be fetched or an
        unexpected error occurs.
    """
    from urllib.parse import urljoin

    logger.info("开始获取FOX新闻")
    try:
        head = 'https://www.foxnews.com/'
        response = safe_request(head)
        if not response:
            return "获取FOX新闻失败"

        html = etree.HTML(response.text)
        href = title_tidy(html.xpath('//h3[@class="title"]/a/@href'))

        lines = []
        for url in href[:NEWS_LIMIT]:
            try:
                url = urljoin(head, url)

                response = safe_request(url)
                if not response:
                    continue

                html = etree.HTML(response.text)
                title = html.xpath('//h1[@itemprop="headline"]/text()')
                if not title:
                    # No headline element: a video page or other non-article.
                    logger.warning(f'跳过视频或其他类型新闻: {url}')
                    continue

                lines.append(f'Title: {title[0]}. Link: {url}\n')
                sleep(0.1)  # short pause between article requests

            except Exception as e:
                logger.error(f"处理新闻失败: {url}, 错误: {str(e)}")
                continue

        logger.info(f"FOX新闻获取完成,共获取{len(lines)}条")
        return ''.join(lines)

    except Exception as e:
        logger.error(f"获取FOX新闻失败: {str(e)}")
        return "获取新闻失败,请查看日志了解详情"
|
||||
|
||||
|
||||
def bbc():
    """Collect headline titles and links from the BBC front page.

    Takes the ``href`` of the ancestors four and five levels above every
    ``card-headline`` ``<h2>``, skips hrefs that are already absolute, and
    follows at most ``NEWS_LIMIT`` of the candidates to read each article's
    ``headline-block`` ``<h1>``.

    :return: one ``Title: ... Link: ...`` line per article, or a failure
        message string when the front page cannot be fetched or an
        unexpected error occurs.
    """
    logger.info("开始获取BBC新闻")
    try:
        head = 'https://www.bbc.com'
        front_page = safe_request(head + '/')
        if not front_page:
            return "获取BBC新闻失败"

        tree = etree.HTML(front_page.text)
        links = title_tidy(tree.xpath(
            '//h2[@data-testid="card-headline"]/../../../../../@href | //h2[@data-testid="card-headline"]/../../../../@href'))

        lines = []
        for link in links[:NEWS_LIMIT]:
            try:
                # Absolute links are ignored; only site-relative ones are followed.
                if link.startswith('http'):
                    continue

                article_url = head + link
                page = safe_request(article_url)
                if not page:
                    continue

                headings = etree.HTML(page.text).xpath(
                    '//div[@data-component="headline-block"]/h1/text()')
                if not headings:
                    # No headline element: a video page or other non-article.
                    logger.warning(f'跳过视频或其他类型新闻: {article_url}')
                    continue

                lines.append(f'Title: {headings[0]}. Link: {article_url}\n')
                sleep(0.1)  # short pause between article requests

            except Exception as exc:
                logger.error(f"处理新闻失败: {article_url}, 错误: {str(exc)}")
                continue

        logger.info(f"BBC新闻获取完成,共获取{len(lines)}条")
        return ''.join(lines)

    except Exception as exc:
        logger.error(f"获取BBC新闻失败: {str(exc)}")
        return "获取新闻失败,请查看日志了解详情"
|
||||
|
||||
|
||||
def all_english_news():
    """Fetch every supported site and render the combined text to an image.

    The sites are queried in the order NBC, CNN, ABC, FOX, BBC; each result
    is followed by a newline.  The combined text is passed unchanged to
    ``convert_md_str_to_image`` with the output name ``news_output.png``.

    :return: whatever ``convert_md_str_to_image`` returns.
    """
    sources = (nbc, cnn, abc, fox, bbc)
    sections = []
    for fetch in sources:
        sections.append(fetch() + "\n")
    markdown_news = "".join(sections)
    return convert_md_str_to_image(markdown_news, "news_output.png")
|
||||
@@ -1,66 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 2022/12/29 15:51
|
||||
# @Author : 南宫乘风
|
||||
# @Email : 1794748404@qq.com
|
||||
# @File : epic.py
|
||||
# @Software: PyCharm
|
||||
from datetime import datetime
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def is_friday():
    """Return ``True`` when the local date is a Friday."""
    # isoweekday() counts Monday as 1, so Friday is 5.
    return datetime.today().isoweekday() == 5
|
||||
|
||||
|
||||
|
||||
def _cell_text(cell, drop_spaces=True):
    """Return a table cell's text with line breaks (and optionally spaces) removed."""
    if cell is None:
        return ''
    raw = cell.get_text()
    raw = raw.replace(" ", "") if drop_spaces else raw.strip()
    return raw.replace('\n', '').replace('\r', '')


def get_free():
    """Scrape the free-game table from steamstats.cn into a text report.

    The report starts with a header line pointing at the Epic free-games
    page, followed by one numbered entry per table row listing name, type,
    start/end time, permanence flag, platform and claim URL.

    Rows with fewer than seven cells are skipped.  When a row carries no
    link its URL field is left empty, and when the page has no ``<tbody>``
    only the header line is returned.

    :return: the formatted report.
    :raises requests.RequestException: if the page cannot be fetched, or
        the server answers with an error status.
    """
    url = 'https://steamstats.cn/xi'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.41'}
    # A timeout keeps a stalled server from blocking the caller forever.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, "html.parser")
    parts = ["今日喜加一 :" + 'https://store.epicgames.com/en-US/free-games' + '\n']

    tbody = soup.find('tbody')
    if tbody is None:
        return parts[0]

    index = 1
    for row in tbody.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) < 7:
            # Not a data row in the expected seven-column layout.
            continue

        # The last anchor in the platform cell wins, as before; a row with
        # no anchor now yields an empty URL instead of a stale or missing one.
        href_value = ''
        for anchor in cells[6].find_all('a'):
            href_value = anchor.get('href') or ''

        name = _cell_text(cells[1], drop_spaces=False)
        gametype = _cell_text(cells[2])
        start = _cell_text(cells[3])
        end = _cell_text(cells[4])
        permanent = _cell_text(cells[5])
        origin = _cell_text(cells[6].find('span'))

        parts.append(
            "序号:" + str(index) + '\n'
            + "游戏名称:" + name + '\n'
            + "DLC/game:" + gametype + '\n'
            + "开始时间:" + start + '\n'
            + "结束时间:" + end + '\n'
            + "是否永久:" + permanent + '\n'
            + "平台:" + origin + '\n'
            + "URL:" + href_value + '\n'
        )
        index += 1

    return ''.join(parts)
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: print the free-game report to stdout.
    print(get_free())
|
||||
# if len(game_info) > 40:
|
||||
|
||||
# send_to_epic_message(get_free())
|
||||
@@ -1,136 +0,0 @@
|
||||
#! /usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
from lxml import etree
|
||||
|
||||
from base import func_english_news
|
||||
|
||||
|
||||
class News(object):
    """Fetches news digests from several upstream sources.

    Each ``get_*`` method queries one source and returns either formatted
    text, an image URL, or an empty/``None``/error-message value on failure.
    """

    def __init__(self) -> None:
        self.LOG = logger
        # datetime.weekday() index -> Chinese weekday label.
        self.week = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"}
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}

    def get_important_news(self):
        """Return today's overnight global news digest from cls.cn.

        Searches the telegram feed for the digest keyword and takes the
        newest hit.

        :return: ``"<date> <weekday>\\n<digest>"``, or ``""`` when the request
            or parsing fails, or when the newest digest was not published
            today.
        """
        url = "https://www.cls.cn/api/sw?app=CailianpressWeb&os=web&sv=7.7.5"
        data = {"type": "telegram", "keyword": "你需要知道的隔夜全球要闻", "page": 0,
                "rn": 1, "os": "web", "sv": "7.7.5", "app": "CailianpressWeb"}
        try:
            # A timeout keeps a stalled server from blocking the caller.
            rsp = requests.post(url=url, headers=self.headers, data=data, timeout=10)
            item = json.loads(rsp.text)["data"]["telegram"]["data"][0]
            news = item["descr"]
            ts = time.localtime(item["time"])
            published = datetime(*ts[:6])
        except Exception as e:
            self.LOG.error(e)
            return ""

        # Stale item. Compare full dates, not just weekdays, so a digest
        # from exactly N weeks ago is rejected too. (Original note: observed
        # to be published around 06:30, Tuesday through Saturday.)
        if published.date() != datetime.now().date():
            return ""

        fmt_time = time.strftime("%Y年%m月%d日", ts)

        # Put every numbered item ("1、", "12、", ...) on its own line.
        news = re.sub(r"(\d{1,2}、)", r"\n\1", news)
        fmt_news = "".join(etree.HTML(news).xpath(" // text()"))
        # Strip the "周X你需要知道的" title prefix.  The character class lists
        # only weekday characters; the former "|" separators inside it were
        # matched literally.
        fmt_news = re.sub(r"周[一二三四五六日]你需要知道的", r"", fmt_news)

        return f"{fmt_time} {self.week[published.weekday()]}\n{fmt_news}"

    def get_baidu_news(self):
        """Return the Baidu realtime hot-search board as numbered lines.

        :return: a header line with today's date and weekday followed by one
            ``"<n> :#<title>"`` line per entry, or an error message string
            when the request fails.
        """
        url = "https://top.baidu.com/api/board?platform=wise&tab=realtime"
        now = datetime.now()
        current_date = now.strftime("%Y年%m月%d日")
        # Indexed by datetime.weekday(): 0 is Monday, 6 is Sunday.
        chinese_weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
        lines = [f"当前日期:{current_date} {chinese_weekdays[now.weekday()]}\n\n"]

        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            response.raise_for_status()
            if response.status_code != 200:
                self.LOG.error(f"获取百度新闻失败,状态码: {response.status_code}")
                return "获取百度新闻失败,请稍后再试"

            cards = response.json().get('data', {}).get('cards', [])
            index = 1
            for card in cards:
                for block in card.get('content', []):
                    for article in block.get('content', []):
                        if isinstance(article, dict) and 'word' in article:
                            # Spaces become underscores in the emitted title.
                            title = str(article.get('word', '')).strip().replace(" ", "_")
                            lines.append(f"{index} :#{title}\n")
                            index += 1
            return "".join(lines)

        except Exception as e:
            self.LOG.error(f"获取百度新闻时出错: {e}")
            return f"获取百度新闻时出错: {e}"

    def get_eng_news(self, website):
        """Return headlines from one English-language site.

        :param website: one of ``'nbc'``, ``'cnn'``, ``'abc'``, ``'fox'``,
            ``'bbc'``.
        :return: the result of the matching ``func_english_news`` function,
            or ``None`` for any other value.
        """
        if website not in ('nbc', 'cnn', 'abc', 'fox', 'bbc'):
            return None
        # The scraper functions are named exactly like the accepted keys.
        return getattr(func_english_news, website)()

    def get_news_60s(self) -> Optional[str]:
        """
        Call the 60s endpoint and extract its ``image`` field.

        :return: image url, or None when the request fails, the body is not
            valid JSON, or the field is absent
        """
        # NOTE(review): hard-coded private-network address; consider making
        # it configurable.
        API_URL = "http://192.168.2.32:4399/v2/60s"
        try:
            # A timeout keeps an unreachable host from blocking the caller.
            resp = requests.get(API_URL, timeout=10)
            resp.raise_for_status()  # raises on 4xx/5xx status codes

            data = resp.json()
            # "data" may be missing or null; both cases yield None.
            return (data.get("data") or {}).get("image")

        except requests.RequestException as e:
            self.LOG.error(f"请求接口失败: {e}")
        except ValueError as e:
            self.LOG.error(f"JSON 解析失败: {e}")

        return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: print the Baidu hot-search board to stdout.
    news = News()
    baidu_board = news.get_baidu_news()
    print(baidu_board)
|
||||
# # msg = "@水牛-分身 今日百度新闻"
|
||||
# # q = re.sub(r"@.*?[\u2005|\s]", "", msg).replace(" ", "")
|
||||
# # print(q)
|
||||
# print(news.get_eng_news('nbc'))
|
||||
Reference in New Issue
Block a user