清理无效代码

This commit is contained in:
liuwei
2026-04-20 14:18:19 +08:00
parent db08a4a15e
commit 09daaf956c
3 changed files with 0 additions and 531 deletions

View File

@@ -1,329 +0,0 @@
# -*- coding: utf-8 -*-
"""
Program: English Daily News Downloader
Author: MrCrawL
Created Date: 2024-01-21
Last Modified: 2024-03-24
Modified by: MrCrawL
"""
from utils.markdown_to_image import convert_md_str_to_image
'''Existing problem: text with hyperlink won't be saved'''
import requests
from time import localtime, sleep
from lxml import etree
from loguru import logger
# Request configuration shared by every scraper in this module.
# HEADERS is sent with each GET issued by safe_request (a desktop Chrome
# User-Agent string).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# Passed as the ``timeout`` argument of requests.get in safe_request.
TIMEOUT = 10
# Upper bound on the retries safe_request performs after a failed request.
MAX_RETRIES = 3
# Maximum number of front-page links each site scraper follows.
NEWS_LIMIT = 30
def get_time():
    """Return today's local date formatted as ``YYYY-MM-DD``.

    The clock is read exactly once so that year, month and day all come
    from the same instant; reading it separately for each field could mix
    two different days when the call straddles midnight.
    """
    now = localtime()
    return f'{now.tm_year:04d}-{now.tm_mon:02d}-{now.tm_mday:02d}'
def title_tidy(title_list):
    """Collapse runs of adjacent equal items in ``title_list``.

    Only neighbouring duplicates are removed; equal items separated by a
    different item are both kept. The list is modified in place and the
    same list object is returned.

    Args:
        title_list: Mutable list of comparable items (titles or links).

    Returns:
        ``title_list`` itself, with each run of equal neighbours reduced
        to its first element.
    """
    kept = title_list[:1]
    for item in title_list[1:]:
        # Comparing with the last kept item is equivalent to comparing
        # with the previous original item, because a dropped item was
        # equal to the one kept before it.
        if item != kept[-1]:
            kept.append(item)
    # Slice assignment keeps the caller's list object, as the original
    # pop-based version did.
    title_list[:] = kept
    return title_list
def text_tidy(p_text):
    """Normalise apostrophes and re-join text split around punctuation.

    The typographic apostrophe (U+2019) is replaced by a plain ``'``, and
    blank-line breaks that sit directly next to a space, comma, semicolon,
    colon or quote are turned into a single space so the sentence reads
    continuously.

    NOTE(review): the first pattern appeared as an empty string in this
    file, which would insert an apostrophe between every character. U+2019
    is assumed to be the character that was lost; confirm against the
    upstream source.

    Args:
        p_text: Raw paragraph text.

    Returns:
        The cleaned-up string.
    """
    # Applied strictly in this order; later patterns see the output of
    # earlier ones.
    replacements = (
        ('\u2019', "'"),
        (' \n\n', ' '),
        ('\n\n ', ' '),
        ('\n\n,', ' ,'),
        (',\n\n', ', '),
        (';\n\n', '; '),
        ('\n\n;', ' ;'),
        (':\n\n', ': '),
        ('\n\n:', ' :'),
        ('"\n\n', '" '),
        ('\n\n"', ' "'),
        ("'\n\n", "' "),
        ("\n\n'", " '"),
    )
    text_ = p_text
    for old, new in replacements:
        text_ = text_.replace(old, new)
    return text_
def safe_request(url, retry_count=0):
    """GET ``url`` with the shared headers and timeout, retrying on failure.

    Args:
        url: Address to request.
        retry_count: Number of retries already consumed; callers normally
            leave this at its default of 0.

    Returns:
        The response object when the request succeeds with a non-error
        status, or ``None`` once ``MAX_RETRIES`` retries have been used.
    """
    while True:
        try:
            response = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            if retry_count >= MAX_RETRIES:
                logger.error(f"请求失败: {url}, 错误: {str(e)}")
                return None
            logger.warning(f"请求失败,正在进行第{retry_count + 1}次重试: {url}")
            # Fixed one-second pause before the next attempt.
            sleep(1)
            retry_count += 1
        else:
            return response
def nbc():
    """Collect article titles and links from the NBC News front page.

    Every ``//h2/a`` link on the front page (after adjacent duplicates are
    dropped, at most ``NEWS_LIMIT`` of them) is fetched and its first
    ``<h1>`` text is used as the title. Pages without an ``<h1>`` text
    node and pages that fail to load are skipped.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string, or a short failure message when the front page
        cannot be fetched or an unexpected error occurs.
    """
    logger.info("开始获取NBC新闻")
    try:
        front_page = safe_request('https://www.nbcnews.com/')
        if not front_page:
            return "获取NBC新闻失败"
        links = title_tidy(etree.HTML(front_page.text).xpath('//h2/a/@href'))
        entries = []
        for url in links[:NEWS_LIMIT]:
            try:
                article = safe_request(url)
                if not article:
                    continue
                headlines = etree.HTML(article.text).xpath('//h1/text()')
                if not headlines:
                    logger.warning(f'跳过视频或其他类型新闻: {url}')
                    continue
                title = headlines[0]
                entries.append(f'Title: {title}. Link: {url}\n')
                # Short pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {url}, 错误: {str(e)}")
                continue
        count = len(entries)
        logger.info(f"NBC新闻获取完成共获取{count}")
        return ''.join(entries)
    except Exception as e:
        logger.error(f"获取NBC新闻失败: {str(e)}")
        return "获取新闻失败,请查看日志了解详情"
def cnn():
    """Collect article titles and links from the CNN front page.

    Hrefs of anchors marked ``data-link-type="article"`` are appended to
    the site root, fetched (at most ``NEWS_LIMIT`` of them) and the
    ``headlineText`` ``<h1>`` is used as the title. Pages without that
    heading and pages that fail to load are skipped.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string, or a short failure message when the front page
        cannot be fetched or an unexpected error occurs.
    """
    logger.info("开始获取CNN新闻")
    try:
        head = 'https://www.cnn.com'
        front_page = safe_request(head + '/')
        if not front_page:
            return "获取CNN新闻失败"
        paths = title_tidy(
            etree.HTML(front_page.text).xpath('//a[@data-link-type="article"]/@href'))
        entries = []
        for url in paths[:NEWS_LIMIT]:
            try:
                full_url = head + url
                article = safe_request(full_url)
                if not article:
                    continue
                headlines = etree.HTML(article.text).xpath(
                    '//h1[@data-editable="headlineText"]/text()')
                if not headlines:
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue
                title = headlines[0].strip()
                entries.append(f'Title: {title}. Link: {full_url}\n')
                # Short pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
                continue
        count = len(entries)
        logger.info(f"CNN新闻获取完成共获取{count}")
        return ''.join(entries)
    except Exception as e:
        logger.error(f"获取CNN新闻失败: {str(e)}")
        return "获取新闻失败,请查看日志了解详情"
def abc():
    """Collect article titles and links from the ABC News front page.

    Links are gathered from four front-page XPath selections, adjacent
    duplicates are dropped, and at most ``NEWS_LIMIT`` of them are fetched.
    The ``prism-headline`` ``<h1>`` of each page is used as the title;
    pages without it and pages that fail to load are skipped.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string, or a short failure message when the front page
        cannot be fetched or an unexpected error occurs.
    """
    logger.info("开始获取ABC新闻")
    try:
        # Site root without a trailing slash; the separator is added when
        # relative links are resolved below.
        head = 'https://abcnews.go.com'
        front_page = safe_request(head)
        if not front_page:
            return "获取ABC新闻失败"
        tree = etree.HTML(front_page.text)
        trio_links = tree.xpath('//div[@class="HeadlinesTrio"]/a/@href')
        card_links = tree.xpath(
            '//div[@class="title card"]/a[@class="AnchorLink"]/@href | //div[@class="title"]/a[@class="AnchorLink"]/@href')
        self_links = tree.xpath('//a[@target="_self"]/@href')
        video_links = tree.xpath('//a[@class="AnchorLink VideoTile"]/@href')
        links = title_tidy(trio_links + card_links + self_links + video_links)
        entries = []
        for url in links[:NEWS_LIMIT]:
            try:
                # Turn protocol-relative, absolute, root-relative and bare
                # hrefs into a full URL.
                if url.startswith('//'):
                    full_url = 'https:' + url
                elif url.startswith('http'):
                    full_url = url
                elif url.startswith('/'):
                    full_url = head + url
                else:
                    full_url = head + '/' + url
                article = safe_request(full_url)
                if not article:
                    continue
                headlines = etree.HTML(article.text).xpath(
                    '//div[@data-testid="prism-headline"]/h1/text()')
                if not headlines:
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue
                title = headlines[0]
                entries.append(f'Title: {title}. Link: {full_url}\n')
                # Short pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
                continue
        count = len(entries)
        logger.info(f"ABC新闻获取完成共获取{count}")
        return ''.join(entries)
    except Exception as e:
        logger.error(f"获取ABC新闻失败: {str(e)}")
        return "获取新闻失败,请查看日志了解详情"
def fox():
    """Collect article titles and links from the Fox News front page.

    Hrefs under ``<h3 class="title">`` are de-duplicated (adjacent
    duplicates only), limited to ``NEWS_LIMIT`` and fetched; the
    ``itemprop="headline"`` ``<h1>`` of each page is used as the title.
    Pages without that heading and pages that fail to load are skipped.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string, or a short failure message when the front page
        cannot be fetched or an unexpected error occurs.
    """
    # Local import: the module's import block does not provide urljoin.
    from urllib.parse import urljoin

    logger.info("开始获取FOX新闻")
    try:
        head = 'https://www.foxnews.com/'
        response = safe_request(head)
        if not response:
            return "获取FOX新闻失败"
        html = etree.HTML(response.text)
        href = title_tidy(html.xpath('//h3[@class="title"]/a/@href'))
        entries = []
        for url in href[:NEWS_LIMIT]:
            try:
                if not url.startswith('http'):
                    # Resolve against the site root. Protocol-relative
                    # links ("//host/path") get the https scheme exactly as
                    # before, while root-relative or bare paths now become
                    # valid URLs instead of "https:/path".
                    url = urljoin(head, url)
                response = safe_request(url)
                if not response:
                    continue
                html = etree.HTML(response.text)
                title = html.xpath('//h1[@itemprop="headline"]/text()')
                if not title:
                    logger.warning(f'跳过视频或其他类型新闻: {url}')
                    continue
                title = title[0]
                entries.append(f'Title: {title}. Link: {url}\n')
                # Short pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {url}, 错误: {str(e)}")
                continue
        count = len(entries)
        logger.info(f"FOX新闻获取完成共获取{count}")
        return ''.join(entries)
    except Exception as e:
        logger.error(f"获取FOX新闻失败: {str(e)}")
        return "获取新闻失败,请查看日志了解详情"
def bbc():
    """Collect article titles and links from the BBC front page.

    Hrefs are read from the ancestors of ``card-headline`` ``<h2>``
    elements. Hrefs that already start with ``http`` are skipped; the
    rest are appended to the site root, fetched (at most ``NEWS_LIMIT``
    candidates are considered) and the ``headline-block`` ``<h1>`` is used
    as the title. Pages without that heading and pages that fail to load
    are skipped.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string, or a short failure message when the front page
        cannot be fetched or an unexpected error occurs.
    """
    logger.info("开始获取BBC新闻")
    try:
        head = 'https://www.bbc.com'
        front_page = safe_request(head + '/')
        if not front_page:
            return "获取BBC新闻失败"
        paths = title_tidy(etree.HTML(front_page.text).xpath(
            '//h2[@data-testid="card-headline"]/../../../../../@href | //h2[@data-testid="card-headline"]/../../../../@href'))
        entries = []
        for url in paths[:NEWS_LIMIT]:
            try:
                # Only site-relative links are followed.
                if url.startswith('http'):
                    continue
                full_url = head + url
                article = safe_request(full_url)
                if not article:
                    continue
                headlines = etree.HTML(article.text).xpath(
                    '//div[@data-component="headline-block"]/h1/text()')
                if not headlines:
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue
                title = headlines[0]
                entries.append(f'Title: {title}. Link: {full_url}\n')
                # Short pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
                continue
        count = len(entries)
        logger.info(f"BBC新闻获取完成共获取{count}")
        return ''.join(entries)
    except Exception as e:
        logger.error(f"获取BBC新闻失败: {str(e)}")
        return "获取新闻失败,请查看日志了解详情"
def all_english_news():
    """Run every site scraper and render the combined text as an image.

    The scrapers are called in the order NBC, CNN, ABC, Fox, BBC; each
    result is followed by a newline and the concatenation is handed to
    ``convert_md_str_to_image`` together with the output file name
    ``news_output.png``.

    Returns:
        Whatever ``convert_md_str_to_image`` returns (presumably the path
        of the written image; confirm against that helper).
    """
    scrapers = (nbc, cnn, abc, fox, bbc)
    sections = []
    for fetch in scrapers:
        sections.append(fetch() + "\n")
    markdown_news = "".join(sections)
    return convert_md_str_to_image(markdown_news, "news_output.png")

View File

@@ -1,66 +0,0 @@
# -*- coding: utf-8 -*-
# @Time : 2022/12/29 15:51
# @Author : 南宫乘风
# @Email : 1794748404@qq.com
# @File : epic.py
# @Software: PyCharm
from datetime import datetime
import json
import re
import time
import requests
from bs4 import BeautifulSoup
def is_friday():
    """Return ``True`` when the local date is a Friday, else ``False``."""
    # isoweekday() numbers Monday as 1 and Sunday as 7, so Friday is 5.
    return datetime.today().isoweekday() == 5
def get_free():
    """Build a text digest of the free-game table on steamstats.cn.

    The page is downloaded, its first ``<tbody>`` is located and every row
    with at least seven cells is rendered as a numbered entry (name, type,
    start, end, permanence, platform, URL).

    Returns:
        The digest string. It always starts with the header line; when the
        page has no ``<tbody>`` only that header is returned.

    Raises:
        requests.RequestException: If the page cannot be downloaded within
            the timeout or the server answers with an error status.
    """
    url = 'https://steamstats.cn/xi'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.41'}
    # A timeout keeps the call from blocking indefinitely on a stalled
    # server.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, "html.parser")
    text = "今日喜加一 :" + 'https://store.epicgames.com/en-US/free-games' + '\n'
    tbody = soup.find('tbody')
    if tbody is None:
        return text

    def cell_text(node):
        # ``.string`` is None when a tag has several children; fall back to
        # the concatenated text so a nested tag does not crash the digest.
        if node is None:
            return ''
        return node.string or node.get_text()

    def compact(node):
        # Cell text with spaces and line breaks removed.
        return cell_text(node).replace(" ", "").replace('\n', '').replace('\r', '')

    i = 1
    for row in tbody.find_all('tr'):
        td = row.find_all('td')
        if len(td) < 7:
            # Not a data row (spacer or notice); indexing it would fail.
            continue
        a_tags = td[6].find_all('a')
        # Use the last link of the row, and never a link left over from a
        # previous row; rows without a link get an empty URL.
        href_value = (a_tags[-1].get('href') or '') if a_tags else ''
        name = cell_text(td[1]).strip().replace('\n', '').replace('\r', '')
        gametype = compact(td[2])
        start = compact(td[3])
        end = compact(td[4])
        permanent = compact(td[5])
        oringin = compact(td[6].find('span'))
        text = (text + "序号:" + str(i) + '\n'
                + "游戏名称:" + name + '\n'
                + "DLC/game" + gametype + '\n'
                + "开始时间:" + start + '\n'
                + "结束时间:" + end + '\n'
                + "是否永久:" + permanent + '\n'
                + "平台:" + oringin + '\n'
                + "URL" + href_value + '\n'
                )
        i = i + 1
    return text
# Script entry point: print the digest when the file is run directly.
if __name__ == "__main__":
    print(get_free())
    # Disabled delivery step kept from the original author:
    # if len(game_info) > 40:
    # send_to_epic_message(get_free())

View File

@@ -1,136 +0,0 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from typing import Optional
from loguru import logger
import time
from datetime import datetime
import requests
from lxml import etree
from base import func_english_news
class News(object):
    """Fetch news digests from several upstream sources.

    Each ``get_*`` method talks to one source and returns text (or an
    image URL for the 60s digest). Network errors are logged through
    ``self.LOG`` and reported through the return value instead of being
    raised.
    """

    # Sites for which ``func_english_news`` exposes a same-named scraper.
    _ENGLISH_SITES = ('nbc', 'cnn', 'abc', 'fox', 'bbc')
    # Seconds to wait for any upstream HTTP call before giving up.
    _TIMEOUT = 10

    def __init__(self) -> None:
        self.LOG = logger
        # Maps datetime.weekday() values (Monday is 0) to Chinese labels.
        self.week = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"}
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}

    def get_important_news(self):
        """Return today's overnight global news digest from cls.cn.

        Returns:
            The formatted digest, or an empty string when the request
            fails, the response has an unexpected shape, or the newest
            item was published on a different weekday than today.
        """
        url = "https://www.cls.cn/api/sw?app=CailianpressWeb&os=web&sv=7.7.5"
        data = {"type": "telegram", "keyword": "你需要知道的隔夜全球要闻", "page": 0,
                "rn": 1, "os": "web", "sv": "7.7.5", "app": "CailianpressWeb"}
        try:
            rsp = requests.post(url=url, headers=self.headers, data=data,
                                timeout=self._TIMEOUT)
            item = json.loads(rsp.text)["data"]["telegram"]["data"][0]
            news = item["descr"]
            ts = time.localtime(item["time"])
        except Exception as e:
            self.LOG.error(e)
            return ""

        # struct_time.tm_wday uses the same Monday-is-0 numbering as
        # datetime.weekday().
        weekday_news = ts.tm_wday
        if weekday_news != datetime.now().weekday():
            # Stale item from another day. The original author's note says
            # new items were observed around 06:30 in the morning and
            # mentions Tuesday and Saturday.
            return ""

        fmt_time = time.strftime("%Y年%m月%d", ts)
        # Put every numbered entry ("1、", "12、", ...) on its own line.
        news = re.sub(r"(\d{1,2}、)", r"\n\1", news)
        # Drop markup by keeping only the text nodes.
        fmt_news = "".join(etree.HTML(news).xpath(" // text()"))
        # Remove the "<weekday> you need to know" heading. The character
        # class lists the weekday characters without "|" so that a literal
        # pipe is no longer accepted.
        fmt_news = re.sub(r"周[一二三四五六日]你需要知道的", r"", fmt_news)
        return f"{fmt_time} {self.week[weekday_news]}\n{fmt_news}"

    def get_baidu_news(self):
        """Return the Baidu real-time hot-search board as numbered lines.

        Returns:
            A header line with the current date and weekday followed by
            one ``N :#title`` line per entry, or an error message when the
            request fails.
        """
        url = "https://top.baidu.com/api/board?platform=wise&tab=realtime"
        now = datetime.now()
        current_date = now.strftime("%Y年%m月%d")
        # Indexed by datetime.weekday(): 0 is Monday, 6 is Sunday.
        chinese_weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
        current_weekday_chinese = chinese_weekdays[now.weekday()]
        lines = [f"当前日期:{current_date} {current_weekday_chinese}\n\n"]
        try:
            response = requests.get(url, headers=self.headers, timeout=self._TIMEOUT)
            response.raise_for_status()
            if response.status_code != 200:
                self.LOG.error(f"获取百度新闻失败,状态码: {response.status_code}")
                return "获取百度新闻失败,请稍后再试"
            post = response.json()
            index = 1
            # The payload nests articles as cards -> content blocks ->
            # content entries; only dict entries carrying "word" count.
            for card in post.get('data', {}).get('cards', []):
                for block in card.get('content', []):
                    for article in block.get('content', []):
                        if isinstance(article, dict) and 'word' in article:
                            title = str(article.get('word', '')).strip().replace(" ", "_")
                            lines.append(f"{index} :#{title}\n")
                            index += 1
            return "".join(lines)
        except Exception as e:
            self.LOG.error(f"获取百度新闻时出错: {e}")
            return f"获取百度新闻时出错: {e}"

    def get_eng_news(self, website):
        """Return the headline digest of one English-language site.

        Args:
            website: One of ``'nbc'``, ``'cnn'``, ``'abc'``, ``'fox'`` or
                ``'bbc'``.

        Returns:
            The text produced by the matching ``func_english_news``
            scraper, or ``None`` for any other value.
        """
        if website not in self._ENGLISH_SITES:
            return None
        # The scraper functions are named after the site keys.
        return getattr(func_english_news, website)()

    def get_news_60s(self) -> Optional[str]:
        """Call the 60s endpoint and extract its ``image`` field.

        Returns:
            The value stored under ``data.image`` in the JSON response, or
            ``None`` when the request fails, the body is not valid JSON or
            the field is missing.
        """
        API_URL = "http://192.168.2.32:4399/v2/60s"
        try:
            resp = requests.get(API_URL, timeout=self._TIMEOUT)
            resp.raise_for_status()  # raises for HTTP error statuses
            body = resp.json()
        except requests.RequestException as e:
            self.LOG.error(f"请求接口失败: {e}")
            return None
        except ValueError as e:
            self.LOG.error(f"JSON 解析失败: {e}")
            return None
        # Tolerate payloads whose top level or "data" entry is not an
        # object instead of raising AttributeError.
        payload = body.get("data") if isinstance(body, dict) else None
        if not isinstance(payload, dict):
            return None
        return payload.get("image")
# Script entry point: print the Baidu hot-search digest as a smoke test.
if __name__ == "__main__":
    news = News()
    print(news.get_baidu_news())
    # Further manual checks used by the original author (disabled):
    # stripping an "@mention" prefix from a chat message with re.sub, and
    # print(news.get_eng_news('nbc'))