diff --git a/base/func_english_news.py b/base/func_english_news.py deleted file mode 100644 index b884fe0..0000000 --- a/base/func_english_news.py +++ /dev/null @@ -1,329 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Program: English Daily News Downloader -Author: MrCrawL -Created Date: 2024-01-21 -Last Modified: 2024-03-24 -Modified by: MrCrawL -""" -from utils.markdown_to_image import convert_md_str_to_image - -'''Existing problem: text with hyperlink won't be saved''' - -import requests -from time import localtime, sleep -from lxml import etree -from loguru import logger - -# 请求配置 -HEADERS = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' -} -TIMEOUT = 10 -MAX_RETRIES = 3 -NEWS_LIMIT = 30 - - -def get_time(): - date_ = f'{str(localtime().tm_year).zfill(4)}-{str(localtime().tm_mon).zfill(2)}-{str(localtime().tm_mday).zfill(2)}' - return date_ - - -def title_tidy(title_list): - t_index = [] - for i in range(1, len(title_list)): - if title_list[i] == title_list[i - 1]: t_index.append(i) - t_index.reverse() - for i in range(len(t_index)): title_list.pop(t_index[i]) - return title_list - - -def text_tidy(p_text): - text_ = p_text.replace('’', "'") - text_ = text_.replace(' \n\n', ' ') - text_ = text_.replace('\n\n ', ' ') - text_ = text_.replace('\n\n,', ' ,') - text_ = text_.replace(',\n\n', ', ') - text_ = text_.replace(';\n\n', '; ') - text_ = text_.replace('\n\n;', ' ;') - text_ = text_.replace(':\n\n', ': ') - text_ = text_.replace('\n\n:', ' :') - text_ = text_.replace('"\n\n', '" ') - text_ = text_.replace('\n\n"', ' "') - text_ = text_.replace("'\n\n", "' ") - text_ = text_.replace("\n\n'", " '") - return text_ - - -def safe_request(url, retry_count=0): - """安全的请求方法,包含重试机制""" - try: - response = requests.get(url, headers=HEADERS, timeout=TIMEOUT) - response.raise_for_status() - return response - except requests.RequestException as e: - if retry_count < MAX_RETRIES: - logger.warning(f"请求失败,正在进行第{retry_count + 1}次重试: {url}") - sleep(1) - return safe_request(url, retry_count + 1) - else: - logger.error(f"请求失败: {url}, 错误: {str(e)}") - return None - - -def nbc(): - logger.info("开始获取NBC新闻") - try: - url = 'https://www.nbcnews.com/' - response = safe_request(url) - if not response: - return "获取NBC新闻失败" - - html = etree.HTML(response.text) - href = html.xpath('//h2/a/@href') - href = title_tidy(href) - - msg = '' - count = 0 - - for url in href[:NEWS_LIMIT]: - try: - response = safe_request(url) - if not response: - continue - - html = etree.HTML(response.text) - title = html.xpath('//h1/text()') - - if not title: - logger.warning(f'跳过视频或其他类型新闻: {url}') - continue - - title = title[0] - msg += f'Title: {title}. Link: {url}\n' - count += 1 - sleep(0.1) - - except Exception as e: - logger.error(f"处理新闻失败: {url}, 错误: {str(e)}") - continue - - logger.info(f"NBC新闻获取完成,共获取{count}条") - return msg - - except Exception as e: - logger.error(f"获取NBC新闻失败: {str(e)}") - return "获取新闻失败,请查看日志了解详情" - - -def cnn(): - logger.info("开始获取CNN新闻") - try: - head = 'https://www.cnn.com' - response = safe_request(head + '/') - if not response: - return "获取CNN新闻失败" - - html = etree.HTML(response.text) - href = html.xpath('//a[@data-link-type="article"]/@href') - href = title_tidy(href) - - msg = '' - count = 0 - - for url in href[:NEWS_LIMIT]: - try: - full_url = head + url - response = safe_request(full_url) - if not response: - continue - - html = etree.HTML(response.text) - title = html.xpath('//h1[@data-editable="headlineText"]/text()') - - if not title: - logger.warning(f'跳过视频或其他类型新闻: {full_url}') - continue - - title = title[0].strip() - msg += f'Title: {title}. Link: {full_url}\n' - count += 1 - sleep(0.1) - - except Exception as e: - logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}") - continue - - logger.info(f"CNN新闻获取完成,共获取{count}条") - return msg - - except Exception as e: - logger.error(f"获取CNN新闻失败: {str(e)}") - return "获取新闻失败,请查看日志了解详情" - - -def abc(): - logger.info("开始获取ABC新闻") - try: - head = 'https://abcnews.go.com' # 移除末尾的斜杠 - response = safe_request(head) - if not response: - return "获取ABC新闻失败" - - html = etree.HTML(response.text) - href1 = html.xpath('//div[@class="HeadlinesTrio"]/a/@href') - href2 = html.xpath( - '//div[@class="title card"]/a[@class="AnchorLink"]/@href | //div[@class="title"]/a[@class="AnchorLink"]/@href') - href3 = html.xpath('//a[@target="_self"]/@href') - href4 = html.xpath('//a[@class="AnchorLink VideoTile"]/@href') - href = title_tidy(href1 + href2 + href3 + href4) - - msg = '' - count = 0 - - for url in href[:NEWS_LIMIT]: - try: - # 处理URL格式 - if url.startswith('http'): - full_url = url - elif url.startswith('//'): - full_url = 'https:' + url - else: - full_url = head + ('' if url.startswith('/') else '/') + url - - response = safe_request(full_url) - if not response: - continue - - html = etree.HTML(response.text) - title = html.xpath('//div[@data-testid="prism-headline"]/h1/text()') - - if not title: - logger.warning(f'跳过视频或其他类型新闻: {full_url}') - continue - - title = title[0] - msg += f'Title: {title}. Link: {full_url}\n' - count += 1 - sleep(0.1) - - except Exception as e: - logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}") - continue - - logger.info(f"ABC新闻获取完成,共获取{count}条") - return msg - - except Exception as e: - logger.error(f"获取ABC新闻失败: {str(e)}") - return "获取新闻失败,请查看日志了解详情" - - -def fox(): - logger.info("开始获取FOX新闻") - try: - head = 'https://www.foxnews.com/' - response = safe_request(head) - if not response: - return "获取FOX新闻失败" - - html = etree.HTML(response.text) - href = html.xpath('//h3[@class="title"]/a/@href') - href = title_tidy(href) - - msg = '' - count = 0 - - for url in href[:NEWS_LIMIT]: - try: - if url[0:4] != 'http': - url = 'https:' + url - - response = safe_request(url) - if not response: - continue - - html = etree.HTML(response.text) - title = html.xpath('//h1[@itemprop="headline"]/text()') - - if not title: - logger.warning(f'跳过视频或其他类型新闻: {url}') - continue - - title = title[0] - msg += f'Title: {title}. Link: {url}\n' - count += 1 - sleep(0.1) - - except Exception as e: - logger.error(f"处理新闻失败: {url}, 错误: {str(e)}") - continue - - logger.info(f"FOX新闻获取完成,共获取{count}条") - return msg - - except Exception as e: - logger.error(f"获取FOX新闻失败: {str(e)}") - return "获取新闻失败,请查看日志了解详情" - - -def bbc(): - logger.info("开始获取BBC新闻") - try: - head = 'https://www.bbc.com' - response = safe_request(head + '/') - if not response: - return "获取BBC新闻失败" - - html = etree.HTML(response.text) - href = html.xpath( - '//h2[@data-testid="card-headline"]/../../../../../@href | //h2[@data-testid="card-headline"]/../../../../@href') - href = title_tidy(href) - - msg = '' - count = 0 - - for url in href[:NEWS_LIMIT]: - try: - if url[0:4] == 'http': - continue - - full_url = head + url - response = safe_request(full_url) - if not response: - continue - - html = etree.HTML(response.text) - title = html.xpath('//div[@data-component="headline-block"]/h1/text()') - - if not title: - logger.warning(f'跳过视频或其他类型新闻: {full_url}') - continue - - title = title[0] - msg += f'Title: {title}. Link: {full_url}\n' - count += 1 - sleep(0.1) - - except Exception as e: - logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}") - continue - - logger.info(f"BBC新闻获取完成,共获取{count}条") - return msg - - except Exception as e: - logger.error(f"获取BBC新闻失败: {str(e)}") - return "获取新闻失败,请查看日志了解详情" - - -def all_english_news(): - news_titles = "" - news_titles += nbc() + "\n" - news_titles += cnn() + "\n" - news_titles += abc() + "\n" - news_titles += fox() + "\n" - news_titles += bbc() + "\n" - markdown_news = news_titles # self.dify_news_title_analyze(news_titles) - spath = convert_md_str_to_image(markdown_news, "news_output.png") - return spath diff --git a/base/func_epic.py b/base/func_epic.py deleted file mode 100644 index 6a3e442..0000000 --- a/base/func_epic.py +++ /dev/null @@ -1,66 +0,0 @@ -# -*- coding: utf-8 -*- -# @Time : 2022/12/29 15:51 -# @Author : 南宫乘风 -# @Email : 1794748404@qq.com -# @File : epic.py -# @Software: PyCharm -from datetime import datetime -import json -import re -import time - -import requests -from bs4 import BeautifulSoup - - -def is_friday(): - today = datetime.today() - return today.weekday() == 4 # Monday is 0 and Sunday is 6, so Friday is 4 - - - -def get_free(): - url = 'https://steamstats.cn/xi' - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.41'} - r = requests.get(url, headers=headers) - r.raise_for_status() - r.encoding = r.apparent_encoding - soup = BeautifulSoup(r.text, "html.parser") - text = "今日喜加一 :" + 'https://store.epicgames.com/en-US/free-games' +'\n' - - tbody = soup.find('tbody') - tr = tbody.find_all('tr') - i = 1 - for tr in tr: - td = tr.find_all('td') - a_tags = td[6].find_all('a') - for a in a_tags: - href_value = a.get('href') - name = td[1].string.strip().replace('\n', '').replace('\r', '') - gametype = td[2].string.replace(" ", "").replace('\n', '').replace('\r', '') - start = td[3].string.replace(" ", "").replace('\n', '').replace('\r', '') - end = td[4].string.replace(" ", "").replace('\n', '').replace('\r', '') - time = td[5].string.replace(" ", "").replace('\n', '').replace('\r', '') - oringin = td[6].find('span').string.replace(" ", "").replace('\n', '').replace('\r', '') - - text = (text + "序号:" + str( - i) + '\n' + "游戏名称:" + name + '\n' - + "DLC/game:" + gametype + '\n' - + "开始时间:" + start + '\n' - + "结束时间:" + end + '\n' - + "是否永久:" + time + '\n' - + "平台:" + oringin + '\n' - + "URL:" + href_value + '\n' - ) - - # print(text) - i=i+1 - - return text - -if __name__ == "__main__": - print(get_free()) - # if len(game_info) > 40: - - # send_to_epic_message(get_free()) \ No newline at end of file diff --git a/base/func_news.py b/base/func_news.py deleted file mode 100644 index 4713d41..0000000 --- a/base/func_news.py +++ /dev/null @@ -1,136 +0,0 @@ -#! /usr/bin/env python3 -# -*- coding: utf-8 -*- - -import json -import re -from typing import Optional - -from loguru import logger -import time -from datetime import datetime - -import requests -from lxml import etree - -from base import func_english_news - - -class News(object): - def __init__(self) -> None: - self.LOG = logger - self.week = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"} - self.headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"} - - def get_important_news(self): - url = "https://www.cls.cn/api/sw?app=CailianpressWeb&os=web&sv=7.7.5" - data = {"type": "telegram", "keyword": "你需要知道的隔夜全球要闻", "page": 0, - "rn": 1, "os": "web", "sv": "7.7.5", "app": "CailianpressWeb"} - try: - rsp = requests.post(url=url, headers=self.headers, data=data) - data = json.loads(rsp.text)["data"]["telegram"]["data"][0] - news = data["descr"] - timestamp = data["time"] - ts = time.localtime(timestamp) - weekday_news = datetime(*ts[:6]).weekday() - except Exception as e: - self.LOG.error(e) - return "" - - weekday_now = datetime.now().weekday() - if weekday_news != weekday_now: - return "" # 旧闻,观察发现周二~周六早晨6点半左右发布 - - fmt_time = time.strftime("%Y年%m月%d日", ts) - - news = re.sub(r"(\d{1,2}、)", r"\n\1", news) - fmt_news = "".join(etree.HTML(news).xpath(" // text()")) - fmt_news = re.sub(r"周[一|二|三|四|五|六|日]你需要知道的", r"", fmt_news) - - return f"{fmt_time} {self.week[weekday_news]}\n{fmt_news}" - - def get_baidu_news(self): - url = "https://top.baidu.com/api/board?platform=wise&tab=realtime" - # 获取当前日期和英文星期名 - now = datetime.now() - current_date = now.strftime("%Y年%m月%d日") - english_weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] - chinese_weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"] - - # 将英文星期名映射为中文 - current_weekday_index = now.weekday() # 获取当前是星期几(0代表星期一,6代表星期日) - current_weekday_chinese = chinese_weekdays[current_weekday_index] - - # 初始化一个空字符串来存储结果 - output = f"当前日期:{current_date} {current_weekday_chinese}\n\n" - - try: - response = requests.get(url, headers=self.headers, timeout=10) - response.raise_for_status() - if response.status_code == 200: - post = response.json() - cards = post.get('data', {}).get('cards', []) - index = 1 - for card in cards: - blocks = card.get('content', []) - for block in blocks: - articles = block.get('content', []) - for article in articles: - if isinstance(article, dict) and 'word' in article: - title = str(article.get('word', '')).strip().replace(" ", "_") - raw_url = str(article.get('url', '')).strip() - url = raw_url.strip('`').strip() - output += f"{index} :#{title}\n" - index += 1 - - # 输出最终的字符串 - return output - else: - self.LOG.error(f"获取百度新闻失败,状态码: {response.status_code}") - return "获取百度新闻失败,请稍后再试" - - except Exception as e: - self.LOG.error(f"获取百度新闻时出错: {e}") - return f"获取百度新闻时出错: {e}" - - def get_eng_news(self, website): - if website == 'nbc': - return func_english_news.nbc() - elif website == 'cnn': - return func_english_news.cnn() - elif website == 'abc': - return func_english_news.abc() - elif website == 'fox': - return func_english_news.fox() - elif website == 'bbc': - return func_english_news.bbc() - - def get_news_60s(self) -> Optional[str]: - """ - 调用 60s 接口并提取 image 字段 - :return: image url 或 None - """ - - API_URL = "http://192.168.2.32:4399/v2/60s" - try: - resp = requests.get(API_URL) - resp.raise_for_status() # HTTP 非 200 会抛异常 - - data = resp.json() - return data.get("data", {}).get("image") - - except requests.RequestException as e: - print(f"请求接口失败: {e}") - except ValueError as e: - print(f"JSON 解析失败: {e}") - - return None - - -if __name__ == "__main__": - news = News() - print(news.get_baidu_news()) - # # msg = "@水牛-分身 今日百度新闻" - # # q = re.sub(r"@.*?[\u2005|\s]", "", msg).replace(" ", "") - # # print(q) - # print(news.get_eng_news('nbc'))