feature:全球政治经济新闻

2025-04-14 16:02:00 +08:00
parent bfdb1831d3
commit 8afd0f49d0
8 changed files with 885 additions and 220 deletions
--- a/plugins/global_news/init.py
+++ b/plugins/global_news/init.py
@@ -0,0 +1,7 @@
+# 从当前包的main模块导入GlobalNewsPlugin类
+from .main import GlobalNewsPlugin
+
+# Expose get_plugin, which hands back a plugin instance
+def get_plugin():
+    """Create and return a new GlobalNewsPlugin instance."""
+    plugin = GlobalNewsPlugin()
+    return plugin
--- a/plugins/global_news/config.toml
+++ b/plugins/global_news/config.toml
@@ -0,0 +1,6 @@
+enable = true
+command = ["全球新闻", "国际新闻", "环球新闻", "政经新闻", "政治经济新闻"]
+command-format = """
+🌍全球新闻指令：
+全球新闻 - 获取最新的全球政治经济新闻
+"""
--- a/plugins/global_news/main.py
+++ b/plugins/global_news/main.py
@@ -0,0 +1,191 @@
+import logging
+import asyncio
+import threading
+import time  # 添加这一行
+from typing import Dict, Any, List, Optional, Tuple
+
+from wcferry import Wcf
+
+from plugin_common.message_plugin_interface import MessagePluginInterface
+from plugin_common.plugin_interface import PluginStatus
+from utils.decorator.plugin_decorators import plugin_stats_decorator
+from utils.robot_cmd.robot_command import Feature, PermissionStatus, GroupBotManager
+from utils.decorator.points_decorator import plugin_points_cost
+from utils.ai.dify_news_analyze import dify_news_title_analyze
+from utils.markdown_to_image import convert_md_str_to_image
+
+# 导入新闻抓取函数
+from .news_crawler import nbc, cnn, abc, fox, bbc
+
+
+class GlobalNewsPlugin(MessagePluginInterface):
+    """Message plugin that answers a news command with an image digest.
+
+    On a matching command the plugin acknowledges the request, then, on a
+    background thread, runs the five crawler functions (nbc, cnn, abc, fox,
+    bbc), passes the combined headlines to ``dify_news_title_analyze``,
+    renders the returned markdown with ``convert_md_str_to_image`` and sends
+    the resulting image through ``wcf``.
+    """
+
+    # Fallback command list used when the configuration provides none;
+    # kept identical to the list shipped in this plugin's config.toml.
+    _DEFAULT_COMMANDS = ("全球新闻", "国际新闻", "环球新闻", "政经新闻", "政治经济新闻")
+
+    @property
+    def name(self) -> str:
+        return "全球政治经济新闻"
+
+    @property
+    def version(self) -> str:
+        return "1.0.0"
+
+    @property
+    def description(self) -> str:
+        return "提供全球政治经济新闻，支持多个国际新闻源"
+
+    @property
+    def author(self) -> str:
+        return "Trae AI"
+
+    @property
+    def command_prefix(self) -> Optional[str]:
+        return ""  # no prefix: the command word itself is matched
+
+    @property
+    def commands(self) -> List[str]:
+        return self._commands
+
+    def __init__(self):
+        super().__init__()
+        # task_id -> worker thread, for fetches that are still running
+        self._news_tasks = {}
+        # Default so the `commands` property does not raise AttributeError
+        # when it is read before `initialize` has run.
+        self._commands = list(self._DEFAULT_COMMANDS)
+
+    def initialize(self, context: Dict[str, Any]) -> bool:
+        """Store context objects and load the plugin settings.
+
+        Args:
+            context: mapping from which "wcf", "event_system" and
+                "message_util" are read (missing keys become None).
+
+        Returns:
+            Always True.
+        """
+        self.LOG = logging.getLogger(f"Plugin.{self.name}")
+        self.LOG.info(f"正在初始化 {self.name} 插件...")
+
+        # Keep the context objects for later use
+        self.wcf = context.get("wcf")
+        self.event_system = context.get("event_system")
+        self.message_util = context.get("message_util")
+
+        # NOTE(review): settings are read from a "GlobalNews" table, while the
+        # config.toml in this commit defines its keys at top level — confirm
+        # how the host populates self._config.
+        settings = self._config.get("GlobalNews", {})
+        self._commands = settings.get("command", list(self._DEFAULT_COMMANDS))
+        self.command_format = settings.get("command-format", "全球新闻 - 获取最新的全球政治经济新闻")
+        self.enable = settings.get("enable", True)
+
+        self.LOG.info(f"[{self.name}] 插件初始化完成，指令：{self._commands}")
+        return True
+
+    def start(self) -> bool:
+        """Mark the plugin as running. Always returns True."""
+        self.LOG.info(f"[{self.name}] 插件已启动")
+        self.status = PluginStatus.RUNNING
+        return True
+
+    def stop(self) -> bool:
+        """Mark the plugin as stopped. Always returns True."""
+        self.LOG.info(f"[{self.name}] 插件已停止")
+        self.status = PluginStatus.STOPPED
+        return True
+
+    def can_process(self, message: Dict[str, Any]) -> bool:
+        """Return True when the plugin is enabled and the first word of the
+        message content is one of the configured commands."""
+        if not self.enable:
+            return False
+
+        # split() without a separator treats any whitespace run (tabs,
+        # full-width spaces, ...) as the delimiter and yields [] for empty
+        # content, so the command word is found however it was typed.
+        words = str(message.get("content", "")).split(maxsplit=1)
+        return bool(words) and words[0] in self._commands
+
+    @plugin_stats_decorator(plugin_name="全球政治经济新闻")
+    @plugin_points_cost(5, "全球新闻消耗积分", Feature.NEWS)
+    def process_message(self, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
+        """Acknowledge the request and start the background news fetch.
+
+        Args:
+            message: mapping providing "content", "sender", "roomid", "wcf"
+                and "gbm".
+
+        Returns:
+            (False, reason) when the NEWS feature is disabled for the group,
+            otherwise (True, status text). The news itself is delivered
+            later by the worker thread.
+        """
+        content = str(message.get("content", "")).strip()
+        self.LOG.info(f"插件执行： {self.name}：{content}")
+        sender = message.get("sender")
+        roomid = message.get("roomid", "")
+        wcf: Wcf = message.get("wcf")
+        gbm: GroupBotManager = message.get("gbm")
+
+        # Permission check (only applies inside a group chat)
+        if roomid and gbm.get_group_permission(roomid, Feature.NEWS) == PermissionStatus.DISABLED:
+            return False, "没有权限"
+
+        # Nanosecond timestamp keeps ids distinct even when the same sender
+        # issues the command twice within one second.
+        task_id = f"{sender}_{roomid}_{time.time_ns()}"
+
+        # Tell the user that work has started
+        wcf.send_text("🌍正在获取全球新闻，请稍候...",
+                      (roomid if roomid else sender), sender)
+
+        self._start_news_task(task_id, sender, roomid, wcf)
+
+        return True, "新闻获取任务已启动"
+
+    def _start_news_task(self, task_id: str, sender: str, roomid: str, wcf: Wcf):
+        """Run `_fetch_news_thread` for this request on a daemon thread."""
+        worker = threading.Thread(
+            target=self._fetch_news_thread,
+            args=(task_id, sender, roomid, wcf),
+            daemon=True,
+        )
+        # Register before starting: the worker removes its own entry when it
+        # finishes, so registering afterwards could leave a stale entry.
+        self._news_tasks[task_id] = worker
+        worker.start()
+        self.LOG.info(f"启动新闻获取任务: {task_id}")
+
+    def _fetch_news_thread(self, task_id: str, sender: str, roomid: str, wcf: Wcf):
+        """Thread body: fetch the news, send the result, then unregister.
+
+        Sends the image plus a completion text on success, or a failure text
+        when no image path was produced or an exception occurred.
+        """
+        receiver = roomid if roomid else sender
+        try:
+            # asyncio.run creates a fresh event loop for this thread and
+            # always closes it, including when the coroutine raises.
+            image_path = asyncio.run(self._fetch_news_async())
+
+            if image_path:
+                wcf.send_image(image_path, receiver)
+                wcf.send_text("🌍全球新闻获取完成！", receiver, sender)
+            else:
+                wcf.send_text("❌获取新闻失败，请稍后再试", receiver, sender)
+        except Exception as e:
+            # exception() records the traceback along with the message
+            self.LOG.exception(f"新闻获取任务出错: {e}")
+            wcf.send_text(f"❌获取新闻出错: {str(e)}", receiver, sender)
+        finally:
+            self._news_tasks.pop(task_id, None)
+
+    async def _fetch_news_async(self) -> str:
+        """Crawl all sources concurrently and render the digest image.
+
+        Returns:
+            The value returned by ``convert_md_str_to_image`` (used by the
+            caller as the image path), or "" when nothing was collected or
+            any step raised.
+        """
+        try:
+            # Each crawler is blocking, so run them side by side in the
+            # default thread pool.
+            sources = (nbc, cnn, abc, fox, bbc)
+            results = await asyncio.gather(
+                *(self._run_in_executor(source) for source in sources)
+            )
+
+            news_titles = "\n".join(results)
+            if not news_titles.strip():
+                # Nothing to analyse: skip the AI call and image rendering
+                self.LOG.warning("未获取到任何新闻标题")
+                return ""
+
+            # Let the AI helper turn the headline list into markdown
+            markdown_news = await self._run_in_executor(
+                dify_news_title_analyze, news_titles
+            )
+
+            # NOTE(review): the output name is fixed, so two requests running
+            # at the same time are asked to render to the same file name —
+            # confirm whether convert_md_str_to_image makes the path unique.
+            image_path = await self._run_in_executor(
+                convert_md_str_to_image, markdown_news, "news_output.png"
+            )
+
+            return image_path
+        except Exception as e:
+            self.LOG.error(f"异步获取新闻失败: {e}")
+            return ""
+
+    async def _run_in_executor(self, func, *args):
+        """Run the blocking callable `func(*args)` in the loop's default
+        executor and return its result."""
+        # Inside a coroutine the running loop is the one to use;
+        # get_event_loop() is deprecated for this purpose.
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, func, *args)
--- a/plugins/global_news/news_crawler.py
+++ b/plugins/global_news/news_crawler.py
@@ -0,0 +1,307 @@
+# -*- coding: utf-8 -*-
+"""
+Program: Global News Crawler
+Author: Trae AI (based on MrCrawL's work)
+Created Date: 2024-05-01
+"""
+import logging
+import time
+from datetime import datetime
+from time import localtime, sleep
+from urllib.parse import urljoin
+
+import requests
+from lxml import etree
+
+# Logging setup
+# NOTE(review): basicConfig at import time configures the process-wide root
+# logger (and does nothing if the root logger already has handlers) —
+# confirm this is intended for a plugin module.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        # Explicit UTF-8: the log messages contain Chinese text and scraped
+        # headlines, which the platform's default codec may not be able to
+        # encode.
+        logging.FileHandler(
+            f'global_news_{datetime.now().strftime("%Y%m%d")}.log',
+            encoding='utf-8',
+        ),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+
+# Request settings shared by the crawler functions in this module
+HEADERS = {
+    'User-Agent': (
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+        'AppleWebKit/537.36 (KHTML, like Gecko) '
+        'Chrome/91.0.4472.124 Safari/537.36'
+    )
+}
+TIMEOUT = 10      # passed to requests.get as timeout=
+MAX_RETRIES = 3   # retries allowed after the first failed attempt
+NEWS_LIMIT = 30   # links followed per source, taken from the front of the list
+
+
+def get_time():
+    """Return today's local date as a ``YYYY-MM-DD`` string."""
+    # Format a single clock reading: reading the clock separately for year,
+    # month and day could mix two different dates around midnight.
+    return time.strftime('%Y-%m-%d', localtime())
+
+
+def title_tidy(title_list):
+    """Remove duplicate entries from *title_list*, keeping first occurrences.
+
+    Every repeat is dropped, not only one that directly follows its twin, so
+    the same link found by several XPath queries is followed once.
+
+    Args:
+        title_list: list of hashable items (the callers pass href strings);
+            it is modified in place.
+
+    Returns:
+        The same list object, de-duplicated, in original order.
+    """
+    # dict.fromkeys keeps first-seen order and discards later repeats;
+    # slice assignment preserves the in-place contract of the original.
+    title_list[:] = dict.fromkeys(title_list)
+    return title_list
+
+
+def safe_request(url, retry_count=0):
+    """GET *url* with the module's headers and timeout, retrying on failure.
+
+    Args:
+        url: address to fetch.
+        retry_count: number of retries already used; callers normally leave
+            it at 0.
+
+    Returns:
+        The ``requests.Response`` on success (status below 400), or None
+        once the request has failed for good. Never raises
+        ``requests.RequestException``.
+    """
+    attempt = retry_count
+    while True:
+        try:
+            response = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
+            response.raise_for_status()
+            return response
+        except requests.RequestException as e:
+            # A 4xx answer (dead link, forbidden, ...) will not change on an
+            # immediate retry; only network errors, 5xx, 408 and 429 are
+            # worth another attempt.
+            status = getattr(getattr(e, 'response', None), 'status_code', None)
+            retryable = status is None or status >= 500 or status in (408, 429)
+            if not retryable or attempt >= MAX_RETRIES:
+                logger.error(f"请求失败: {url}, 错误: {str(e)}")
+                return None
+            attempt += 1
+            logger.warning(f"请求失败，正在进行第{attempt}次重试: {url}")
+            sleep(1)
+
+
+def nbc():
+    """Collect article titles and links from the NBC News front page.
+
+    Returns:
+        str: one ``Title: <title>. Link: <url>`` line per article that could
+        be read (possibly empty), or a failure message when the front page
+        cannot be fetched or processed.
+    """
+    logger.info("开始获取NBC新闻")
+    try:
+        home = 'https://www.nbcnews.com/'
+        response = safe_request(home)
+        if not response:
+            return "获取NBC新闻失败"
+
+        page = etree.HTML(response.text)
+        links = title_tidy(page.xpath('//h2/a/@href'))
+
+        lines = []
+        for href in links[:NEWS_LIMIT]:
+            article_url = href  # shown in the error log if resolving fails
+            try:
+                # Absolute links pass through unchanged; site-relative and
+                # protocol-relative ones are resolved against the front page.
+                article_url = urljoin(home, href)
+                response = safe_request(article_url)
+                if not response:
+                    continue
+
+                title = etree.HTML(response.text).xpath('//h1/text()')
+                if not title:
+                    logger.warning(f'跳过视频或其他类型新闻: {article_url}')
+                    continue
+
+                lines.append(f'Title: {title[0].strip()}. Link: {article_url}\n')
+                sleep(0.1)  # brief pause between article requests
+
+            except Exception as e:
+                # One bad article must not abort the whole source
+                logger.error(f"处理新闻失败: {article_url}, 错误: {str(e)}")
+
+        logger.info(f"NBC新闻获取完成，共获取{len(lines)}条")
+        return ''.join(lines)
+
+    except Exception as e:
+        logger.error(f"获取NBC新闻失败: {str(e)}")
+        return "获取新闻失败，请查看日志了解详情"
+
+
+def cnn():
+    """Collect article titles and links from the CNN front page.
+
+    Returns:
+        str: one ``Title: <title>. Link: <url>`` line per article that could
+        be read (possibly empty), or a failure message when the front page
+        cannot be fetched or processed.
+    """
+    logger.info("开始获取CNN新闻")
+    try:
+        home = 'https://www.cnn.com/'
+        response = safe_request(home)
+        if not response:
+            return "获取CNN新闻失败"
+
+        page = etree.HTML(response.text)
+        links = title_tidy(page.xpath('//a[@data-link-type="article"]/@href'))
+
+        lines = []
+        for href in links[:NEWS_LIMIT]:
+            article_url = href  # shown in the error log if resolving fails
+            try:
+                # urljoin prefixes site-relative paths with the host and
+                # leaves already-absolute links intact (plain concatenation
+                # would corrupt those).
+                article_url = urljoin(home, href)
+                response = safe_request(article_url)
+                if not response:
+                    continue
+
+                title = etree.HTML(response.text).xpath(
+                    '//h1[@data-editable="headlineText"]/text()')
+                if not title:
+                    logger.warning(f'跳过视频或其他类型新闻: {article_url}')
+                    continue
+
+                lines.append(f'Title: {title[0].strip()}. Link: {article_url}\n')
+                sleep(0.1)  # brief pause between article requests
+
+            except Exception as e:
+                # One bad article must not abort the whole source
+                logger.error(f"处理新闻失败: {article_url}, 错误: {str(e)}")
+
+        logger.info(f"CNN新闻获取完成，共获取{len(lines)}条")
+        return ''.join(lines)
+
+    except Exception as e:
+        logger.error(f"获取CNN新闻失败: {str(e)}")
+        return "获取新闻失败，请查看日志了解详情"
+
+
+def abc():
+    """Collect article titles and links from the ABC News front page.
+
+    Returns:
+        str: one ``Title: <title>. Link: <url>`` line per article that could
+        be read (possibly empty), or a failure message when the front page
+        cannot be fetched or processed.
+    """
+    logger.info("开始获取ABC新闻")
+    try:
+        head = 'https://abcnews.go.com'  # no trailing slash
+        response = safe_request(head)
+        if not response:
+            return "获取ABC新闻失败"
+
+        page = etree.HTML(response.text)
+        # Four link selectors are combined; title_tidy removes repeats
+        links = title_tidy(
+            page.xpath('//div[@class="HeadlinesTrio"]/a/@href')
+            + page.xpath(
+                '//div[@class="title card"]/a[@class="AnchorLink"]/@href | //div[@class="title"]/a[@class="AnchorLink"]/@href')
+            + page.xpath('//a[@target="_self"]/@href')
+            + page.xpath('//a[@class="AnchorLink VideoTile"]/@href')
+        )
+
+        lines = []
+        for href in links[:NEWS_LIMIT]:
+            article_url = href  # shown in the error log if resolving fails
+            try:
+                # Handles absolute, protocol-relative ("//host/..."),
+                # site-relative ("/path") and bare relative ("path") links.
+                article_url = urljoin(head + '/', href)
+                if not article_url.startswith(('http://', 'https://')):
+                    # e.g. "javascript:" or "mailto:" links cannot be fetched
+                    continue
+
+                response = safe_request(article_url)
+                if not response:
+                    continue
+
+                title = etree.HTML(response.text).xpath(
+                    '//div[@data-testid="prism-headline"]/h1/text()')
+                if not title:
+                    logger.warning(f'跳过视频或其他类型新闻: {article_url}')
+                    continue
+
+                lines.append(f'Title: {title[0].strip()}. Link: {article_url}\n')
+                sleep(0.1)  # brief pause between article requests
+
+            except Exception as e:
+                # One bad article must not abort the whole source
+                logger.error(f"处理新闻失败: {article_url}, 错误: {str(e)}")
+
+        logger.info(f"ABC新闻获取完成，共获取{len(lines)}条")
+        return ''.join(lines)
+
+    except Exception as e:
+        logger.error(f"获取ABC新闻失败: {str(e)}")
+        return "获取新闻失败，请查看日志了解详情"
+
+
+def fox():
+    """Collect article titles and links from the Fox News front page.
+
+    Returns:
+        str: one ``Title: <title>. Link: <url>`` line per article that could
+        be read (possibly empty), or a failure message when the front page
+        cannot be fetched or processed.
+    """
+    logger.info("开始获取FOX新闻")
+    try:
+        head = 'https://www.foxnews.com/'
+        response = safe_request(head)
+        if not response:
+            return "获取FOX新闻失败"
+
+        page = etree.HTML(response.text)
+        links = title_tidy(page.xpath('//h3[@class="title"]/a/@href'))
+
+        lines = []
+        for href in links[:NEWS_LIMIT]:
+            article_url = href  # shown in the error log if resolving fails
+            try:
+                # Protocol-relative links ("//host/...") get the https scheme
+                # as before; site-relative paths ("/path") now resolve against
+                # the host instead of becoming the invalid "https:/path".
+                article_url = urljoin(head, href)
+                response = safe_request(article_url)
+                if not response:
+                    continue
+
+                title = etree.HTML(response.text).xpath(
+                    '//h1[@itemprop="headline"]/text()')
+                if not title:
+                    logger.warning(f'跳过视频或其他类型新闻: {article_url}')
+                    continue
+
+                lines.append(f'Title: {title[0].strip()}. Link: {article_url}\n')
+                sleep(0.1)  # brief pause between article requests
+
+            except Exception as e:
+                # One bad article must not abort the whole source
+                logger.error(f"处理新闻失败: {article_url}, 错误: {str(e)}")
+
+        logger.info(f"FOX新闻获取完成，共获取{len(lines)}条")
+        return ''.join(lines)
+
+    except Exception as e:
+        logger.error(f"获取FOX新闻失败: {str(e)}")
+        return "获取新闻失败，请查看日志了解详情"
+
+
+def bbc():
+    """Collect article titles and links from the BBC front page.
+
+    Links that already start with "http" are skipped; the remaining ones are
+    appended to the site root before being fetched.
+
+    Returns:
+        str: one ``Title: <title>. Link: <url>`` line per article that could
+        be read (possibly empty), or a failure message when the front page
+        cannot be fetched or processed.
+    """
+    logger.info("开始获取BBC新闻")
+    try:
+        head = 'https://www.bbc.com'
+        front_page = safe_request(head + '/')
+        if not front_page:
+            return "获取BBC新闻失败"
+
+        tree = etree.HTML(front_page.text)
+        # The href sits on an ancestor four or five levels above the headline
+        links = title_tidy(tree.xpath(
+            '//h2[@data-testid="card-headline"]/../../../../../@href | //h2[@data-testid="card-headline"]/../../../../@href'))
+
+        collected = []
+
+        for url in links[:NEWS_LIMIT]:
+            try:
+                if url.startswith('http'):
+                    continue
+
+                full_url = head + url
+                article = safe_request(full_url)
+                if not article:
+                    continue
+
+                headline = etree.HTML(article.text).xpath(
+                    '//div[@data-component="headline-block"]/h1/text()')
+
+                if headline:
+                    collected.append(f'Title: {headline[0]}. Link: {full_url}\n')
+                    sleep(0.1)
+                else:
+                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
+
+            except Exception as e:
+                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
+
+        logger.info(f"BBC新闻获取完成，共获取{len(collected)}条")
+        return ''.join(collected)
+
+    except Exception as e:
+        logger.error(f"获取BBC新闻失败: {str(e)}")
+        return "获取新闻失败，请查看日志了解详情"