feature:全球政治经济新闻

2025-04-14 16:02:00 +08:00
parent bfdb1831d3
commit 8afd0f49d0
8 changed files with 885 additions and 220 deletions
--- a/base/func_english_news.py
+++ b/base/func_english_news.py
@@ -6,11 +6,35 @@ Created Date: 2024-01-21
 Last Modified: 2024-03-24
 Modified by: MrCrawL
 """
 from utils.ai.dify_news_analyze import dify_news_title_analyze
 from utils.markdown_to_image import convert_md_str_to_image
 '''Existing problem: text with hyperlink won't be saved'''
 import requests
 from time import localtime, sleep
 from lxml import etree
 import logging
 from datetime import datetime
 # 配置日志
 logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(f'news_crawler_{datetime.now().strftime("%Y%m%d")}.log'),
        logging.StreamHandler()
    ]
 )
 logger = logging.getLogger(__name__)
 # 请求配置
 HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
 }
 TIMEOUT = 10
 MAX_RETRIES = 3
 NEWS_LIMIT = 30
 def get_time():
@@ -18,7 +42,6 @@ def get_time():
    return date_
 # delete duplicated
 def title_tidy(title_list):
    t_index = []
    for i in range(1, len(title_list)):
@@ -28,7 +51,6 @@ def title_tidy(title_list):
    return title_list
 # tidy text, seems a little bit redundant
 def text_tidy(p_text):
    text_ = p_text.replace('’', "'")
    text_ = text_.replace(' \n\n', ' ')
@@ -46,218 +68,275 @@ def text_tidy(p_text):
    return text_
-def save(text, file_name, mode='w', encoding='utf-8'):
+def safe_request(url, retry_count=0):
-    with open(f'{file_name}.txt', mode, encoding=encoding) as f: f.write(text)
+    """安全的请求方法，包含重试机制"""
    try:
        response = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
        response.raise_for_status()
        return response
    except requests.RequestException as e:
        if retry_count < MAX_RETRIES:
            logger.warning(f"请求失败，正在进行第{retry_count + 1}次重试: {url}")
            sleep(1)
            return safe_request(url, retry_count + 1)
        else:
            logger.error(f"请求失败: {url}, 错误: {str(e)}")
            return None
 def nbc():
    logger.info("开始获取NBC新闻")
    try:
        url = 'https://www.nbcnews.com/'
-    res = requests.get(url)
+        response = safe_request(url)
-    html = etree.HTML(res.text)
+        if not response:
            return "获取NBC新闻失败"
        html = etree.HTML(response.text)
        href = html.xpath('//h2/a/@href')
        href = title_tidy(href)
-    # quant = int(input(f'There are {len(href)} pieces detected. How many would you download:'))
+
-    # if quant > len(href) or quant < 1:
+        msg = ''
    #     print("Outnumber!")
    #     quit()
        count = 0
-    # save('', f'NBC_news_title_{get_time()}')
+
-    # save('', f'NBC_news_text_{get_time()}')
+        for url in href[:NEWS_LIMIT]:
-    msg =''
+            try:
-    for i in range(30):
+                response = safe_request(url)
-        url = href[i]
+                if not response:
        sleep(0.1)  # delete to speed up
        res = requests.get(url)
        html = etree.HTML(res.text)
        title = html.xpath('//h1/text()')
        if len(title) == 0:
            print(f'Video or other news. Link: {url}')
                    continue
                html = etree.HTML(response.text)
                title = html.xpath('//h1/text()')
                if not title:
                    logger.warning(f'跳过视频或其他类型新闻: {url}')
                    continue
                title = title[0]
-        author = html.xpath('//span[@class="byline-name"]/a/text() | //span[@class="byline-name" and not(a)]/text()')
+                msg += f'Title: {title}. Link: {url}\n'
        author = ', '.join(author)
        text = html.xpath('//p[@class=""]/text()')
        text = '\n\n'.join(text)
        text = text_tidy(text)
                count += 1
-        # save(f'Title: {title}\nLink: {url}\n\n', f'NBC_news_title_{get_time()}', 'a')  # news title
+                sleep(0.1)
-        # save(f'Title: {title}\n\nOrigin: {url}\n\nAuthor: {author}\n\n\n', f'NBC_news_text_{get_time()}', 'a')
+
-        # save(f'{text}' + '\n\n------------------------------\n\n', f'NBC_news_text_{get_time()}', 'a')
+            except Exception as e:
-        # print(f'Title: {title}. Link: {href[i]}.')
+                logger.error(f"处理新闻失败: {url}, 错误: {str(e)}")
-        msg += f'Title: {title}. Link: {href[i]}.\n'
+                continue
        logger.info(f"NBC新闻获取完成，共获取{count}条")
        return msg
    except Exception as e:
        logger.error(f"获取NBC新闻失败: {str(e)}")
        return "获取新闻失败，请查看日志了解详情"
 def cnn():
    logger.info("开始获取CNN新闻")
    try:
        head = 'https://www.cnn.com'
-    res = requests.get(head + '/')
+        response = safe_request(head + '/')
-    html = etree.HTML(res.text)
+        if not response:
            return "获取CNN新闻失败"
        html = etree.HTML(response.text)
        href = html.xpath('//a[@data-link-type="article"]/@href')
        href = title_tidy(href)
-    # quant = int(input(f'{len(href)} data detected. How many would you like to download:'))
+
    # if quant > len(href) or quant < 1:
    #     print("Outnumber!")
    #     quit()
    count = 0
        msg = ''
-    # save('', f'CNN_news_title_{get_time()}')
+        count = 0
-    # save('', f'CNN_news_text_{get_time()}')
+
-    for i in range(30):
+        for url in href[:NEWS_LIMIT]:
-        url = head + href[i]
+            try:
-        sleep(0.1)  # delete to speed up
+                full_url = head + url
-        res = requests.get(url)
+                response = safe_request(full_url)
-        html = etree.HTML(res.text)
+                if not response:
        title = html.xpath('//h1[@data-editable="headlineText"]/text()')
        if len(title) == 0:
            print(f'Video or other news. Link: {url}')
                    continue
                html = etree.HTML(response.text)
                title = html.xpath('//h1[@data-editable="headlineText"]/text()')
                if not title:
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue
                title = title[0].strip()
-        author = html.xpath('//span[@class="byline__name"]/text()')
+                msg += f'Title: {title}. Link: {full_url}\n'
        author = ', '.join(author)
        text = html.xpath('//p[@class="paragraph inline-placeholder"]/text()')
        for k in range(len(text)): text[k].strip()
        text = ''.join(text)
        text = text_tidy(text)
                count += 1
-        # save(f'Title: {title}\nLink: {url}\n\n', f'CNN_news_title_{get_time()}', 'a')  # news title
+                sleep(0.1)
-        # save(f'Title: {title}\n\nOrigin: {url}\n\nAuthor: {author}\n\n\n', f'CNN_news_text_{get_time()}', 'a')
+
-        # save(f'{text}' + '\n\n------------------------------\n\n', f'CNN_news_text_{get_time()}', 'a')
+            except Exception as e:
-        # print(f'Title: {title}. Link: {url}')
+                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
-        msg +=f'Title: {title}. Link: {url}\n'
+                continue
-    # print(f'Files saved with {count} articles available.')
+
        logger.info(f"CNN新闻获取完成，共获取{count}条")
        return msg
    except Exception as e:
        logger.error(f"获取CNN新闻失败: {str(e)}")
        return "获取新闻失败，请查看日志了解详情"
 def abc():
-    head = 'https://abcnews.go.com/'
+    logger.info("开始获取ABC新闻")
-    res = requests.get(head)
+    try:
-    html = etree.HTML(res.text)
+        head = 'https://abcnews.go.com'  # 移除末尾的斜杠
        response = safe_request(head)
        if not response:
            return "获取ABC新闻失败"
        html = etree.HTML(response.text)
        href1 = html.xpath('//div[@class="HeadlinesTrio"]/a/@href')
-    href2 = html.xpath('//div[@class="title card"]/a[@class="AnchorLink"]/@href | //div[@class="title"]/a[@class="AnchorLink"]/@href')
+        href2 = html.xpath(
            '//div[@class="title card"]/a[@class="AnchorLink"]/@href | //div[@class="title"]/a[@class="AnchorLink"]/@href')
        href3 = html.xpath('//a[@target="_self"]/@href')
        href4 = html.xpath('//a[@class="AnchorLink VideoTile"]/@href')
-    href = href1 + href2 + href3 + href4
+        href = title_tidy(href1 + href2 + href3 + href4)
-    href = title_tidy(href)
+
    # quant = int(input(f'{len(href)} data detected. How many would you like to download:'))
    # if quant > len(href) or quant < 1:
    #     print("Outnumber!")
    #     quit()
    count = 0
        msg = ''
-    # save('', f'ABC_news_title_{get_time()}')
+        count = 0
-    # save('', f'ABC_news_text_{get_time()}')
+
-    for i in range(30):
+        for url in href[:NEWS_LIMIT]:
-        url = href[i]
+            try:
-        sleep(0.1)  # delete to speed up
+                # 处理URL格式
-        res = requests.get(url)
+                if url.startswith('http'):
-        html = etree.HTML(res.text)
+                    full_url = url
-        title = html.xpath('//div[@data-testid="prism-headline"]/h1/text()')
+                elif url.startswith('//'):
-        if len(title) == 0:
+                    full_url = 'https:' + url
-            print(f'Video or other news. Link: {url}')
+                else:
                    full_url = head + ('' if url.startswith('/') else '/') + url
                response = safe_request(full_url)
                if not response:
                    continue
                html = etree.HTML(response.text)
                title = html.xpath('//div[@data-testid="prism-headline"]/h1/text()')
                if not title:
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue
                title = title[0]
-        author = html.xpath('//a[@data-testid="prism-linkbase"]/text()')
+                msg += f'Title: {title}. Link: {full_url}\n'
        author = ', '.join(author)
        text = html.xpath('//div[@data-testid="prism-article-body"]/p/text()')
        text = '\n\n'.join(text)
        text = text_tidy(text)
                count += 1
-        # save(f'Title: {title}\nLink: {url}\n\n', f'ABC_news_title_{get_time()}', 'a')  # news title
+                sleep(0.1)
-        # save(f'Title: {title}\n\nOrigin: {url}\n\nAuthor: {author}\n\n\n', f'ABC_news_text_{get_time()}', 'a')
+
-        # save(f'{text}' + '\n\n------------------------------\n\n', f'ABC_news_text_{get_time()}', 'a')
+            except Exception as e:
-        # print(f'Title: {title}. Link: {url}')
+                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
-        msg +=f'Title: {title}. Link: {url}\n'
+                continue
-    # print(f'Files saved with {count} articles available.')
+
        logger.info(f"ABC新闻获取完成，共获取{count}条")
        return msg
    except Exception as e:
        logger.error(f"获取ABC新闻失败: {str(e)}")
        return "获取新闻失败，请查看日志了解详情"
 def fox():
    logger.info("开始获取FOX新闻")
    try:
        head = 'https://www.foxnews.com/'
-    res = requests.get(head)
+        response = safe_request(head)
-    html = etree.HTML(res.text)
+        if not response:
            return "获取FOX新闻失败"
        html = etree.HTML(response.text)
        href = html.xpath('//h3[@class="title"]/a/@href')
        href = title_tidy(href)
-    # quant = int(input(f'{len(href)} data detected. How many would you like to download:'))
+
-    # if quant > len(href) or quant < 1:
+        msg = ''
    #     print("Outnumber!")
    #     quit()
        count = 0
-    msg =''
+
-    # save('', f'FOX_news_title_{get_time()}')
+        for url in href[:NEWS_LIMIT]:
-    # save('', f'FOX_news_text_{get_time()}')
+            try:
-    for i in range(30):
+                if url[0:4] != 'http':
-        if href[i][0:4] != 'http': href[i] = 'https:' + href[i]
+                    url = 'https:' + url
-        url = href[i]
+
-        sleep(0.1)  # delete to speed up
+                response = safe_request(url)
-        res = requests.get(url)
+                if not response:
        html = etree.HTML(res.text)
        title = html.xpath('//h1[@itemprop="headline"]/text()')
        if len(title) == 0:
            print(f'Video or other news. Link: {url}')
                    continue
                html = etree.HTML(response.text)
                title = html.xpath('//h1[@itemprop="headline"]/text()')
                if not title:
                    logger.warning(f'跳过视频或其他类型新闻: {url}')
                    continue
                title = title[0]
-        author = html.xpath('//a[@rel="author"]/strong/text()')
+                msg += f'Title: {title}. Link: {url}\n'
        author = ', '.join(author)
        text = html.xpath('//div[@itemprop="articleBody"]/p/text()')
        text = '\n\n'.join(text)
        text = text_tidy(text)
                count += 1
-        # save(f'Title: {title}\nLink: {url}\n\n', f'FOX_news_title_{get_time()}', 'a')  # news title
+                sleep(0.1)
-        # save(f'Title: {title}\n\nOrigin: {url}\n\nAuthor: {author}\n\n\n', f'FOX_news_text_{get_time()}', 'a')
+
-        # save(f'{text}' + '\n\n------------------------------\n\n', f'FOX_news_text_{get_time()}', 'a')
+            except Exception as e:
-        # print(f'Title: {title}. Link: {url}')
+                logger.error(f"处理新闻失败: {url}, 错误: {str(e)}")
-        msg +=f'Title: {title}. Link: {url}\n'
+                continue
-    # print(f'Files saved with {count} articles available.')
+
        logger.info(f"FOX新闻获取完成，共获取{count}条")
        return msg
    except Exception as e:
        logger.error(f"获取FOX新闻失败: {str(e)}")
        return "获取新闻失败，请查看日志了解详情"
 def bbc():
    logger.info("开始获取BBC新闻")
    try:
        head = 'https://www.bbc.com'
-    res = requests.get(head + '/')
+        response = safe_request(head + '/')
-    html = etree.HTML(res.text)
+        if not response:
-    href = html.xpath('//h2[@data-testid="card-headline"]/../../../../../@href | //h2[@data-testid="card-headline"]/../../../../@href')
+            return "获取BBC新闻失败"
    href = title_tidy(href)
    # quant = int(input(f'{len(href)} data detected. How many would you like to download:'))
    # if quant > len(href) or quant < 1:
    #     print("Outnumber!")
    #     quit()
    count = 0
    msg =''
    # save('', f'BBC_news_title_{get_time()}')
    # save('', f'BBC_news_text_{get_time()}')
    for i in range(30):
        if href[i][0:4] == 'http': continue
        url = head + href[i]
        sleep(0.1)  # delete to speed up
        print(url)
        res = requests.get(url)
        html = etree.HTML(res.text)
        title = html.xpath('//div[@data-component="headline-block"]/h1/text()')
        if len(title) == 0:
            # print(f'Video or other news. Link: {url}')
            continue
        title = title[0]
        # author = html.xpath('//div[@data-testid="byline"]/div/span[@data-testid="byline-name"]/text()')
        # author = ', '.join(author)
        # text = html.xpath('//div[@data-component="text-block"]/p/b/text() | //div[@data-component="text-block"]/p/text()')
        # text = '\n\n'.join(text)
        # text = text_tidy(text)
        count += 1
        # save(f'Title: {title}\nLink: {url}\n\n', f'BBC_news_title_{get_time()}', 'a')  # news title
        # save(f'Title: {title}\n\nOrigin: {url}\n\nAuthor: {author}\n\n\n', f'BBC_news_text_{get_time()}', 'a')
        # save(f'{text}' + '\n\n------------------------------\n\n', f'BBC_news_text_{get_time()}', 'a')
        # print(f'Title: {title}. Link: {url}')
-        msg +=f'Title: {title}. Link: {url}\n'
+        html = etree.HTML(response.text)
-    # print(f'Files saved with {count} articles available.')
+        href = html.xpath(
            '//h2[@data-testid="card-headline"]/../../../../../@href | //h2[@data-testid="card-headline"]/../../../../@href')
        href = title_tidy(href)
        msg = ''
        count = 0
        for url in href[:NEWS_LIMIT]:
            try:
                if url[0:4] == 'http':
                    continue
                full_url = head + url
                response = safe_request(full_url)
                if not response:
                    continue
                html = etree.HTML(response.text)
                title = html.xpath('//div[@data-component="headline-block"]/h1/text()')
                if not title:
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue
                title = title[0]
                msg += f'Title: {title}. Link: {full_url}\n'
                count += 1
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
                continue
        logger.info(f"BBC新闻获取完成，共获取{count}条")
        return msg
-if __name__ == '__main__':
+    except Exception as e:
-    # Hello, World! :)
+        logger.error(f"获取BBC新闻失败: {str(e)}")
-    # news = input('Choose news site["nbc","cnn","abc","fox","bbc"]:').lower()
+        return "获取新闻失败，请查看日志了解详情"
-    # if news == 'nbc': nbc()
+
-    # elif news == 'cnn': cnn()
+
-    # elif news == 'abc': abc()
+def all_english_news():
-    # elif news == 'fox': fox()
+    news_titles = ""
-    # elif news == 'bbc': bbc()
+    news_titles += nbc() + "\n"
-    # else:
+    news_titles += cnn() + "\n"
-    #     print('Oops! It seems a wrong input. Please retry...')
+    news_titles += abc() + "\n"
-    #     sleep(2)
+    news_titles += fox() + "\n"
-    print(bbc())
+    news_titles += bbc() + "\n"
    markdown_news = dify_news_title_analyze(news_titles)
    spath = convert_md_str_to_image(markdown_news, "news_output.png")
    return spath
--- a/base/func_news.py
+++ b/base/func_news.py
@@ -11,6 +11,7 @@ import requests
 from lxml import etree
 from base import func_english_news
 from utils.ai.dify_news_analyze import dify_news_title_analyze
 class News(object):
@@ -92,7 +93,7 @@ class News(object):
            self.LOG.error(f"获取百度新闻时出错: {e}")
            return f"获取百度新闻时出错: {e}"
-    def get_eng_news(self,website):
+    def get_eng_news(self, website):
        if website == 'nbc':
            return func_english_news.nbc()
        elif website == 'cnn':
@@ -105,9 +106,9 @@ class News(object):
            return func_english_news.bbc()
 if __name__ == "__main__":
    news = News()
    print(news.get_baidu_news())
    # # msg = "@水牛-分身 今日百度新闻"
    # # q = re.sub(r"@.*?[\u2005|\s]", "", msg).replace(" ", "")
    # # print(q)
--- a/plugins/global_news/init.py
+++ b/plugins/global_news/init.py
@@ -0,0 +1,7 @@
 # 从当前包的main模块导入GlobalNewsPlugin类
 from .main import GlobalNewsPlugin
 # 提供get_plugin函数，返回插件实例
 def get_plugin():
    """Build and return a new ``GlobalNewsPlugin`` instance."""
    plugin = GlobalNewsPlugin()
    return plugin
--- a/plugins/global_news/config.toml
+++ b/plugins/global_news/config.toml
@@ -0,0 +1,6 @@
 enable = true
 command = ["全球新闻", "国际新闻", "环球新闻", "政经新闻", "政治经济新闻"]
 command-format = """
 🌍全球新闻指令：
 全球新闻 - 获取最新的全球政治经济新闻
 """
--- a/plugins/global_news/main.py
+++ b/plugins/global_news/main.py
@@ -0,0 +1,191 @@
 import logging
 import asyncio
 import threading
 import time  # 添加这一行
 from typing import Dict, Any, List, Optional, Tuple
 from wcferry import Wcf
 from plugin_common.message_plugin_interface import MessagePluginInterface
 from plugin_common.plugin_interface import PluginStatus
 from utils.decorator.plugin_decorators import plugin_stats_decorator
 from utils.robot_cmd.robot_command import Feature, PermissionStatus, GroupBotManager
 from utils.decorator.points_decorator import plugin_points_cost
 from utils.ai.dify_news_analyze import dify_news_title_analyze
 from utils.markdown_to_image import convert_md_str_to_image
 # 导入新闻抓取函数
 from .news_crawler import nbc, cnn, abc, fox, bbc
 class GlobalNewsPlugin(MessagePluginInterface):
    """Plugin that answers a news command with an image digest of global news.

    On a matching command it acknowledges the request, then, on a background
    thread, runs the NBC/CNN/ABC/FOX/BBC crawlers, passes the collected titles
    to ``dify_news_title_analyze`` and renders the returned markdown to an
    image with ``convert_md_str_to_image`` before sending it through ``wcf``.
    """

    @property
    def name(self) -> str:
        """Display name of the plugin."""
        return "全球政治经济新闻"

    @property
    def version(self) -> str:
        """Plugin version string."""
        return "1.0.0"

    @property
    def description(self) -> str:
        """Short description of the plugin."""
        return "提供全球政治经济新闻，支持多个国际新闻源"

    @property
    def author(self) -> str:
        """Plugin author."""
        return "Trae AI"

    @property
    def command_prefix(self) -> Optional[str]:
        """Command prefix; empty because commands are matched directly."""
        return ""

    @property
    def commands(self) -> List[str]:
        """Commands this plugin reacts to (populated by ``initialize``)."""
        return self._commands

    def __init__(self):
        super().__init__()
        # task_id -> worker thread, for news jobs that are still running.
        self._news_tasks = {}

    def initialize(self, context: Dict[str, Any]) -> bool:
        """Store host services from ``context`` and load the plugin settings.

        Args:
            context: Host-provided mapping; the ``wcf``, ``event_system`` and
                ``message_util`` entries are kept on the instance.

        Returns:
            Always ``True``.
        """
        self.LOG = logging.getLogger(f"Plugin.{self.name}")
        self.LOG.info(f"正在初始化 {self.name} 插件...")

        self.wcf = context.get("wcf")
        self.event_system = context.get("event_system")
        self.message_util = context.get("message_util")

        # NOTE(review): self._config is not assigned in this class; presumably
        # the base class provides it - confirm.
        settings = self._config.get("GlobalNews", {})
        self._commands = settings.get("command", ["全球新闻", "国际新闻", "环球新闻", "政经新闻"])
        self.command_format = settings.get("command-format", "全球新闻 - 获取最新的全球政治经济新闻")
        self.enable = settings.get("enable", True)

        self.LOG.info(f"[{self.name}] 插件初始化完成，指令：{self._commands}")
        return True

    def start(self) -> bool:
        """Mark the plugin as running. Always returns ``True``."""
        self.LOG.info(f"[{self.name}] 插件已启动")
        self.status = PluginStatus.RUNNING
        return True

    def stop(self) -> bool:
        """Mark the plugin as stopped. Always returns ``True``."""
        self.LOG.info(f"[{self.name}] 插件已停止")
        self.status = PluginStatus.STOPPED
        return True

    def can_process(self, message: Dict[str, Any]) -> bool:
        """Return whether the first space-separated word of the message is one of our commands."""
        if not self.enable:
            return False

        content = str(message.get("content", "")).strip()
        command = content.split(" ")[0]
        return command in self._commands

    @plugin_stats_decorator(plugin_name="全球政治经济新闻")
    @plugin_points_cost(5, "全球新闻消耗积分", Feature.NEWS)
    def process_message(self, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
        """Acknowledge the request and start the background news job.

        Args:
            message: Incoming message mapping; ``content``, ``sender``,
                ``roomid``, ``wcf`` and ``gbm`` are read from it.

        Returns:
            ``(False, "没有权限")`` when the group has the news feature
            disabled, otherwise ``(True, "新闻获取任务已启动")``.
        """
        content = str(message.get("content", "")).strip()
        self.LOG.info(f"插件执行： {self.name}：{content}")

        sender = message.get("sender")
        roomid = message.get("roomid", "")
        wcf: Wcf = message.get("wcf")
        gbm: GroupBotManager = message.get("gbm")

        # Group messages are subject to the per-group feature switch.
        if roomid and gbm.get_group_permission(roomid, Feature.NEWS) == PermissionStatus.DISABLED:
            return False, "没有权限"

        # Second-resolution timestamp keeps ids distinct per sender and room.
        task_id = f"{sender}_{roomid}_{int(time.time())}"

        wcf.send_text("🌍正在获取全球新闻，请稍候...",
                      (roomid if roomid else sender), sender)

        self._start_news_task(task_id, sender, roomid, wcf)

        return True, "新闻获取任务已启动"

    def _start_news_task(self, task_id: str, sender: str, roomid: str, wcf: Wcf):
        """Run ``_fetch_news_thread`` for this request on a daemon thread."""
        thread = threading.Thread(
            target=self._fetch_news_thread,
            args=(task_id, sender, roomid, wcf)
        )
        thread.daemon = True
        # Register before starting: the worker removes its own entry when it
        # finishes, so registering afterwards could leave a stale entry behind.
        self._news_tasks[task_id] = thread
        thread.start()
        self.LOG.info(f"启动新闻获取任务: {task_id}")

    def _fetch_news_thread(self, task_id: str, sender: str, roomid: str, wcf: Wcf):
        """Thread entry point: fetch the news and send the result or an error message."""
        receiver = roomid if roomid else sender
        try:
            # asyncio.run creates a fresh event loop for this thread and always
            # closes it, including when the coroutine raises.
            news_result = asyncio.run(self._fetch_news_async())

            if news_result:
                wcf.send_image(news_result, receiver)
                wcf.send_text("🌍全球新闻获取完成！", receiver, sender)
            else:
                wcf.send_text("❌获取新闻失败，请稍后再试", receiver, sender)
        except Exception as e:
            self.LOG.error(f"新闻获取任务出错: {e}")
            wcf.send_text(f"❌获取新闻出错: {str(e)}", receiver, sender)
        finally:
            # Drop the bookkeeping entry whether or not the job succeeded.
            self._news_tasks.pop(task_id, None)

    async def _fetch_news_async(self) -> str:
        """Crawl all sources concurrently and render the analysed digest.

        Returns:
            The value returned by ``convert_md_str_to_image`` (used by the
            caller as the image to send), or ``""`` when any step fails or the
            analysis yields no content.
        """
        try:
            crawlers = (nbc, cnn, abc, fox, bbc)
            results = await asyncio.gather(
                *(self._run_in_executor(crawler) for crawler in crawlers)
            )

            # NOTE(review): a crawler that fails returns a short error string,
            # which is joined in here like ordinary titles.
            news_titles = "\n".join(results)

            markdown_news = await self._run_in_executor(
                dify_news_title_analyze, news_titles
            )
            if not markdown_news:
                # The analysis helper returned nothing usable; report a failure
                # instead of handing an empty value to the renderer.
                self.LOG.error("异步获取新闻失败: empty analysis result")
                return ""

            # NOTE(review): the output name is fixed, so overlapping requests
            # may write to the same file - confirm against the renderer.
            image_path = await self._run_in_executor(
                convert_md_str_to_image, markdown_news, "news_output.png"
            )

            return image_path
        except Exception as e:
            self.LOG.error(f"异步获取新闻失败: {e}")
            return ""

    async def _run_in_executor(self, func, *args):
        """Run the blocking callable ``func(*args)`` in the loop's default executor."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func, *args)
--- a/plugins/global_news/news_crawler.py
+++ b/plugins/global_news/news_crawler.py
@@ -0,0 +1,307 @@
 # -*- coding: utf-8 -*-
 """
 Program: Global News Crawler
 Author: Trae AI (based on MrCrawL's work)
 Created Date: 2024-05-01
 """
 import requests
 from time import localtime, sleep
 from lxml import etree
 import logging
 from datetime import datetime
 import time
 # Configure logging.
 # NOTE: this runs when the module is imported. The log file name embeds the
 # date at import time, and messages go to both that file and the stream handler.
 logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(f'global_news_{datetime.now().strftime("%Y%m%d")}.log'),
        logging.StreamHandler()
    ]
 )
 logger = logging.getLogger(__name__)
 # Request settings.
 # Browser-like User-Agent sent with every request made by safe_request.
 HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
 }
 # Passed as ``timeout`` to requests.get in safe_request.
 TIMEOUT = 10
 # Number of retries safe_request performs after the first failed attempt.
 MAX_RETRIES = 3
 # Upper bound on how many front-page links each crawler follows.
 NEWS_LIMIT = 30
 def get_time():
    """Return the current local date formatted as ``YYYY-MM-DD``.

    The clock is sampled once so that year, month and day always belong to
    the same instant, even when the call happens right at midnight.
    """
    now = localtime()
    return f'{now.tm_year:04d}-{now.tm_mon:02d}-{now.tm_mday:02d}'
 def title_tidy(title_list):
    """Remove duplicate entries from ``title_list``, keeping first occurrences.

    Duplicates are dropped wherever they appear, not only when adjacent, so
    the same link is never crawled twice. The list is updated in place and the
    same list object is returned.

    Args:
        title_list: List of hashable items (link strings in this module).

    Returns:
        ``title_list`` itself, de-duplicated with the original order preserved.
    """
    # dict keys are unique and keep insertion order.
    title_list[:] = dict.fromkeys(title_list)
    return title_list
 def safe_request(url, retry_count=0):
    """GET ``url`` with the shared headers and timeout, retrying on failure.

    Args:
        url: Address to fetch.
        retry_count: Number of retries already used; counting starts here.

    Returns:
        The response when a request succeeds with a non-error status, or
        ``None`` once the retry budget (``MAX_RETRIES``) is exhausted.
    """
    attempt = retry_count
    while True:
        try:
            response = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            if attempt >= MAX_RETRIES:
                logger.error(f"请求失败: {url}, 错误: {str(e)}")
                return None
            logger.warning(f"请求失败，正在进行第{attempt + 1}次重试: {url}")
            # Short pause before trying again.
            sleep(1)
            attempt += 1
 def nbc():
    """Collect article titles and links from the NBC News front page.

    Follows up to ``NEWS_LIMIT`` de-duplicated ``//h2/a`` links and reads each
    article's ``<h1>`` text. Pages without an ``<h1>`` are skipped.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string; or a short error message when the front page cannot be
        fetched or an unexpected error occurs.
    """
    # Local import so this function does not rely on a module-level import.
    from urllib.parse import urljoin

    logger.info("开始获取NBC新闻")
    try:
        home = 'https://www.nbcnews.com/'
        front_page = safe_request(home)
        if not front_page:
            return "获取NBC新闻失败"
        links = title_tidy(etree.HTML(front_page.text).xpath('//h2/a/@href'))

        lines = []
        for href in links[:NEWS_LIMIT]:
            article_url = href
            try:
                # Resolve relative or protocol-relative links against the site
                # root; absolute links pass through unchanged.
                article_url = urljoin(home, href)
                page = safe_request(article_url)
                if not page:
                    continue
                headline = etree.HTML(page.text).xpath('//h1/text()')
                if not headline:
                    logger.warning(f'跳过视频或其他类型新闻: {article_url}')
                    continue
                # Strip surrounding whitespace so each entry stays on one line.
                lines.append(f'Title: {headline[0].strip()}. Link: {article_url}\n')
                # Small pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {article_url}, 错误: {str(e)}")
                continue
        logger.info(f"NBC新闻获取完成，共获取{len(lines)}条")
        return ''.join(lines)
    except Exception as e:
        logger.error(f"获取NBC新闻失败: {str(e)}")
        return "获取新闻失败，请查看日志了解详情"
 def cnn():
    """Collect article titles and links from the CNN front page.

    Follows up to ``NEWS_LIMIT`` de-duplicated links marked
    ``data-link-type="article"`` and reads each article's headline ``<h1>``.
    Pages without that headline are skipped.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string; or a short error message when the front page cannot be
        fetched or an unexpected error occurs.
    """
    # Local import so this function does not rely on a module-level import.
    from urllib.parse import urljoin

    logger.info("开始获取CNN新闻")
    try:
        head = 'https://www.cnn.com'
        home = head + '/'
        front_page = safe_request(home)
        if not front_page:
            return "获取CNN新闻失败"
        links = title_tidy(
            etree.HTML(front_page.text).xpath('//a[@data-link-type="article"]/@href'))

        lines = []
        for href in links[:NEWS_LIMIT]:
            full_url = href
            try:
                # Site-relative links resolve as before; absolute and
                # protocol-relative links are no longer glued onto the host.
                full_url = urljoin(home, href)
                page = safe_request(full_url)
                if not page:
                    continue
                headline = etree.HTML(page.text).xpath(
                    '//h1[@data-editable="headlineText"]/text()')
                if not headline:
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue
                lines.append(f'Title: {headline[0].strip()}. Link: {full_url}\n')
                # Small pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
                continue
        logger.info(f"CNN新闻获取完成，共获取{len(lines)}条")
        return ''.join(lines)
    except Exception as e:
        logger.error(f"获取CNN新闻失败: {str(e)}")
        return "获取新闻失败，请查看日志了解详情"
 def abc():
    """Collect article titles and links from the ABC News front page.

    Gathers links from four front-page selectors, de-duplicates them with
    ``title_tidy`` and follows up to ``NEWS_LIMIT`` of them, reading each
    article's headline ``<h1>``. Pages without that headline are skipped.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string; or a short error message when the front page cannot be
        fetched or an unexpected error occurs.
    """
    logger.info("开始获取ABC新闻")
    try:
        head = 'https://abcnews.go.com'  # no trailing slash
        front_page = safe_request(head)
        if not front_page:
            return "获取ABC新闻失败"
        tree = etree.HTML(front_page.text)
        trio_links = tree.xpath('//div[@class="HeadlinesTrio"]/a/@href')
        card_links = tree.xpath(
            '//div[@class="title card"]/a[@class="AnchorLink"]/@href | //div[@class="title"]/a[@class="AnchorLink"]/@href')
        self_links = tree.xpath('//a[@target="_self"]/@href')
        video_links = tree.xpath('//a[@class="AnchorLink VideoTile"]/@href')
        links = title_tidy(trio_links + card_links + self_links + video_links)

        lines = []
        for link in links[:NEWS_LIMIT]:
            try:
                # Normalise the link into an absolute URL.
                if link.startswith('http'):
                    full_url = link
                elif link.startswith('//'):
                    full_url = 'https:' + link
                elif link.startswith('/'):
                    full_url = head + link
                else:
                    full_url = head + '/' + link

                page = safe_request(full_url)
                if not page:
                    continue
                headline = etree.HTML(page.text).xpath(
                    '//div[@data-testid="prism-headline"]/h1/text()')
                if not headline:
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue
                lines.append(f'Title: {headline[0]}. Link: {full_url}\n')
                # Small pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
                continue
        logger.info(f"ABC新闻获取完成，共获取{len(lines)}条")
        return ''.join(lines)
    except Exception as e:
        logger.error(f"获取ABC新闻失败: {str(e)}")
        return "获取新闻失败，请查看日志了解详情"
 def fox():
    """Collect article titles and links from the Fox News front page.

    Follows up to ``NEWS_LIMIT`` de-duplicated ``h3.title`` links and reads
    each article's ``<h1 itemprop="headline">`` text. Pages without that
    headline are skipped.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string; or a short error message when the front page cannot be
        fetched or an unexpected error occurs.
    """
    # Local import so this function does not rely on a module-level import.
    from urllib.parse import urljoin

    logger.info("开始获取FOX新闻")
    try:
        head = 'https://www.foxnews.com/'
        front_page = safe_request(head)
        if not front_page:
            return "获取FOX新闻失败"
        links = title_tidy(
            etree.HTML(front_page.text).xpath('//h3[@class="title"]/a/@href'))

        lines = []
        for href in links[:NEWS_LIMIT]:
            article_url = href
            try:
                # Protocol-relative links ("//host/path") still become https
                # URLs; site-relative links ("/path") now resolve against the
                # host instead of producing an invalid "https:/path".
                article_url = urljoin(head, href)
                page = safe_request(article_url)
                if not page:
                    continue
                headline = etree.HTML(page.text).xpath(
                    '//h1[@itemprop="headline"]/text()')
                if not headline:
                    logger.warning(f'跳过视频或其他类型新闻: {article_url}')
                    continue
                # Strip surrounding whitespace so each entry stays on one line.
                lines.append(f'Title: {headline[0].strip()}. Link: {article_url}\n')
                # Small pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {article_url}, 错误: {str(e)}")
                continue
        logger.info(f"FOX新闻获取完成，共获取{len(lines)}条")
        return ''.join(lines)
    except Exception as e:
        logger.error(f"获取FOX新闻失败: {str(e)}")
        return "获取新闻失败，请查看日志了解详情"
 def bbc():
    """Collect article titles and links from the BBC front page.

    Follows up to ``NEWS_LIMIT`` de-duplicated links found on ancestors of the
    ``card-headline`` elements. Links that already start with ``http`` are
    skipped, as before. Pages without a ``headline-block`` ``<h1>`` are
    skipped too.

    Returns:
        One ``Title: ... Link: ...`` line per article, concatenated into a
        single string; or a short error message when the front page cannot be
        fetched or an unexpected error occurs.
    """
    # Local import so this function does not rely on a module-level import.
    from urllib.parse import urljoin

    logger.info("开始获取BBC新闻")
    try:
        head = 'https://www.bbc.com'
        home = head + '/'
        front_page = safe_request(home)
        if not front_page:
            return "获取BBC新闻失败"
        links = title_tidy(etree.HTML(front_page.text).xpath(
            '//h2[@data-testid="card-headline"]/../../../../../@href | //h2[@data-testid="card-headline"]/../../../../@href'))

        lines = []
        for href in links[:NEWS_LIMIT]:
            full_url = href
            try:
                # Absolute links are deliberately ignored.
                if href.startswith('http'):
                    continue
                # "/path" resolves exactly as before; links without a leading
                # slash are no longer glued directly onto the host name.
                full_url = urljoin(home, href)
                page = safe_request(full_url)
                if not page:
                    continue
                headline = etree.HTML(page.text).xpath(
                    '//div[@data-component="headline-block"]/h1/text()')
                if not headline:
                    logger.warning(f'跳过视频或其他类型新闻: {full_url}')
                    continue
                # Strip surrounding whitespace so each entry stays on one line.
                lines.append(f'Title: {headline[0].strip()}. Link: {full_url}\n')
                # Small pause between article requests.
                sleep(0.1)
            except Exception as e:
                logger.error(f"处理新闻失败: {full_url}, 错误: {str(e)}")
                continue
        logger.info(f"BBC新闻获取完成，共获取{len(lines)}条")
        return ''.join(lines)
    except Exception as e:
        logger.error(f"获取BBC新闻失败: {str(e)}")
        return "获取新闻失败，请查看日志了解详情"
--- a/utils/ai/dify_news_analyze.py
+++ b/utils/ai/dify_news_analyze.py
@@ -0,0 +1,73 @@
 #
 # curl -X POST 'http://192.168.2.240/v1/chat-messages' \
 # --header 'Authorization: Bearer {api_key}' \
 # --header 'Content-Type: application/json' \
 # --data-raw '{
 #     "inputs": {},
 #     "query": "What are the specs of the iPhone 13 Pro Max?",
 #     "response_mode": "streaming",
 #     "conversation_id": "",
 #     "user": "abc-123",
 #     "files": [
 #       {
 #         "type": "image",
 #         "transfer_method": "remote_url",
 #         "url": "https://cloud.dify.ai/logo/logo-site.png"
 #       }
 #     ]
 # }'
 import json
 import requests
 def dify_news_title_analyze(content, timeout=(10, 300)):
    """Send ``content`` to the Dify chat-messages endpoint and return its answer.

    Args:
        content: Text sent as the ``query`` field of the request body.
        timeout: Timeout handed to ``requests.post``; either a number of
            seconds or a ``(connect, read)`` tuple. Without one, a stalled
            server would block the caller indefinitely.

    Returns:
        Whatever ``extract_content`` returns for the decoded JSON body: the
        ``answer`` value, or ``None`` when no answer is present.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: Presumably, when the body is not valid JSON (the error
            raised by ``response.json()``) - confirm against the installed
            requests version.
    """
    # NOTE(review): this bearer token is hard-coded in source control; it
    # should be loaded from configuration or the environment and rotated.
    authorization = "Bearer app-rhhKkbvHd2IAQoGX7xTzXZJj"  # replace with the real Authorization token
    url = 'http://192.168.2.240/v1/chat-messages'
    payload = {
        "response_mode": "blocking",
        "conversation_id": "",
        "inputs": {},
        "query": content,
        "user": "a-bot"
    }
    headers = {
        "Content-Type": "application/json; charset=utf-8",
        "Authorization": authorization
    }
    # Blocking mode returns the full answer in a single response.
    response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=timeout)
    response.encoding = 'utf-8'
    # Decode the body once and reuse it for both the debug output and the result.
    body = response.json()
    print(response.status_code)
    print(body)
    return extract_content(body)
 def extract_content(data):
    """Extract the ``answer`` field from an API response.

    Args:
        data: Response payload, either an already-decoded dict or a JSON
            string that decodes to one.

    Returns:
        The truthy ``answer`` value, or ``None`` when the payload is not a
        dict, has no (or an empty) ``answer``, or cannot be parsed.
    """
    try:
        # A string payload is raw JSON text and must be decoded. json.dumps
        # would only re-quote it, so the dict branch could never be reached.
        if isinstance(data, str):
            data = json.loads(data)
        # A dict payload carries the reply under the 'answer' key.
        if isinstance(data, dict):
            answer = data.get('answer', '')
            if answer:
                return answer
        return None
    except Exception as e:
        # Malformed JSON ends up here; report it and signal "no answer".
        print(f"解析响应失败: {str(e)}")
        return None
--- a/utils/robot_cmd/robot_command.py
+++ b/utils/robot_cmd/robot_command.py
@@ -43,7 +43,8 @@ class Feature(Enum):
    GROUP_ADD = 16, "加群提醒功能"
    DOUYIN_PARSER = 17, "抖音链接转视频功能"
    GROUP_MEMBER_CHANGE = 18, "群成员变更提醒功能"
-    KID_PHOTO_EXTRACT =19, "儿童照片提取转发功能"  # 小朋友照片提取功能
+    KID_PHOTO_EXTRACT = 19, "儿童照片提取转发功能"  # 小朋友照片提取功能
    NEWS = 20, "全球政治经济新闻"
    def __new__(cls, value, description):
        obj = object.__new__(cls)