# -*- coding: utf-8 -*-
"""Message plugin that answers GitHub links with their OpenGraph preview image."""

import hashlib
import re
from typing import Any, Dict, List, Optional, Set, Tuple
from urllib.parse import urlparse

import aiohttp
from loguru import logger

from base.plugin_common.message_plugin_interface import MessagePluginInterface
from base.plugin_common.plugin_interface import PluginStatus
from utils.decorator.plugin_decorators import plugin_stats_decorator
from utils.robot_cmd.robot_command import GroupBotManager, PermissionStatus
from wechat_ipad.models.message import MessageType


class GithubOpenGraphPlugin(MessagePluginInterface):
    """Plugin that turns GitHub links into OpenGraph preview images.

    Design goals:
        1. Detect GitHub links inside text messages.
        2. Convert each link to an OpenGraph image URL, download the image and
           send it, so a chat gets a quick visual preview.
        3. Honour the per-group permission switch so each group can enable or
           disable the feature independently.
    """

    # Feature-permission constants used to hook into the group permission system.
    FEATURE_KEY = "GITHUB_OPENGRAPH"
    FEATURE_DESCRIPTION = "🧩 GitHub链接卡片 [自动转OpenGraph图片]"

    # Extracts candidate GitHub URLs from free text.
    # * A URL is ASCII-only (non-ASCII must be percent-encoded), so the match
    #   stops at the first non-ASCII character. Otherwise CJK text or full-width
    #   punctuation glued to the link would be swallowed into the path.
    # * The case-insensitive flag is scoped to the scheme/host part only: with a
    #   global IGNORECASE the non-ASCII range would also match ASCII "i", "k"
    #   and "s" through case folding (U+0130, U+212A, U+017F).
    GITHUB_URL_PATTERN = re.compile(
        r"(?i:https?://(?:www\.)?github\.com/)[^\s<>\u0080-\U0010ffff]+"
    )

    # Punctuation stripped from the end of a URL, so a link followed by a
    # period/bracket does not produce a failing request.
    TRAILING_PUNCTUATION = ".,!?;:,。!?;:'\"`)]}>"

    @property
    def name(self) -> str:
        """Human-readable plugin name."""
        return "GitHub OpenGraph"

    @property
    def version(self) -> str:
        """Plugin version string."""
        return "1.0.0"

    @property
    def description(self) -> str:
        """Short user-facing description of the plugin."""
        return "自动将GitHub链接转换为OpenGraph预览图片。"

    @property
    def author(self) -> str:
        """Plugin author."""
        return "ABOT Team"

    @property
    def command_prefix(self) -> Optional[str]:
        """Command prefix; empty because links are detected automatically."""
        return ""

    @property
    def commands(self) -> List[str]:
        """Supported commands; empty because the plugin is not command-driven."""
        return []

    @property
    def feature_key(self) -> Optional[str]:
        """Key identifying this plugin in the permission system."""
        return self.FEATURE_KEY

    @property
    def feature_description(self) -> Optional[str]:
        """Description shown for this plugin in the permission system."""
        return self.FEATURE_DESCRIPTION

    def __init__(self):
        """Set default settings and register the feature permission."""
        super().__init__()
        self.LOG = logger
        self.enable = True
        self.match_mode = "fuzzy"
        self.max_links_per_message = 3
        self.hash_salt = ""
        self.request_timeout_seconds = 15
        # Register the feature so it can be toggled per group later on.
        self.feature = self.register_feature()

    def initialize(self, context: Dict[str, Any]) -> bool:
        """Load plugin settings from the ``GithubOpenGraph`` config section.

        Invalid values never abort initialization: they are replaced by their
        defaults and a warning is logged.

        Args:
            context: Initialization context supplied by the caller (not used).

        Returns:
            Always ``True``.
        """
        # ``or {}`` also covers a section that is present but empty/null.
        cfg = self._config.get("GithubOpenGraph", {}) or {}

        self.enable = bool(cfg.get("enable", True))

        self.match_mode = str(cfg.get("match_mode", "fuzzy") or "fuzzy").strip().lower()
        if self.match_mode not in {"fuzzy", "exact"}:
            self.LOG.warning(f"[{self.name}] match_mode={self.match_mode} 非法,已回退为 fuzzy")
            self.match_mode = "fuzzy"

        self.max_links_per_message = self._read_int_option(
            cfg, "max_links_per_message", default=3, minimum=1
        )
        self.hash_salt = str(cfg.get("hash_salt", "") or "")
        self.request_timeout_seconds = self._read_int_option(
            cfg, "request_timeout_seconds", default=15, minimum=3
        )

        self.LOG.info(
            f"[{self.name}] 初始化完成: enable={self.enable}, mode={self.match_mode}, "
            f"max_links={self.max_links_per_message}, timeout={self.request_timeout_seconds}s"
        )
        return True

    def _read_int_option(self, cfg: Dict[str, Any], key: str, default: int, minimum: int) -> int:
        """Read an integer option, falling back to ``default`` when unusable.

        Args:
            cfg: Plugin configuration section.
            key: Option name inside ``cfg``.
            default: Value used when the option is missing, falsy or not an integer.
            minimum: Lower bound applied to the final value.

        Returns:
            The parsed value, never smaller than ``minimum``.
        """
        raw = cfg.get(key, default) or default
        try:
            value = int(raw)
        except (TypeError, ValueError, OverflowError):
            self.LOG.warning(f"[{self.name}] {key}={raw!r} 非法,已回退为 {default}")
            value = default
        return max(minimum, value)

    def start(self) -> bool:
        """Mark the plugin as running."""
        self.status = PluginStatus.RUNNING
        return True

    def stop(self) -> bool:
        """Mark the plugin as stopped."""
        self.status = PluginStatus.STOPPED
        return True

    def can_process(self, message: Dict[str, Any]) -> bool:
        """Cheaply decide whether a message should enter ``process_message``.

        Args:
            message: Incoming message; ``type`` and ``content`` are read.

        Returns:
            ``True`` when the plugin is enabled, the message is a text message
            and its content may contain (fuzzy mode) or is (exact mode) a
            GitHub link.
        """
        if not self.enable:
            return False
        if message.get("type") != MessageType.TEXT:
            return False

        content = str(message.get("content", "") or "").strip()
        if not content:
            return False

        # Fuzzy mode: any occurrence of "github.com/" is a potential hit.
        if self.match_mode == "fuzzy":
            return "github.com/" in content.lower()

        # Exact mode: the whole message must be a parseable GitHub link.
        return bool(self._normalize_github_url(content))

    @plugin_stats_decorator(plugin_name="GitHub OpenGraph")
    async def process_message(self, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
        """Send an OpenGraph preview image for each GitHub link in a message.

        Args:
            message: Incoming message; reads ``roomid``, ``sender``, ``gbm``,
                ``bot`` and ``content``.

        Returns:
            ``(handled, summary)``. ``handled`` is ``True`` when at least one
            image was sent; ``summary`` describes the outcome.
        """
        room_id = str(message.get("roomid", "") or "").strip()
        sender = str(message.get("sender", "") or "").strip()
        # Reply to the group when there is one, otherwise to the sender.
        target = room_id or sender
        gbm: Optional[GroupBotManager] = message.get("gbm")
        bot = message.get("bot")
        content = str(message.get("content", "") or "").strip()

        # Group chats: skip entirely when the feature is switched off.
        if (
            room_id
            and gbm
            and self.feature
            and gbm.get_group_permission(room_id, self.feature) == PermissionStatus.DISABLED
        ):
            self.LOG.debug(f"[{self.name}] 群权限关闭,跳过: room_id={room_id}")
            return False, "没有权限"

        github_links = self._extract_github_links(content)
        if not github_links:
            return False, "未匹配到GitHub链接"

        # Without a bot or a target nothing can be sent; bail out before
        # spending time on image downloads.
        if bot is None or not target:
            self.LOG.warning(f"[{self.name}] 缺少bot实例或发送目标,跳过: target={target}")
            return False, "缺少bot实例或发送目标"

        # De-duplicate within the message and cap the amount of work, so the
        # same image is never sent twice.
        unique_links = self._deduplicate_keep_order(github_links)[: self.max_links_per_message]

        sent_count = 0
        failed_links: List[str] = []
        for link in unique_links:
            og_url = self._build_opengraph_url(link)
            image_bytes = await self._download_image_bytes(og_url)
            if not image_bytes:
                failed_links.append(link)
                continue

            try:
                # The raw bytes are handed over as-is; any encoding/upload is
                # left to send_image_message.
                await bot.send_image_message(target, image_bytes)
            except Exception as e:
                # Best effort: one failed send must not abort the remaining links.
                failed_links.append(link)
                self.LOG.error(f"[{self.name}] 发送OpenGraph失败: target={target}, link={link}, error={e}")
            else:
                sent_count += 1
                self.LOG.info(f"[{self.name}] 发送OpenGraph成功: target={target}, link={link}")

        if sent_count <= 0:
            return False, f"GitHub链接解析失败,失败数量={len(failed_links)}"

        summary = f"已发送{sent_count}张GitHub预览图"
        if failed_links:
            summary += f",失败{len(failed_links)}条"
        return True, summary

    def _extract_github_links(self, content: str) -> List[str]:
        """Extract normalized GitHub links according to the match mode.

        Args:
            content: Message text.

        Returns:
            Normalized links in order of appearance (possibly with duplicates).
            In exact mode the list has at most one entry.
        """
        content = str(content or "").strip()
        if not content:
            return []

        if self.match_mode == "exact":
            exact_url = self._normalize_github_url(content)
            return [exact_url] if exact_url else []

        # Fuzzy mode: collect every candidate, keep those that normalize.
        candidates = (
            self._normalize_github_url(raw)
            for raw in self.GITHUB_URL_PATTERN.findall(content)
        )
        return [url for url in candidates if url]

    def _normalize_github_url(self, raw_url: str) -> str:
        """Normalize a GitHub link.

        Rules:
            1. Strip surrounding whitespace and trailing punctuation.
            2. Reject text that still contains whitespace (it is not a single URL).
            3. Accept only ``github.com`` / ``www.github.com`` over http(s).
            4. Require a path with at least two segments (``/owner/repo``).
            5. Drop query and fragment to reduce duplicate previews.

        Args:
            raw_url: Candidate URL text.

        Returns:
            ``https://github.com/<path>`` on success, otherwise ``""``.
        """
        if not raw_url:
            return ""

        cleaned = str(raw_url).strip().rstrip(self.TRAILING_PUNCTUATION)
        if not cleaned.lower().startswith(("http://", "https://")):
            return ""
        # A URL followed by more words is not "a link"; without this check the
        # extra words would end up inside the path.
        if any(ch.isspace() for ch in cleaned):
            return ""

        try:
            parsed = urlparse(cleaned)
        except ValueError:
            # Raised by urlparse for malformed input (e.g. a broken IPv6 host).
            return ""

        host = str(parsed.netloc or "").lower()
        if host not in {"github.com", "www.github.com"}:
            return ""

        # Require owner/repo at minimum so the GitHub home page or a bare
        # user page is not treated as a preview link.
        path_parts = [part for part in str(parsed.path or "").split("/") if part]
        if len(path_parts) < 2:
            return ""

        # Unify the host to github.com and keep the path segments as they are.
        return "https://github.com/" + "/".join(path_parts)

    def _build_opengraph_url(self, github_url: str) -> str:
        """Convert a GitHub link into its OpenGraph image URL.

        The result is ``https://opengraph.githubassets.com/<digest><path>``,
        where ``digest`` is the first 20 hex characters of the SHA-256 of the
        link, prefixed with ``hash_salt|`` when a salt is configured.

        Args:
            github_url: Normalized GitHub link.

        Returns:
            The OpenGraph image URL.
        """
        path = str(urlparse(github_url).path or "/")
        hash_text = f"{self.hash_salt}|{github_url}" if self.hash_salt else github_url
        # NOTE(review): the digest segment presumably only acts as a cache key
        # on the image host - confirm before relying on it.
        digest = hashlib.sha256(hash_text.encode("utf-8")).hexdigest()[:20]
        return f"https://opengraph.githubassets.com/{digest}{path}"

    async def _download_image_bytes(self, image_url: str) -> bytes:
        """Download an OpenGraph image.

        Args:
            image_url: URL of the image to fetch.

        Returns:
            The response body, or ``b""`` when the status is not 200, the
            Content-Type does not mention ``image``, or the request fails.
        """
        timeout = aiohttp.ClientTimeout(total=self.request_timeout_seconds)
        try:
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(image_url) as resp:
                    if resp.status != 200:
                        self.LOG.warning(f"[{self.name}] 拉取OpenGraph失败: status={resp.status}, url={image_url}")
                        return b""

                    content_type = str(resp.headers.get("Content-Type", "") or "").lower()
                    if "image" not in content_type:
                        self.LOG.warning(
                            f"[{self.name}] 返回内容不是图片: content_type={content_type}, url={image_url}"
                        )
                        return b""

                    return await resp.read()
        except Exception as e:
            # Deliberately broad: this is a best-effort fetch and any failure
            # (network, timeout, protocol) is reported as "no image".
            self.LOG.warning(f"[{self.name}] 拉取OpenGraph异常: url={image_url}, error={e}")
            return b""

    @staticmethod
    def _deduplicate_keep_order(items: List[str]) -> List[str]:
        """Remove duplicates while keeping the first occurrence of each item.

        Args:
            items: Items to de-duplicate.

        Returns:
            A new list with duplicates removed, in original order.
        """
        seen: Set[str] = set()
        result: List[str] = []
        for item in items:
            if item in seen:
                continue
            seen.add(item)
            result.append(item)
        return result