变更项: 1. 新增 github_opengraph 插件主逻辑,支持 fuzzy/exact 两种匹配模式。 2. 新增群功能权限注册(GITHUB_OPENGRAPH),对齐现有群权限开关机制。 3. 实现 GitHub 链接标准化、去重、限流、OpenGraph URL 生成与图片下载发送。 4. 新增 config.toml,提供 enable、match_mode、max_links_per_message、hash_salt、request_timeout_seconds 配置。 5. 新增 README 使用说明与示例。
268 lines
10 KiB
Python
268 lines
10 KiB
Python
# -*- coding: utf-8 -*-
|
||
import hashlib
|
||
import re
|
||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||
from urllib.parse import urlparse
|
||
|
||
import aiohttp
|
||
from loguru import logger
|
||
|
||
from base.plugin_common.message_plugin_interface import MessagePluginInterface
|
||
from base.plugin_common.plugin_interface import PluginStatus
|
||
from utils.decorator.plugin_decorators import plugin_stats_decorator
|
||
from utils.robot_cmd.robot_command import GroupBotManager, PermissionStatus
|
||
from wechat_ipad.models.message import MessageType
|
||
|
||
|
||
class GithubOpenGraphPlugin(MessagePluginInterface):
    """Plugin that replies to GitHub links with their OpenGraph preview image.

    Behaviour implemented by this class:

    1. Detect GitHub links in incoming text messages (``fuzzy`` mode scans the
       whole text, ``exact`` mode requires the message to be a single link).
    2. Convert each link to an ``opengraph.githubassets.com`` image URL,
       download the image and send it to the chat the message came from.
    3. Honour the per-group permission switch registered under
       ``FEATURE_KEY``.
    """

    # Feature-permission constants used to hook into the per-group plugin switch.
    FEATURE_KEY = "GITHUB_OPENGRAPH"
    FEATURE_DESCRIPTION = "🧩 GitHub链接卡片 [自动转OpenGraph图片]"

    # Link extraction pattern. The tail is limited to printable ASCII except
    # '<' and '>' so that a URL immediately followed by CJK text or full-width
    # punctuation (no separating space) is cut at the right place instead of
    # swallowing the following sentence into the path.
    GITHUB_URL_PATTERN = re.compile(
        r"https?://(?:www\.)?github\.com/[\x21-\x3b\x3d\x3f-\x7e]+",
        re.IGNORECASE,
    )

    # Punctuation stripped from the end of a URL, so that "link followed by a
    # full stop / closing bracket" does not end up in the request path.
    TRAILING_PUNCTUATION = ".,!?;:,。!?;:'\"`)]}>"

    @property
    def name(self) -> str:
        return "GitHub OpenGraph"

    @property
    def version(self) -> str:
        return "1.0.0"

    @property
    def description(self) -> str:
        return "自动将GitHub链接转换为OpenGraph预览图片。"

    @property
    def author(self) -> str:
        return "ABOT Team"

    @property
    def command_prefix(self) -> Optional[str]:
        # Links are detected automatically; no command prefix is used.
        return ""

    @property
    def commands(self) -> List[str]:
        # The plugin is not command-triggered, so there are no commands.
        return []

    @property
    def feature_key(self) -> Optional[str]:
        return self.FEATURE_KEY

    @property
    def feature_description(self) -> Optional[str]:
        return self.FEATURE_DESCRIPTION

    def __init__(self):
        super().__init__()
        self.LOG = logger
        # Defaults; overwritten from the config in initialize().
        self.enable = True
        self.match_mode = "fuzzy"
        self.max_links_per_message = 3
        self.hash_salt = ""
        self.request_timeout_seconds = 15
        # Register the feature permission so it can be toggled per group.
        self.feature = self.register_feature()

    def initialize(self, context: Dict[str, Any]) -> bool:
        """Load settings from the ``GithubOpenGraph`` config section.

        Invalid values never abort initialization: an unknown ``match_mode``
        falls back to ``fuzzy`` and a non-numeric integer option falls back to
        its default, each with a warning.

        Args:
            context: Plugin context supplied by the host; not read here.

        Returns:
            Always ``True``.
        """
        cfg = self._config.get("GithubOpenGraph", {})
        self.enable = bool(cfg.get("enable", True))

        self.match_mode = str(cfg.get("match_mode", "fuzzy") or "fuzzy").strip().lower()
        if self.match_mode not in {"fuzzy", "exact"}:
            self.LOG.warning(f"[{self.name}] match_mode={self.match_mode} 非法,已回退为 fuzzy")
            self.match_mode = "fuzzy"

        self.max_links_per_message = self._read_int_option(cfg, "max_links_per_message", 3, 1)
        self.hash_salt = str(cfg.get("hash_salt", "") or "")
        self.request_timeout_seconds = self._read_int_option(cfg, "request_timeout_seconds", 15, 3)

        self.LOG.info(
            f"[{self.name}] 初始化完成: enable={self.enable}, mode={self.match_mode}, "
            f"max_links={self.max_links_per_message}, timeout={self.request_timeout_seconds}s"
        )
        return True

    def _read_int_option(self, cfg: Dict[str, Any], key: str, default: int, minimum: int) -> int:
        """Read an integer option, falling back to ``default`` when unusable.

        A missing or falsy value (``None``, ``0``, ``""``) yields ``default``;
        a value ``int()`` cannot convert logs a warning and yields ``default``.
        The result is clamped to be at least ``minimum``.
        """
        raw = cfg.get(key, default) or default
        try:
            value = int(raw)
        except (TypeError, ValueError, OverflowError):
            self.LOG.warning(f"[{self.name}] {key}={raw!r} 非法,已回退为 {default}")
            value = default
        return max(minimum, value)

    def start(self) -> bool:
        self.status = PluginStatus.RUNNING
        return True

    def stop(self) -> bool:
        self.status = PluginStatus.STOPPED
        return True

    def can_process(self, message: Dict[str, Any]) -> bool:
        """Cheap pre-check deciding whether ``process_message`` should run.

        Returns ``False`` when the plugin is disabled, the message is not a
        text message, or its content is empty.
        """
        if not self.enable:
            return False
        if message.get("type") != MessageType.TEXT:
            return False
        content = str(message.get("content", "") or "").strip()
        if not content:
            return False

        # fuzzy mode: any occurrence of "github.com/" is a potential hit.
        if self.match_mode == "fuzzy":
            return "github.com/" in content.lower()

        # exact mode: the whole message must normalize to a GitHub link.
        return bool(self._normalize_github_url(content))

    @plugin_stats_decorator(plugin_name="GitHub OpenGraph")
    async def process_message(self, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
        """Send an OpenGraph preview image for each GitHub link in a message.

        Reads ``roomid``, ``sender``, ``gbm``, ``bot`` and ``content`` from
        ``message``. Images are sent to the room when ``roomid`` is set,
        otherwise to the sender.

        Returns:
            ``(True, summary)`` when at least one image was sent, otherwise
            ``(False, reason)``.
        """
        room_id = str(message.get("roomid", "") or "").strip()
        sender = str(message.get("sender", "") or "").strip()
        target = room_id if room_id else sender
        gbm: Optional[GroupBotManager] = message.get("gbm")
        bot = message.get("bot")
        content = str(message.get("content", "") or "").strip()

        # Group chats only: skip when the feature is switched off for the room.
        if (
            room_id
            and gbm
            and self.feature
            and gbm.get_group_permission(room_id, self.feature) == PermissionStatus.DISABLED
        ):
            self.LOG.debug(f"[{self.name}] 群权限关闭,跳过: room_id={room_id}")
            return False, "没有权限"

        github_links = self._extract_github_links(content)
        if not github_links:
            return False, "未匹配到GitHub链接"

        # De-duplicate within the message and cap the number of links, so the
        # same image is never sent twice and one message cannot flood the chat.
        unique_links = self._deduplicate_keep_order(github_links)[: self.max_links_per_message]
        sent_count = 0
        failed_links: List[str] = []

        for link in unique_links:
            og_url = self._build_opengraph_url(link)
            image_bytes = await self._download_image_bytes(og_url)
            if not image_bytes:
                failed_links.append(link)
                continue

            try:
                # The raw image bytes are handed straight to the bot client.
                await bot.send_image_message(target, image_bytes)
                sent_count += 1
                self.LOG.info(f"[{self.name}] 发送OpenGraph成功: target={target}, link={link}")
            except Exception as e:
                # Best effort: one failed send must not stop the remaining links.
                failed_links.append(link)
                self.LOG.error(f"[{self.name}] 发送OpenGraph失败: target={target}, link={link}, error={e}")

        if sent_count <= 0:
            return False, f"GitHub链接解析失败,失败数量={len(failed_links)}"

        summary = f"已发送{sent_count}张GitHub预览图"
        if failed_links:
            summary += f",失败{len(failed_links)}条"
        return True, summary

    def _extract_github_links(self, content: str) -> List[str]:
        """Return the normalized GitHub links found in ``content``.

        In ``exact`` mode the whole text must be one link; in ``fuzzy`` mode
        every match of ``GITHUB_URL_PATTERN`` is normalized and kept when
        valid. Duplicates are not removed here.
        """
        content = str(content or "").strip()
        if not content:
            return []

        if self.match_mode == "exact":
            exact_url = self._normalize_github_url(content)
            return [exact_url] if exact_url else []

        # fuzzy mode: pull every candidate out of the text, keep the valid ones.
        candidates = (
            self._normalize_github_url(raw)
            for raw in self.GITHUB_URL_PATTERN.findall(content)
        )
        return [url for url in candidates if url]

    def _normalize_github_url(self, raw_url: str) -> str:
        """Normalize a GitHub link, returning ``""`` when it is not acceptable.

        Rules:

        1. Strip surrounding whitespace and trailing punctuation.
        2. Reject anything that still contains whitespace (i.e. more than a
           single URL token), so ``exact`` mode only accepts a lone link.
        3. Require an ``http``/``https`` scheme and a host of ``github.com``
           or ``www.github.com``.
        4. Require at least two path segments (``/owner/repo``).
        5. Drop query and fragment, collapse empty path segments and rewrite
           the host to ``https://github.com``.
        """
        if not raw_url:
            return ""

        cleaned = str(raw_url).strip().rstrip(self.TRAILING_PUNCTUATION)
        if not cleaned.lower().startswith(("http://", "https://")):
            return ""
        # A URL token never contains whitespace; without this check
        # "https://github.com/a/b some text" would pass as one link.
        if any(ch.isspace() for ch in cleaned):
            return ""

        try:
            parsed = urlparse(cleaned)
        except Exception:
            return ""

        host = str(parsed.netloc or "").lower()
        if host not in {"github.com", "www.github.com"}:
            return ""

        # The path must contain owner/repo, so the GitHub home page or a bare
        # user page is never turned into a preview.
        path = str(parsed.path or "").strip()
        if not path or path == "/":
            return ""
        path_parts = [p for p in path.split("/") if p]
        if len(path_parts) < 2:
            return ""

        # Unify the host to github.com and keep the path segments as given.
        normalized_path = "/" + "/".join(path_parts)
        return f"https://github.com{normalized_path}"

    def _build_opengraph_url(self, github_url: str) -> str:
        """Build the OpenGraph image URL for a normalized GitHub link.

        The URL is ``https://opengraph.githubassets.com/<digest><path>`` where
        ``digest`` is the first 20 hex characters of the SHA-256 of the link,
        prefixed with ``hash_salt`` and ``|`` when a salt is configured.
        """
        parsed = urlparse(github_url)
        path = str(parsed.path or "/")
        hash_text = f"{self.hash_salt}|{github_url}" if self.hash_salt else github_url
        digest = hashlib.sha256(hash_text.encode("utf-8")).hexdigest()[:20]
        return f"https://opengraph.githubassets.com/{digest}{path}"

    async def _download_image_bytes(self, image_url: str) -> bytes:
        """Download an image, returning ``b""`` on any failure.

        A non-200 status, a ``Content-Type`` without ``image`` in it, or any
        exception raised during the request is logged as a warning and
        reported as empty bytes.
        """
        timeout = aiohttp.ClientTimeout(total=self.request_timeout_seconds)
        try:
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(image_url) as resp:
                    if resp.status != 200:
                        self.LOG.warning(f"[{self.name}] 拉取OpenGraph失败: status={resp.status}, url={image_url}")
                        return b""
                    content_type = str(resp.headers.get("Content-Type", "") or "").lower()
                    if "image" not in content_type:
                        self.LOG.warning(
                            f"[{self.name}] 返回内容不是图片: content_type={content_type}, url={image_url}"
                        )
                        return b""
                    return await resp.read()
        except Exception as e:
            self.LOG.warning(f"[{self.name}] 拉取OpenGraph异常: url={image_url}, error={e}")
            return b""

    @staticmethod
    def _deduplicate_keep_order(items: List[str]) -> List[str]:
        """Remove duplicates while keeping the first occurrence of each item."""
        # dict keys are unique and preserve insertion order.
        return list(dict.fromkeys(items))
|
||
|