加入绅士R15的内容
This commit is contained in:
5
main.py
5
main.py
@@ -119,6 +119,11 @@ def jobs(robot: Robot):
|
||||
async def xiuren_download_job():
|
||||
await robot.xiu_ren_download_task()
|
||||
|
||||
# ✅ Every day at 02:30: download the Shenshi R15 posts
|
||||
@async_job.at_times(["2:30"])
async def shenshiR15_download_job():
    """Job registered for "2:30": delegate to the robot's Shenshi R15 download task."""
    await robot.shen_shi_download_task()
|
||||
|
||||
# ✅ 每天 17:30 发秀人 PDF(如果启用)
|
||||
# @async_job.at_times(["17:30"])
|
||||
# async def xiuren_pdf_send_job():
|
||||
|
||||
226
plugins/xiuren_image/shenshi_r15.py
Normal file
226
plugins/xiuren_image/shenshi_r15.py
Normal file
@@ -0,0 +1,226 @@
|
||||
import os
|
||||
import time
|
||||
import requests
|
||||
import urllib3
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
|
||||
# --- Configuration ---
BASE_URL = "https://www.hentaiclub.net"
# First page of the R15 category listing; the crawl starts here.
START_URL = f"{BASE_URL}/sort/r15.html/1/"

# Root directory that receives one sub-folder per downloaded post.
download_root = "/mnt/nfs_share"

# Maximum number of new posts downloaded per run.
DAILY_LIMIT = 10

# Silence the InsecureRequestWarning raised for requests sent with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Browser-like request headers sent with the HTTP requests in this module.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    "Referer": BASE_URL,
}
|
||||
|
||||
|
||||
def get_post_id(url):
    """
    Extract the unique post ID from a post URL.

    Example: ".../64068.html" -> "64068"

    The query string and fragment are removed first, so a "/" or "."
    inside them cannot be mistaken for part of the path.

    Args:
        url: Absolute or relative URL of a post page.

    Returns:
        The ID as a string, or None when ``url`` is not a string or no
        ID can be extracted from it.
    """
    if not isinstance(url, str):
        return None

    # Drop "#fragment" and "?query" before looking at the path.
    path = url.split('#', 1)[0].split('?', 1)[0]
    filename = path.rsplit('/', 1)[-1]
    post_id = filename.split('.', 1)[0]

    # Normalise "nothing found" to None so callers get one falsy value.
    return post_id or None
|
||||
|
||||
|
||||
def is_downloaded(post_id, root=None):
    """
    Report whether a post has already been downloaded.

    A post counts as downloaded when ``root`` contains an entry named
    exactly ``post_id`` or starting with ``"<post_id>_"``.

    Args:
        post_id: ID string of the post.
        root: Directory to inspect. Defaults to the module-level
            ``download_root``.

    Returns:
        True when a matching entry exists; False otherwise, including
        when the directory is missing or cannot be read.
    """
    if root is None:
        root = download_root

    # A missing root means nothing can have been downloaded yet.
    if not os.path.exists(root):
        return False

    try:
        existing_entries = os.listdir(root)
    except OSError as e:
        print(f"无法读取目录 {root}: {e}")
        return False

    # Match either "ID" or "ID_title".
    prefix = f"{post_id}_"
    return any(
        name == post_id or name.startswith(prefix)
        for name in existing_entries
    )
|
||||
|
||||
|
||||
def get_soup(url):
    """Fetch ``url`` and return the parsed page, skipping SSL verification.

    Returns:
        A BeautifulSoup document on success, or None when the request or
        the parsing raises (the error is printed, not propagated).
    """
    try:
        page = requests.get(url, headers=HEADERS, timeout=10, verify=False)
        # Treat HTTP error statuses as failures as well.
        page.raise_for_status()
        html = page.text
        return BeautifulSoup(html, 'html.parser')
    except Exception as err:
        print(f"请求失败: {url} - 错误: {err}")
        return None
|
||||
|
||||
|
||||
def download_image_as_jpg(img_url, folder_path, file_name):
    """
    Download an image and store it as a JPEG file using Pillow.

    Args:
        img_url: URL of the image to fetch.
        folder_path: Directory the file is written into.
        file_name: Target file name inside ``folder_path``.

    Returns:
        True when the target file already existed or was written
        successfully; False when downloading or converting failed.
        Failures are printed, never raised.
    """
    tmp_path = None
    try:
        file_path = os.path.join(folder_path, file_name)

        if os.path.exists(file_path):
            return True

        # verify=False: certificate errors are deliberately ignored.
        resp = requests.get(img_url, headers=HEADERS, timeout=20, verify=False)
        resp.raise_for_status()

        # Write to a temporary sibling and rename afterwards, so a failed
        # save never leaves a truncated file that a later run would skip
        # as "already downloaded".
        tmp_path = file_path + ".part"

        with Image.open(BytesIO(resp.content)) as img:
            # The JPEG writer rejects modes such as RGBA, LA, P or I.
            # Convert everything it is not known to accept.
            if img.mode in ("RGB", "L", "CMYK"):
                jpeg_ready = img
            else:
                jpeg_ready = img.convert("RGB")
            jpeg_ready.save(tmp_path, "JPEG", quality=90)

        os.replace(tmp_path, file_path)
        return True

    except Exception as e:
        print(f" -> 图片处理失败 {img_url}: {e}")
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup: the temp file may never have been created.
                pass
        return False
|
||||
|
||||
|
||||
def parse_detail_page(post_url, post_title, post_id):
    """
    Parse a post's detail page and download all of its images.

    Images are stored as ``001.jpg``, ``002.jpg``, ... inside
    ``<download_root>/<post_id>_<sanitised title>``.

    Args:
        post_url: URL of the post's detail page.
        post_title: Human-readable title, used for the folder name.
        post_id: Unique post ID, used as the folder-name prefix.

    Returns:
        True when at least one image file exists in the post folder
        afterwards. False when the page could not be fetched, contained
        no images, the folder could not be created, or no image was saved.
    """
    # Keep only characters that are safe in a directory name.
    safe_title = "".join(
        c for c in post_title if c.isalnum() or c in (' ', '-', '_')
    ).strip()
    folder_name = f"{post_id}_{safe_title}"

    # Build the target path below the global download_root.
    post_dir = os.path.join(download_root, folder_name)

    print(f"正在处理: {folder_name}")

    soup = get_soup(post_url)
    if not soup:
        return False

    items = soup.select('#masonry .post-item')
    print(f" 包含 {len(items)} 张图片,开始下载并转为 JPG...")

    # Check for images before creating the folder. An empty folder would
    # make is_downloaded() report this post as done forever.
    if not items:
        return False

    try:
        os.makedirs(post_dir, exist_ok=True)
    except OSError as e:
        print(f"无法创建目录 {post_dir}: {e}")
        return False

    saved_count = 0
    for index, item in enumerate(items, start=1):
        # Prefer the full-size image link.
        img_url = item.get('data-src')
        if not img_url:
            img_tag = item.find('img')
            if img_tag:
                img_url = img_tag.get('data-original') or img_tag.get('src')

        if not img_url:
            continue

        # Complete relative URLs.
        if not img_url.startswith('http'):
            img_url = urljoin(BASE_URL, img_url)

        # Always name the file .jpg, since the image is converted on save.
        file_name = f"{index:03d}.jpg"
        download_image_as_jpg(img_url, post_dir, file_name)

        # Judge success by the file on disk, so this does not depend on
        # what the download helper returns.
        if os.path.exists(os.path.join(post_dir, file_name)):
            saved_count += 1

    if saved_count == 0:
        # Nothing was stored: drop the empty folder so the post is retried
        # on a later run and does not count against the daily limit.
        try:
            os.rmdir(post_dir)
        except OSError:
            # Folder is not empty or cannot be removed; leave it in place.
            pass
        return False

    print(" 完成.\n")
    return True
|
||||
|
||||
|
||||
def run_daily_job():
    """
    Main routine: download up to ``DAILY_LIMIT`` posts that are not on disk yet.

    Walks the listing pages starting at ``START_URL`` and skips posts that
    ``is_downloaded`` reports as present. It stops when the limit is
    reached, a listing page cannot be fetched or is empty, or there is no
    further page to visit.
    """
    current_download_count = 0
    current_page_url = START_URL
    # Listing pages already scanned; guards against a pagination cycle.
    visited_pages = set()

    print(f"下载目录: {download_root}")

    while current_download_count < DAILY_LIMIT:
        visited_pages.add(current_page_url)
        print(f"正在扫描列表页: {current_page_url}")
        soup = get_soup(current_page_url)
        if not soup:
            break

        items = soup.select('#masonry .item')
        if not items:
            print("本页无内容。")
            break

        for item in items:
            link_tag = item.select_one('a.item-link')
            if not link_tag:
                continue

            href = link_tag.get('href')
            if not href:
                continue

            # Resolve relative links against the listing page, so the
            # detail-page request always gets an absolute URL.
            post_url = urljoin(current_page_url, href)

            # 1. Extract the post ID.
            post_id = get_post_id(post_url)
            if not post_id:
                continue

            # 2. Skip posts that already have a folder in the download root.
            if is_downloaded(post_id):
                continue

            # 3. Download the new post.
            title_div = link_tag.select_one('.item-link-text')
            title = title_div.get_text(strip=True) if title_div else "未命名"

            success = parse_detail_page(post_url, title, post_id)

            if success:
                current_download_count += 1
                print(f"=== 进度: {current_download_count}/{DAILY_LIMIT} ===\n")

                # Check right after the increment, so the completion
                # message is printed even when the limit is reached on the
                # last item of a page.
                if current_download_count >= DAILY_LIMIT:
                    print(f"=== 今日任务已完成 ({DAILY_LIMIT}个) ===")
                    return

                time.sleep(2)  # avoid sending requests too quickly

        # Move on to the next listing page.
        next_page = soup.select_one('.page-navigator .next a')
        next_href = next_page.get('href') if next_page else None
        next_url = urljoin(current_page_url, next_href) if next_href else None

        if not next_url or next_url in visited_pages:
            print("已到达最后一页,没有更多帖子了。")
            break

        current_page_url = next_url
        time.sleep(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Create the download root if it does not exist yet.
    if not os.path.exists(download_root):
        try:
            # exist_ok covers the directory appearing between the check
            # above and this call (e.g. the NFS share being mounted).
            os.makedirs(download_root, exist_ok=True)
            print(f"创建目录成功: {download_root}")
        except OSError as e:
            print(f"错误: 无法创建根目录 {download_root}。请检查权限或手动挂载 NFS。")
            print(f"系统报错: {e}")
            # exit() is a helper added by the site module for interactive
            # use; SystemExit is always available.
            raise SystemExit(1)

    print(f"开始任务:下载 {DAILY_LIMIT} 个新帖子 (JPG格式, 忽略SSL)")
    run_daily_job()
|
||||
8
robot.py
8
robot.py
@@ -20,6 +20,7 @@ from configuration import Config
|
||||
from db.connection import DBConnectionManager
|
||||
from db.contacts_db import ContactsDBOperator
|
||||
from plugins.xiuren_image.meitu_dl import meitu_dowload_pub_pic
|
||||
from plugins.xiuren_image.shenshi_r15 import run_daily_job
|
||||
from utils.email_util import EmailSender
|
||||
from utils.revoke.message_auto_revoke import MessageAutoRevoke
|
||||
from utils.robot_cmd.robot_command import GroupBotManager, Feature, PermissionStatus
|
||||
@@ -709,6 +710,13 @@ class Robot:
|
||||
except Exception as e:
|
||||
self.LOG.error(f"xiu_ren_download_task error:{e}")
|
||||
|
||||
async def shen_shi_download_task(self):
    """Run the Shenshi R15 download job without blocking the event loop.

    ``run_daily_job`` is synchronous: it performs blocking HTTP requests
    and calls ``time.sleep``. It is therefore run in the loop's default
    executor and awaited, so other coroutines keep running meanwhile.
    Any error is logged, not raised.
    """
    # Imported locally because the file's top-level imports are not fully
    # visible from this block.
    import asyncio

    try:
        # Download up to the daily limit of new posts.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, run_daily_job)
    except Exception as e:
        self.LOG.error(f"shen_shi_download_task error:{e}")
|
||||
|
||||
async def generate_and_send_ranking(self):
|
||||
try:
|
||||
receivers = self.gbm.get_group_list()
|
||||
|
||||
Reference in New Issue
Block a user