加入绅士R15的内容

This commit is contained in:
liuwei
2025-12-10 14:53:54 +08:00
parent 33897dc1cd
commit e204610bc5
3 changed files with 239 additions and 0 deletions

View File

@@ -119,6 +119,11 @@ def jobs(robot: Robot):
async def xiuren_download_job(): async def xiuren_download_job():
await robot.xiu_ren_download_task() await robot.xiu_ren_download_task()
# ✅ Registered with at_times(["2:30"]) -- presumably a daily 02:30 run (confirm
# against async_job): delegates to robot.shen_shi_download_task().
@async_job.at_times(["2:30"])
async def shenshiR15_download_job():
    await robot.shen_shi_download_task()
# ✅ 每天 17:30 发秀人 PDF如果启用 # ✅ 每天 17:30 发秀人 PDF如果启用
# @async_job.at_times(["17:30"]) # @async_job.at_times(["17:30"])
# async def xiuren_pdf_send_job(): # async def xiuren_pdf_send_job():

View File

@@ -0,0 +1,226 @@
import os
import time
import requests
import urllib3
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from io import BytesIO
from PIL import Image
# --- Configuration ---
# Site root; also sent as the Referer header and used to resolve relative URLs.
BASE_URL = "https://www.hentaiclub.net"
# First listing page that the daily job starts scanning from.
START_URL = "https://www.hentaiclub.net/sort/r15.html/1/"
# Root directory for all downloads (module-level global read by the functions below).
download_root = "/mnt/nfs_share"
# Maximum number of new posts to download per run of the daily job.
DAILY_LIMIT = 10
# Silence urllib3's InsecureRequestWarning; requests in this module pass verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Browser-like request headers sent with every HTTP request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Referer": BASE_URL
}
def get_post_id(url):
    """Extract the post ID from a post URL.

    The ID is the last path segment up to its first dot, e.g.
    ``.../64068.html`` -> ``"64068"``.  Only the path component of the URL is
    inspected, so a query string or fragment cannot leak into the ID.

    Args:
        url: The post URL (absolute or relative).

    Returns:
        The ID as a string (empty when the path ends with ``/``), or ``None``
        when ``url`` is not a string or cannot be parsed as a URL.
    """
    from urllib.parse import urlsplit

    # Non-string input (e.g. a missing href -> None) has no ID.
    if not isinstance(url, str):
        return None
    try:
        path = urlsplit(url).path
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. a broken IPv6 host).
        return None
    filename = path.rsplit('/', 1)[-1]
    return filename.split('.')[0]
def is_downloaded(post_id):
    """Tell whether a folder for ``post_id`` already exists under ``download_root``.

    An entry matches when its name is exactly the ID or starts with
    ``"<ID>_"`` (the ``ID_title`` naming scheme).

    Returns:
        True when a matching entry is found; False otherwise, including when
        the root directory is missing or cannot be listed.
    """
    # A missing root directory means nothing can have been downloaded yet.
    if not os.path.exists(download_root):
        return False

    try:
        entries = os.listdir(download_root)
    except OSError as err:
        print(f"无法读取目录 {download_root}: {err}")
        return False

    # Accept either "ID" or "ID_<title>".
    id_prefix = f"{post_id}_"
    return any(
        name.startswith(id_prefix) or name == post_id
        for name in entries
    )
def get_soup(url):
    """Fetch ``url`` and parse it into a BeautifulSoup tree.

    The request is sent with the module HEADERS, a 10 second timeout and TLS
    certificate verification disabled.

    Returns:
        The parsed document, or ``None`` when the request or parsing fails
        (the failure is printed, not raised).
    """
    try:
        page = requests.get(url, headers=HEADERS, timeout=10, verify=False)
        page.raise_for_status()
        soup = BeautifulSoup(page.text, 'html.parser')
    except Exception as err:
        print(f"请求失败: {url} - 错误: {err}")
        soup = None
    return soup
def download_image_as_jpg(img_url, folder_path, file_name):
    """Download one image and store it as a JPEG file using Pillow.

    The download is skipped when the target file already exists.  The image
    is first written to a temporary ``.part`` file and only renamed to the
    final name after a successful save, so a failed or interrupted save never
    leaves a broken file that the exists-check would later treat as done.

    Args:
        img_url: Absolute URL of the image.
        folder_path: Directory the file is written into (must already exist).
        file_name: Name of the resulting JPEG file inside ``folder_path``.

    Returns:
        None.  Any failure is printed and swallowed (best effort).
    """
    # Modes Pillow's JPEG writer accepts as-is; every other mode (RGBA, P,
    # LA, PA, I;16, ...) has to be converted before saving.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    tmp_path = None
    try:
        file_path = os.path.join(folder_path, file_name)
        if os.path.exists(file_path):
            return
        # Fetch the image bytes (verify=False ignores certificate errors).
        resp = requests.get(img_url, headers=HEADERS, timeout=20, verify=False)
        resp.raise_for_status()
        tmp_path = file_path + ".part"
        # Decode from memory; the context manager releases the image afterwards.
        with Image.open(BytesIO(resp.content)) as src:
            img = src if src.mode in jpeg_modes else src.convert("RGB")
            # The format is given explicitly, so the ".part" suffix is harmless.
            img.save(tmp_path, "JPEG", quality=90)
        # Publish the finished file under its final name in one step.
        os.replace(tmp_path, file_path)
    except Exception as e:
        print(f" -> 图片处理失败 {img_url}: {e}")
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup: the temp file may never have been created.
                pass
def parse_detail_page(post_url, post_title, post_id):
    """Parse a post's detail page and download all of its images.

    Images are stored as ``001.jpg``, ``002.jpg``, ... inside
    ``download_root/<post_id>_<sanitised title>``.

    The folder is only created once the page is known to contain image
    entries, and it is removed again if not a single image could be saved.
    This matters because an existing ``<post_id>_...`` folder is what marks a
    post as already downloaded; an empty folder would hide the post from all
    later runs.

    Args:
        post_url: URL of the post's detail page.
        post_title: Human readable title, used (sanitised) in the folder name.
        post_id: Unique post ID, used as the folder name prefix.

    Returns:
        True when at least one image file exists for the post afterwards,
        False when the page could not be fetched, contained no images, the
        folder could not be created, or no image could be saved.
    """
    # Keep only characters that are safe in a directory name.
    safe_title = "".join([c for c in post_title if c.isalnum() or c in (' ', '-', '_')]).strip()
    folder_name = f"{post_id}_{safe_title}"
    # Build the target path below the global download_root.
    post_dir = os.path.join(download_root, folder_name)
    print(f"正在处理: {folder_name}")

    soup = get_soup(post_url)
    if not soup:
        return False

    items = soup.select('#masonry .post-item')
    if not items:
        # Nothing to download: do not create a folder that would mark the
        # post as done.
        print(f" 未找到图片,跳过: {post_url}")
        return False

    # Create the post folder (tolerating one that already exists).
    try:
        os.makedirs(post_dir, exist_ok=True)
    except OSError as e:
        print(f"无法创建目录 {post_dir}: {e}")
        return False

    print(f" 包含 {len(items)} 张图片,开始下载并转为 JPG...")
    saved_count = 0
    for index, item in enumerate(items, start=1):
        # Prefer the full-size image link, then fall back to the <img> tag.
        img_url = item.get('data-src')
        if not img_url:
            img_tag = item.find('img')
            if img_tag:
                img_url = img_tag.get('data-original') or img_tag.get('src')
        if not img_url:
            continue
        # Resolve relative links against the site root.
        if not img_url.startswith('http'):
            img_url = urljoin(BASE_URL, img_url)
        # Always store under a .jpg name, numbered by position on the page.
        file_name = f"{index:03d}.jpg"
        download_image_as_jpg(img_url, post_dir, file_name)
        # The downloader reports nothing, so check the result on disk.
        if os.path.exists(os.path.join(post_dir, file_name)):
            saved_count += 1

    if saved_count == 0:
        print(f" 没有成功保存任何图片,已放弃: {folder_name}")
        try:
            # Only succeeds on an empty directory, which is the intent here.
            os.rmdir(post_dir)
        except OSError:
            # Best-effort cleanup; leaving the folder behind is not fatal.
            pass
        return False

    print(f" 完成.\n")
    return True
def run_daily_job():
    """Download up to ``DAILY_LIMIT`` posts that are not on disk yet.

    Walks the listing pages starting at ``START_URL``.  Each listed post whose
    ID has no folder under ``download_root`` is downloaded through
    ``parse_detail_page``; only successful downloads count towards the limit.
    The walk stops when the limit is reached, a listing page cannot be
    fetched or is empty, or there is no further (unvisited) next page.

    Returns:
        None.  Progress and problems are reported with ``print``.
    """
    current_download_count = 0
    current_page_url = START_URL
    # Listing pages already scanned; guards against a "next" link that points
    # back to a page we have seen, which would otherwise loop forever.
    visited_pages = set()
    print(f"下载目录: {download_root}")

    while current_download_count < DAILY_LIMIT:
        visited_pages.add(current_page_url)
        print(f"正在扫描列表页: {current_page_url}")
        soup = get_soup(current_page_url)
        if not soup:
            break

        items = soup.select('#masonry .item')
        if not items:
            print("本页无内容。")
            break

        for item in items:
            # Stop as soon as the daily quota is met.
            if current_download_count >= DAILY_LIMIT:
                print(f"=== 今日任务已完成 ({DAILY_LIMIT}个) ===")
                return

            link_tag = item.select_one('a.item-link')
            if not link_tag:
                continue
            href = link_tag.get('href')
            if not href:
                continue
            # Resolve relative links against the listing page they came from;
            # absolute links pass through unchanged.
            post_url = urljoin(current_page_url, href)

            # 1. Extract the post ID.
            post_id = get_post_id(post_url)
            if not post_id:
                continue

            # 2. Skip posts that already have a folder in the download root.
            if is_downloaded(post_id):
                continue

            # 3. Download the new post.
            title_div = link_tag.select_one('.item-link-text')
            title = title_div.get_text(strip=True) if title_div else "未命名"
            success = parse_detail_page(post_url, title, post_id)
            if success:
                current_download_count += 1
                print(f"=== 进度: {current_download_count}/{DAILY_LIMIT} ===\n")
            # Throttle after every attempted post, successful or not, so that
            # a run of failures does not hammer the site.
            time.sleep(2)

        # Pagination: follow the "next" link when it leads somewhere new.
        next_page = soup.select_one('.page-navigator .next a')
        next_href = next_page.get('href') if next_page else None
        next_url = urljoin(current_page_url, next_href) if next_href else None
        if not next_url or next_url in visited_pages:
            print("已到达最后一页,没有更多帖子了。")
            break
        current_page_url = next_url
        time.sleep(1)
if __name__ == "__main__":
    # Script entry point: make sure the download root exists, then run the job.
    if not os.path.exists(download_root):
        try:
            # exist_ok covers the directory appearing between the check above
            # and this call (e.g. the share being mounted in the meantime).
            os.makedirs(download_root, exist_ok=True)
            print(f"创建目录成功: {download_root}")
        except OSError as e:
            print(f"错误: 无法创建根目录 {download_root}。请检查权限或手动挂载 NFS。")
            print(f"系统报错: {e}")
            # SystemExit instead of the site-provided exit() helper, which is
            # meant for interactive sessions and missing under `python -S`.
            raise SystemExit(1)
    print(f"开始任务:下载 {DAILY_LIMIT} 个新帖子 (JPG格式, 忽略SSL)")
    run_daily_job()

View File

@@ -20,6 +20,7 @@ from configuration import Config
from db.connection import DBConnectionManager from db.connection import DBConnectionManager
from db.contacts_db import ContactsDBOperator from db.contacts_db import ContactsDBOperator
from plugins.xiuren_image.meitu_dl import meitu_dowload_pub_pic from plugins.xiuren_image.meitu_dl import meitu_dowload_pub_pic
from plugins.xiuren_image.shenshi_r15 import run_daily_job
from utils.email_util import EmailSender from utils.email_util import EmailSender
from utils.revoke.message_auto_revoke import MessageAutoRevoke from utils.revoke.message_auto_revoke import MessageAutoRevoke
from utils.robot_cmd.robot_command import GroupBotManager, Feature, PermissionStatus from utils.robot_cmd.robot_command import GroupBotManager, Feature, PermissionStatus
@@ -709,6 +710,13 @@ class Robot:
except Exception as e: except Exception as e:
self.LOG.error(f"xiu_ren_download_task error{e}") self.LOG.error(f"xiu_ren_download_task error{e}")
async def shen_shi_download_task(self):
    """Run the Shenshi R15 daily download without blocking the event loop.

    ``run_daily_job`` is synchronous (blocking HTTP requests and sleeps), so
    it is handed to the loop's default executor and awaited from there; other
    coroutines keep running while the download is in progress.

    Any exception raised by the job is logged through ``self.LOG`` and not
    propagated.
    """
    import asyncio

    try:
        # Download the daily batch of posts in a worker thread.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, run_daily_job)
    except Exception as e:
        self.LOG.error(f"shen_shi_download_task error{e}")
async def generate_and_send_ranking(self): async def generate_and_send_ranking(self):
try: try:
receivers = self.gbm.get_group_list() receivers = self.gbm.get_group_list()