Files
abot/xiuren/xiuren_dl.py
2025-02-24 10:44:17 +08:00

50 lines
1.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
from urllib.parse import urljoin

import requests
from lxml import etree
# Download every image of the newest gallery listed on the target page.
#
# Flow: fetch the listing page, take the first entry (assumed to be the
# newest), open its detail page, and save each linked image into `save_dir`
# as "<title>_<n>.jpg".

# Target listing URL and the headers sent with every request.
url = "https://www.xiurenwang.cc/bang?f=7"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Referer": "https://www.xiurenwang.cc/",
}

# requests applies no timeout by default; without one a stalled server would
# block the script forever.
REQUEST_TIMEOUT = 15  # seconds

# Path separators, characters rejected by common file systems, and line
# breaks/tabs are all mapped to "_" so the title is safe to use in a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|\r\n\t'})

_NOT_FOUND_MESSAGE = "未找到图片链接可能需要调整XPath或检查网站结构。"

# Fetch the listing page; fail loudly on an HTTP error instead of parsing an
# error page as if it were the listing.
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
response.encoding = "utf-8"  # force UTF-8 so the titles decode correctly

# Parse the HTML.
html = etree.HTML(response.text)

# Extract detail-page links and titles (assumes the newest entries appear on
# the listing page).
image_items = html.xpath('//div[@class="list"]/li/a[@class="img"]/@href')
titles = html.xpath('//div[@class="tit"]/a/text()')

# Create the output directory; exist_ok avoids the check-then-create race.
save_dir = "./xiuren_images"
os.makedirs(save_dir, exist_ok=True)

# Only the first entry is processed (assumed to be the newest one).
if not image_items:
    print(_NOT_FOUND_MESSAGE)
else:
    # urljoin handles root-relative, relative and absolute hrefs alike, where
    # plain string concatenation only worked for root-relative ones.
    latest_url = urljoin(url, image_items[0])

    # Text nodes may carry surrounding whitespace or unsafe characters; fall
    # back to a fixed name when nothing usable is left.
    raw_title = titles[0].strip() if titles else ""
    latest_title = raw_title.translate(_UNSAFE_FILENAME_CHARS) or "latest_image"

    # Fetch the detail page and collect the image links.
    detail_response = requests.get(
        latest_url, headers=headers, timeout=REQUEST_TIMEOUT
    )
    detail_response.raise_for_status()
    detail_response.encoding = "utf-8"  # same decoding as the listing page
    detail_html = etree.HTML(detail_response.text)
    image_urls = detail_html.xpath('//div[@id="image"]/a/@href')

    if not image_urls:
        print(_NOT_FOUND_MESSAGE)
    else:
        # Download the images one by one; a single failure is reported and
        # skipped so the remaining images are still fetched.
        downloaded = 0
        for idx, img_url in enumerate(image_urls, start=1):
            img_path = os.path.join(save_dir, f"{latest_title}_{idx}.jpg")
            try:
                img_response = requests.get(
                    urljoin(latest_url, img_url),
                    headers=headers,
                    timeout=REQUEST_TIMEOUT,
                )
                # Do not write an HTTP error page to disk as a ".jpg".
                img_response.raise_for_status()
            except requests.RequestException as exc:
                print(f"下载失败: {img_url} ({exc})")
                continue
            with open(img_path, "wb") as f:
                f.write(img_response.content)
            print(f"已下载: {img_path}")
            downloaded += 1

        # Report completion only when something was actually saved.
        if downloaded:
            print("最新图片下载完成!")