自动任务发放功能调整
This commit is contained in:
@@ -382,6 +382,7 @@ def run_random_task_assignment(group_id):
|
|||||||
return
|
return
|
||||||
result = assign_random_task(group_id)
|
result = assign_random_task(group_id)
|
||||||
print(f"{datetime.now()} {result}")
|
print(f"{datetime.now()} {result}")
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
# 处理群聊消息
|
# 处理群聊消息
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ def get_first_group_id(key):
|
|||||||
group_ids = r.smembers(mapping_prefix + key)
|
group_ids = r.smembers(mapping_prefix + key)
|
||||||
if group_ids:
|
if group_ids:
|
||||||
first_group_id = next(iter(group_ids)) # 获取集合中的第一个元素
|
first_group_id = next(iter(group_ids)) # 获取集合中的第一个元素
|
||||||
return f"First Group ID for {key}: {first_group_id}"
|
print(f"First Group ID for {key}: {first_group_id}")
|
||||||
return first_group_id
|
return first_group_id
|
||||||
else:
|
else:
|
||||||
return f"Key '{key}' has no associated group IDs."
|
return f"Key '{key}' has no associated group IDs."
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import xml.etree.ElementTree as ET
|
|||||||
from wcferry import Wcf
|
from wcferry import Wcf
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class GroupMemberChange:
|
class GroupMemberChange:
|
||||||
def __init__(self, wcf: Wcf):
|
def __init__(self, wcf: Wcf):
|
||||||
self.wcf = wcf # 假设 wcf 对象在此类中初始化
|
self.wcf = wcf # 假设 wcf 对象在此类中初始化
|
||||||
@@ -71,6 +72,8 @@ class GroupMemberChange:
|
|||||||
print(f"Membercount changed: {membercount_previous} -> {membercount_current}")
|
print(f"Membercount changed: {membercount_previous} -> {membercount_current}")
|
||||||
members_current = self.get_current_members(group_id)
|
members_current = self.get_current_members(group_id)
|
||||||
|
|
||||||
|
# TODO 如果用户达到了500人,则删除该群自动添加内容
|
||||||
|
|
||||||
# 比较成员,仅使用 wxid 进行比较
|
# 比较成员,仅使用 wxid 进行比较
|
||||||
members_current_set = set(members_current.keys())
|
members_current_set = set(members_current.keys())
|
||||||
members_previous_set = set(members_previous.keys())
|
members_previous_set = set(members_previous.keys())
|
||||||
|
|||||||
@@ -1,49 +1,145 @@
|
|||||||
import requests
|
import requests
|
||||||
from lxml import etree
|
from bs4 import BeautifulSoup
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
|
||||||
# 设置目标URL和请求头
|
|
||||||
url = "https://www.xiurenwang.cc/bang?f=7"
|
|
||||||
headers = {
|
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
|
||||||
"Referer": "https://www.xiurenwang.cc/"
|
|
||||||
}
|
|
||||||
|
|
||||||
# 发送请求获取网页内容
|
def get_html(url):
|
||||||
response = requests.get(url, headers=headers)
|
headers = {
|
||||||
response.encoding = "utf-8" # 确保正确解码
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
|
||||||
|
'AppleWebKit/537.36 (KHTML, like Gecko) '
|
||||||
|
'Chrome/114.0.0.0 Safari/537.36'
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
response = requests.get(url, headers=headers)
|
||||||
|
response.raise_for_status()
|
||||||
|
return response.text
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
print(f"Error fetching {url}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
# 解析HTML
|
|
||||||
html = etree.HTML(response.text)
|
|
||||||
|
|
||||||
# 提取图片链接和标题(假设最新图片在列表页面中)
|
def parse_initial_page(html):
|
||||||
image_items = html.xpath('//div[@class="list"]/li/a[@class="img"]/@href')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
titles = html.xpath('//div[@class="tit"]/a/text()')
|
posts = soup.find_all('a', href=lambda x: x and x.endswith('.html'))
|
||||||
|
first_two_posts = posts[:2]
|
||||||
|
post_info = []
|
||||||
|
|
||||||
# 创建保存图片的文件夹
|
print(posts)
|
||||||
save_dir = "./xiuren_images"
|
for post in first_two_posts:
|
||||||
if not os.path.exists(save_dir):
|
text = post.text.strip()
|
||||||
os.makedirs(save_dir)
|
print(f"Post text: '{text}'") # 调试输出,检查实际内容
|
||||||
|
if not text:
|
||||||
|
print("Empty post text, skipping...")
|
||||||
|
continue
|
||||||
|
|
||||||
# 只获取最新的一个条目(假设第一个是最新的)
|
parts = text.split()
|
||||||
if image_items:
|
if len(parts) < 2:
|
||||||
latest_url = "https://www.xiurenwang.cc" + image_items[0] # 拼接详情页URL
|
print(f"Unexpected format in '{text}', skipping...")
|
||||||
latest_title = titles[0] if titles else "latest_image"
|
continue
|
||||||
|
|
||||||
# 访问详情页获取图片
|
# 提取编号和图片总数
|
||||||
detail_response = requests.get(latest_url, headers=headers)
|
number = parts[0].replace('No.', '') if parts[0].startswith('No.') else None
|
||||||
detail_html = etree.HTML(detail_response.text)
|
pages = parts[-1].replace('P', '') if parts[-1].endswith('P') else None
|
||||||
image_urls = detail_html.xpath('//div[@id="image"]/a/@href')
|
|
||||||
|
|
||||||
# 下载图片
|
if not number or not pages:
|
||||||
for idx, img_url in enumerate(image_urls):
|
print(f"Failed to parse number or pages from '{text}', skipping...")
|
||||||
img_response = requests.get(img_url, headers=headers)
|
continue
|
||||||
img_name = f"{latest_title}_{idx + 1}.jpg"
|
|
||||||
img_path = os.path.join(save_dir, img_name.replace('/', '_')) # 避免文件名中的斜杠
|
|
||||||
with open(img_path, "wb") as f:
|
|
||||||
f.write(img_response.content)
|
|
||||||
print(f"已下载: {img_path}")
|
|
||||||
else:
|
|
||||||
print("未找到图片链接,可能需要调整XPath或检查网站结构。")
|
|
||||||
|
|
||||||
print("最新图片下载完成!")
|
try:
|
||||||
|
total_images = int(pages)
|
||||||
|
url = 'https://www.xiurenwang.cc/' + post['href']
|
||||||
|
post_info.append({'url': url, 'number': number, 'total_images': total_images})
|
||||||
|
except ValueError:
|
||||||
|
print(f"Invalid total_images value in '{text}', skipping...")
|
||||||
|
continue
|
||||||
|
print(f"post_info:{post_info}")
|
||||||
|
return post_info
|
||||||
|
|
||||||
|
|
||||||
|
def extract_title_and_first_image(html):
    """Return the page title and the ``src`` of the first gallery image.

    Args:
        html: HTML text of a post page.

    Returns:
        A ``(title, src)`` tuple, where ``src`` is the raw ``src`` attribute
        of the first ``<img>`` whose ``src`` contains ``'pic/'``. ``title``
        is the stripped ``<title>`` text, or ``''`` when the page has no
        ``<title>`` element. Returns ``(None, None)`` when no matching
        image is found.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Only the first match is needed, so avoid building the full result list.
    first_image = soup.find('img', src=lambda src: src and 'pic/' in src)
    if first_image is None:
        return None, None

    # A page without <title> must not crash the scraper: soup.title is None
    # in that case, so fall back to an empty title.
    title_tag = soup.title
    title = title_tag.text.strip() if title_tag is not None else ''
    return title, first_image['src']
|
||||||
|
|
||||||
|
|
||||||
|
def parse_image_url(src):
    """Return the leading integer of the image file name in *src*.

    Example: ``'//host/pic/1234/56.jpg'`` -> ``56``.

    Args:
        src: Image URL or path whose last path segment starts with a number
            followed by ``'.'`` and an extension.

    Returns:
        The numeric part of the file name as an ``int``.

    Raises:
        ValueError: If the file name does not start with an integer.
    """
    # Drop fragment and query first so that '/' or '.' characters inside
    # them cannot be mistaken for part of the file name.
    path = src.split('#', 1)[0].split('?', 1)[0]
    image_filename = path.rsplit('/', 1)[-1]
    stem = image_filename.split('.', 1)[0]
    try:
        return int(stem)
    except ValueError:
        # Same exception type as before, but name the offending URL.
        raise ValueError(
            f"Image file name {image_filename!r} in {src!r} "
            "does not start with a number"
        ) from None
|
||||||
|
|
||||||
|
|
||||||
|
def download_image(image_url, filename, timeout=30):
    """Download *image_url* and write the response body to *filename*.

    Failures are reported with ``print`` and otherwise ignored, so that one
    bad image does not abort a batch download.

    Args:
        image_url: URL of the image to fetch.
        filename: Path of the file to write (opened in binary mode).
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled server would block the run forever.
    """
    try:
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {image_url}: {e}")
        return

    # Keep disk errors (missing directory, full disk, permissions) within
    # the same log-and-continue policy as network errors.
    try:
        with open(filename, 'wb') as f:
            f.write(response.content)
    except OSError as e:
        print(f"Error saving {image_url} to {filename}: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def download_images(image_urls, output_dir):
    """Download every URL in *image_urls* into *output_dir*.

    Files are named ``1.jpg``, ``2.jpg``, ... in iteration order. A random
    pause of 1-3 seconds follows each download.

    Args:
        image_urls: Iterable of image URLs.
        output_dir: Directory to store the files in; created (including
            parents) when it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    for index, image_url in enumerate(image_urls, start=1):
        filename = os.path.join(output_dir, f"{index}.jpg")
        download_image(image_url, filename)
        # Randomised delay between requests.
        time.sleep(random.uniform(1, 3))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Download the images of the posts returned by the ranking page.

    Steps: fetch the ranking page, extract the post descriptors with
    ``parse_initial_page``, then for each post fetch its page, locate the
    first image, derive the remaining image URLs by incrementing the numeric
    file name, and download them into a directory named after the post
    number.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import urljoin

    initial_url = 'https://www.xiurenwang.cc/bang?f=7'
    initial_html = get_html(initial_url)
    if not initial_html:
        return

    post_info = parse_initial_page(initial_html)
    if not post_info:
        print("No valid posts found.")
        return

    for post in post_info:
        post_url = post['url']
        post_number = post['number']
        total_images = post['total_images']

        print(f"Processing post {post_number} with {total_images} images...")
        post_html = get_html(post_url)
        if not post_html:
            continue

        _title, first_image_src = extract_title_and_first_image(post_html)
        if not first_image_src:
            print(f"No image found for post {post_number}")
            continue

        # One unparsable file name must not abort the remaining posts.
        try:
            starting_number = parse_image_url(first_image_src)
        except ValueError as e:
            print(f"Cannot derive image numbering for post {post_number}: {e}")
            continue

        # Resolve the src against the post URL so protocol-relative
        # ('//host/...'), root-relative and absolute values all work;
        # blindly prefixing 'https:' is only right for the first form.
        first_image_url = urljoin(post_url, first_image_src)
        full_base_url = first_image_url.rsplit('/', 1)[0] + '/'

        # Preserve zero padding of the first file name ('001.jpg' ->
        # '002.jpg'); zfill never truncates, so unpadded names are unchanged.
        first_stem = first_image_src.rsplit('/', 1)[-1].split('.', 1)[0]
        width = len(first_stem)

        image_urls = [
            f"{full_base_url}{str(starting_number + offset).zfill(width)}.jpg"
            for offset in range(total_images)
        ]

        # Images of a post are stored in a directory named after its number.
        output_dir = post_number
        download_images(image_urls, output_dir)

        print(f"Downloaded {total_images} images for post {post_number}")


if __name__ == '__main__':
    main()
|
||||||
Reference in New Issue
Block a user