diff --git a/game_task/game_task_encyclopedia.py b/game_task/game_task_encyclopedia.py index c281361..1205801 100644 --- a/game_task/game_task_encyclopedia.py +++ b/game_task/game_task_encyclopedia.py @@ -382,6 +382,7 @@ def run_random_task_assignment(group_id): return result = assign_random_task(group_id) print(f"{datetime.now()} {result}") + return result # 处理群聊消息 diff --git a/group_auto/group_auto_invite.py b/group_auto/group_auto_invite.py index 7f1247a..c7bad32 100644 --- a/group_auto/group_auto_invite.py +++ b/group_auto/group_auto_invite.py @@ -54,7 +54,7 @@ def get_first_group_id(key): group_ids = r.smembers(mapping_prefix + key) if group_ids: first_group_id = next(iter(group_ids)) # 获取集合中的第一个元素 - return f"First Group ID for {key}: {first_group_id}" + print(f"First Group ID for {key}: {first_group_id}") return first_group_id else: return f"Key '{key}' has no associated group IDs." diff --git a/group_auto/group_member_change.py b/group_auto/group_member_change.py index c2236dd..0133be1 100644 --- a/group_auto/group_member_change.py +++ b/group_auto/group_member_change.py @@ -4,6 +4,7 @@ import xml.etree.ElementTree as ET from wcferry import Wcf + class GroupMemberChange: def __init__(self, wcf: Wcf): self.wcf = wcf # 假设 wcf 对象在此类中初始化 @@ -71,6 +72,8 @@ class GroupMemberChange: print(f"Membercount changed: {membercount_previous} -> {membercount_current}") members_current = self.get_current_members(group_id) + # TODO 如果用户达到了500人,则删除该群自动添加内容 + # 比较成员,仅使用 wxid 进行比较 members_current_set = set(members_current.keys()) members_previous_set = set(members_previous.keys()) diff --git a/xiuren/xiuren_dl.py b/xiuren/xiuren_dl.py index ad03120..ba4a281 100644 --- a/xiuren/xiuren_dl.py +++ b/xiuren/xiuren_dl.py @@ -1,49 +1,145 @@ import requests -from lxml import etree +from bs4 import BeautifulSoup import os +import time +import random -# 设置目标URL和请求头 -url = "https://www.xiurenwang.cc/bang?f=7" -headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - "Referer": "https://www.xiurenwang.cc/" -} -# 发送请求获取网页内容 -response = requests.get(url, headers=headers) -response.encoding = "utf-8" # 确保正确解码 +def get_html(url): + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) ' + 'Chrome/114.0.0.0 Safari/537.36' + } + try: + response = requests.get(url, headers=headers) + response.raise_for_status() + return response.text + except requests.exceptions.RequestException as e: + print(f"Error fetching {url}: {e}") + return None -# 解析HTML -html = etree.HTML(response.text) -# 提取图片链接和标题(假设最新图片在列表页面中) -image_items = html.xpath('//div[@class="list"]/li/a[@class="img"]/@href') -titles = html.xpath('//div[@class="tit"]/a/text()') +def parse_initial_page(html): + soup = BeautifulSoup(html, 'html.parser') + posts = soup.find_all('a', href=lambda x: x and x.endswith('.html')) + first_two_posts = posts[:2] + post_info = [] -# 创建保存图片的文件夹 -save_dir = "./xiuren_images" -if not os.path.exists(save_dir): - os.makedirs(save_dir) + print(posts) + for post in first_two_posts: + text = post.text.strip() + print(f"Post text: '{text}'") # 调试输出,检查实际内容 + if not text: + print("Empty post text, skipping...") + continue -# 只获取最新的一个条目(假设第一个是最新的) -if image_items: - latest_url = "https://www.xiurenwang.cc" + image_items[0] # 拼接详情页URL - latest_title = titles[0] if titles else "latest_image" + parts = text.split() + if len(parts) < 2: + print(f"Unexpected format in '{text}', skipping...") + continue - # 访问详情页获取图片 - detail_response = requests.get(latest_url, headers=headers) - detail_html = etree.HTML(detail_response.text) - image_urls = detail_html.xpath('//div[@id="image"]/a/@href') + # 提取编号和图片总数 + number = parts[0].replace('No.', '') if parts[0].startswith('No.') else None + pages = parts[-1].replace('P', '') if parts[-1].endswith('P') else None - # 下载图片 - for idx, img_url in enumerate(image_urls): - img_response = requests.get(img_url, headers=headers) - img_name = f"{latest_title}_{idx + 1}.jpg" - img_path = os.path.join(save_dir, img_name.replace('/', '_')) # 避免文件名中的斜杠 - with open(img_path, "wb") as f: - f.write(img_response.content) - print(f"已下载: {img_path}") -else: - print("未找到图片链接,可能需要调整XPath或检查网站结构。") + if not number or not pages: + print(f"Failed to parse number or pages from '{text}', skipping...") + continue -print("最新图片下载完成!") + try: + total_images = int(pages) + url = 'https://www.xiurenwang.cc/' + post['href'] + post_info.append({'url': url, 'number': number, 'total_images': total_images}) + except ValueError: + print(f"Invalid total_images value in '{text}', skipping...") + continue + print(f"post_info:{post_info}") + return post_info + + +def extract_title_and_first_image(html): + soup = BeautifulSoup(html, 'html.parser') + title = soup.title.text.strip() + images = soup.find_all('img', src=lambda x: x and 'pic/' in x) + if images: + first_image = images[0] + first_image_src = first_image['src'] + return title, first_image_src + else: + return None, None + + +def parse_image_url(src): + image_filename = src.split('/')[-1] + starting_number = int(image_filename.split('.')[0]) + return starting_number + + +def download_image(image_url, filename): + try: + response = requests.get(image_url) + response.raise_for_status() + with open(filename, 'wb') as f: + f.write(response.content) + except requests.exceptions.RequestException as e: + print(f"Error downloading {image_url}: {e}") + + +def download_images(image_urls, output_dir): + if not os.path.exists(output_dir): + os.makedirs(output_dir) + for i, image_url in enumerate(image_urls): + filename = os.path.join(output_dir, f"{i + 1}.jpg") + download_image(image_url, filename) + time.sleep(random.uniform(1, 3)) + + +def main(): + initial_url = 'https://www.xiurenwang.cc/bang?f=7' + initial_html = get_html(initial_url) + if not initial_html: + return + + post_info = parse_initial_page(initial_html) + if not post_info: + print("No valid posts found.") + return + + for post in post_info: + post_url = post['url'] + post_number = post['number'] + total_images = post['total_images'] + + print(f"Processing post {post_number} with {total_images} images...") + post_html = get_html(post_url) + if not post_html: + continue + + title, first_image_src = extract_title_and_first_image(post_html) + if not first_image_src: + print(f"No image found for post {post_number}") + continue + + starting_number = parse_image_url(first_image_src) + + # Construct full base URL + base_url = first_image_src.rsplit('/', 1)[0] + '/' + full_base_url = 'https:' + base_url + + # Generate image URLs + image_urls = [] + for i in range(total_images): + image_number = starting_number + i + image_url = full_base_url + str(image_number) + '.jpg' + image_urls.append(image_url) + + # Create output directory + output_dir = post_number + download_images(image_urls, output_dir) + + print(f"Downloaded {total_images} images for post {post_number}") + + +if __name__ == '__main__': + main() \ No newline at end of file