Files
JieXi/parsers/factory.py
2025-11-30 19:49:25 +08:00

154 lines
5.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import random
from urllib.parse import urljoin, urlparse

import requests

from parsers.douyin import DouyinParser
from parsers.tiktok import TikTokParser
from parsers.bilibili import BilibiliMirParser, BilibiliBugPKParser, BilibiliYaohuParser
from parsers.kuaishou import KuaishouBugPKParser, KuaishouUctbParser
from parsers.pipixia import PipixiaBugPKParser, PipixiaUctbParser
from parsers.weibo import WeiboUctbParser, WeiboYaohuParser
from models import ParserAPI
class ParserFactory:
    """Factory that turns video URLs and API configurations into parsers.

    All methods are static; the class only serves as a namespace.
    """

    # Hosts of link shorteners that must be resolved before parsing.
    _SHORT_DOMAINS = ('b23.tv', 'v.douyin.com', 't.cn')

    # Upper bound on manually followed redirects, so that short links which
    # redirect to each other cannot loop forever.
    _MAX_REDIRECT_HOPS = 5

    _REDIRECT_STATUSES = (301, 302, 303, 307, 308)
    _REQUEST_TIMEOUT = 10
    _REQUEST_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    # (platform, markers) in lookup priority order. Each marker is used both
    # as a host suffix and, as a fallback, as a plain substring of the input.
    _PLATFORM_MARKERS = (
        ('douyin', ('douyin.com', 'v.douyin')),
        ('tiktok', ('tiktok.com',)),
        ('bilibili', ('bilibili.com', 'b23.tv')),
        ('kuaishou', ('kuaishou.com', 'gifshow.com')),
        ('pipixia', ('pipix.com', 'pipixia.com', 'h5.pipix.com')),
        ('weibo', ('weibo.com', 'weibo.cn')),
    )

    @staticmethod
    def _hostname(url: str) -> str:
        """Return the lower-cased host of *url*, or '' if none can be parsed."""
        try:
            return urlparse(url.strip()).hostname or ''
        except ValueError:
            # urlparse rejects some malformed inputs (e.g. broken IPv6 hosts).
            return ''

    @staticmethod
    def _host_matches(host: str, domain: str) -> bool:
        """Return True if *host* is *domain* itself or one of its subdomains."""
        return host == domain or host.endswith('.' + domain)

    @staticmethod
    def _is_short_url(url: str) -> bool:
        """Return True if the host of *url* belongs to a known link shortener."""
        host = ParserFactory._hostname(url)
        return bool(host) and any(
            ParserFactory._host_matches(host, domain)
            for domain in ParserFactory._SHORT_DOMAINS
        )

    @staticmethod
    def expand_short_url(url: str) -> str:
        """Resolve a short link to the URL it redirects to.

        URLs whose host is not a known shortener are returned unchanged
        without any network access. Resolution is best-effort: on any
        failure the most recently resolved URL (initially *url*) is returned.

        Args:
            url: The link to expand.

        Returns:
            The expanded URL, or the best known URL if expansion failed.
        """
        if not ParserFactory._is_short_url(url):
            return url

        current = url
        try:
            for _ in range(ParserFactory._MAX_REDIRECT_HOPS):
                # Do not follow redirects automatically; read the Location
                # header so each hop can be inspected.
                response = requests.head(
                    current,
                    headers=ParserFactory._REQUEST_HEADERS,
                    allow_redirects=False,
                    timeout=ParserFactory._REQUEST_TIMEOUT,
                )
                location = ''
                if response.status_code in ParserFactory._REDIRECT_STATUSES:
                    location = response.headers.get('Location', '')

                if not location:
                    # HEAD gave no usable redirect; let a GET follow them.
                    response = requests.get(
                        current,
                        headers=ParserFactory._REQUEST_HEADERS,
                        allow_redirects=True,
                        timeout=ParserFactory._REQUEST_TIMEOUT,
                    )
                    return response.url

                # Location may be relative; resolve it against the request URL.
                current = urljoin(current, location)
                if not ParserFactory._is_short_url(current):
                    return current
                # Still a short link: expand it again on the next iteration.
        except Exception:
            # Deliberate best-effort: expansion failures must not break the
            # caller, so fall back to the best URL known so far.
            return current
        return current

    @staticmethod
    def create_parser(api_config: "ParserAPI"):
        """Create the parser instance matching an API configuration.

        The parser class is chosen from ``api_config.platform`` and, for
        platforms with several providers, from a marker found in
        ``api_config.api_url``.

        Args:
            api_config: Object exposing ``platform``, ``api_url`` and
                ``api_key``.

        Returns:
            A parser constructed with ``(api_url, api_key)``.

        Raises:
            ValueError: If the platform is not supported.
        """
        platform = api_config.platform.lower()
        api_url = api_config.api_url
        api_key = api_config.api_key

        if platform == 'douyin':
            return DouyinParser(api_url, api_key)
        elif platform == 'tiktok':
            return TikTokParser(api_url, api_key)
        elif platform == 'bilibili':
            # Pick the parser by the provider marker in the API URL.
            if 'mir6' in api_url:
                return BilibiliMirParser(api_url, api_key)
            elif 'bugpk' in api_url:
                return BilibiliBugPKParser(api_url, api_key)
            elif 'yaohud' in api_url:
                return BilibiliYaohuParser(api_url, api_key)
            else:
                return BilibiliMirParser(api_url, api_key)
        elif platform == 'kuaishou':
            if 'bugpk' in api_url:
                return KuaishouBugPKParser(api_url, api_key)
            elif 'uctb' in api_url:
                return KuaishouUctbParser(api_url, api_key)
            else:
                return KuaishouBugPKParser(api_url, api_key)
        elif platform == 'pipixia':
            if 'bugpk' in api_url:
                return PipixiaBugPKParser(api_url, api_key)
            elif 'uctb' in api_url:
                return PipixiaUctbParser(api_url, api_key)
            else:
                return PipixiaBugPKParser(api_url, api_key)
        elif platform == 'weibo':
            if 'uctb' in api_url:
                return WeiboUctbParser(api_url, api_key)
            elif 'yaohud' in api_url:
                return WeiboYaohuParser(api_url, api_key)
            else:
                return WeiboUctbParser(api_url, api_key)
        else:
            raise ValueError(f"不支持的平台: {platform}")

    @staticmethod
    def get_parser_for_platform(platform: str):
        """Return a parser for *platform* together with the API row it uses.

        Only APIs that are enabled and marked healthy are considered. For
        bilibili with more than one candidate the API is drawn by weight;
        every other platform uses the first candidate.

        Args:
            platform: Platform name; matched case-insensitively.

        Returns:
            A ``(parser, api)`` tuple.

        Raises:
            Exception: If no enabled, healthy API exists for the platform.
        """
        platform_key = platform.lower()
        apis = ParserAPI.query.filter_by(
            platform=platform_key,
            is_enabled=True,
            health_status=True
        ).all()
        if not apis:
            raise Exception(f"没有可用的{platform}解析接口")

        if platform_key == 'bilibili' and len(apis) > 1:
            # Spread bilibili traffic across providers according to weight.
            api = ParserFactory._weighted_random_choice(apis)
        else:
            api = apis[0]
        return ParserFactory.create_parser(api), api

    @staticmethod
    def _weighted_random_choice(apis):
        """Pick one element of *apis* with probability proportional to ``weight``.

        Missing (``None``) or negative weights count as zero, so such entries
        are never picked while any positive weight exists. If no entry has a
        positive weight the choice is uniform.
        """
        weights = [max(api.weight or 0, 0) for api in apis]
        if sum(weights) <= 0:
            return random.choice(apis)
        return random.choices(apis, weights=weights, k=1)[0]

    @staticmethod
    def detect_platform(video_url: str) -> str:
        """Detect which platform a video link belongs to.

        The host of the URL is checked first so that a platform domain
        appearing only in the path or query string cannot override the real
        host. If the host is missing or unknown (for example share text with
        a URL embedded in it), the whole input is scanned for platform
        markers as a fallback.

        Args:
            video_url: A video URL or text containing one.

        Returns:
            One of 'douyin', 'tiktok', 'bilibili', 'kuaishou', 'pipixia',
            'weibo'.

        Raises:
            ValueError: If no known platform is recognised.
        """
        host = ParserFactory._hostname(video_url)
        if host:
            for platform, markers in ParserFactory._PLATFORM_MARKERS:
                if any(ParserFactory._host_matches(host, m) for m in markers):
                    return platform

        url_lower = video_url.lower()
        for platform, markers in ParserFactory._PLATFORM_MARKERS:
            if any(marker in url_lower for marker in markers):
                return platform
        raise ValueError("无法识别的视频平台")