154 lines
5.7 KiB
Python
154 lines
5.7 KiB
Python
import random
from urllib.parse import urljoin

import requests

from parsers.douyin import DouyinParser
from parsers.tiktok import TikTokParser
from parsers.bilibili import BilibiliMirParser, BilibiliBugPKParser, BilibiliYaohuParser
from parsers.kuaishou import KuaishouBugPKParser, KuaishouUctbParser
from parsers.pipixia import PipixiaBugPKParser, PipixiaUctbParser
from parsers.weibo import WeiboUctbParser, WeiboYaohuParser
from models import ParserAPI


class ParserFactory:
    """Factory that detects video platforms and builds the matching parser."""

    # Domains whose links are share/short links that must be expanded first.
    _SHORT_DOMAINS = ('b23.tv', 'v.douyin.com', 't.cn')
    # HTTP status codes that carry a redirect target in the Location header.
    _REDIRECT_STATUSES = (301, 302, 303, 307, 308)
    # Upper bound on chained short-link hops, so a redirect loop terminates.
    _MAX_REDIRECT_HOPS = 5
    # Per-request timeout in seconds.
    _REQUEST_TIMEOUT = 10
    _REQUEST_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    # Ordered (platform, url markers) pairs; the first platform with a marker
    # contained in the URL wins.
    _PLATFORM_MARKERS = (
        ('douyin', ('douyin.com', 'v.douyin')),
        ('tiktok', ('tiktok.com',)),
        ('bilibili', ('bilibili.com', 'b23.tv')),
        ('kuaishou', ('kuaishou.com', 'gifshow.com')),
        ('pipixia', ('pipix.com', 'pipixia.com')),
        ('weibo', ('weibo.com', 'weibo.cn')),
    )

    @staticmethod
    def _is_short_url(url: str) -> bool:
        """Return True when *url* contains one of the known short-link domains."""
        lowered = url.lower()
        return any(domain in lowered for domain in ParserFactory._SHORT_DOMAINS)

    @staticmethod
    def expand_short_url(url: str) -> str:
        """Expand a short link and return the real URL.

        URLs that do not contain a known short-link domain are returned
        unchanged without any network access. Expansion is best-effort: on
        any failure the most recently resolved URL is returned instead of
        raising.

        Args:
            url: The URL to expand.

        Returns:
            The expanded URL, or the furthest URL reached if expansion failed.
        """
        if not ParserFactory._is_short_url(url):
            return url

        current = url
        try:
            # Iterate with a hop limit instead of recursing, so a redirect
            # loop between short links cannot run away.
            for _ in range(ParserFactory._MAX_REDIRECT_HOPS):
                # Do not follow redirects; read the target from Location.
                response = requests.head(
                    current,
                    headers=ParserFactory._REQUEST_HEADERS,
                    allow_redirects=False,
                    timeout=ParserFactory._REQUEST_TIMEOUT,
                )
                location = response.headers.get('Location', '')

                if response.status_code in ParserFactory._REDIRECT_STATUSES and location:
                    # Location may be relative; resolve it against the
                    # URL that produced it.
                    current = urljoin(current, location)
                    if not ParserFactory._is_short_url(current):
                        return current
                    # Still a short link: expand the next hop.
                    continue

                # HEAD gave no usable redirect: fall back to a GET that
                # follows redirects. stream=True avoids downloading the body
                # because only the final URL is needed.
                with requests.get(
                    current,
                    headers=ParserFactory._REQUEST_HEADERS,
                    allow_redirects=True,
                    timeout=ParserFactory._REQUEST_TIMEOUT,
                    stream=True,
                ) as final_response:
                    return final_response.url
        except Exception:
            # Deliberate best-effort: expansion failed, so hand back the
            # furthest URL reached (the original URL if the first hop failed).
            return current

        # Hop limit reached while still on a short link.
        return current

    @staticmethod
    def _match_provider(provider_hint, candidates, default):
        """Return the class of the first (keyword, cls) pair whose keyword is in the hint."""
        for keyword, parser_cls in candidates:
            if keyword in provider_hint:
                return parser_cls
        return default

    @staticmethod
    def create_parser(api_config: "ParserAPI"):
        """Create a parser instance from an API configuration.

        The parser class is chosen from ``api_config.platform`` and, for
        platforms with several providers, from a provider keyword found in
        ``api_config.api_url`` (matched case-insensitively; a missing URL
        selects the platform's default provider).

        Args:
            api_config: Object exposing ``platform``, ``api_url`` and
                ``api_key``.

        Returns:
            A parser constructed as ``ParserClass(api_url, api_key)``.

        Raises:
            ValueError: If the platform is not supported.
        """
        platform = api_config.platform.lower()
        api_url = api_config.api_url
        api_key = api_config.api_key
        # Used only for provider matching; the parser gets the URL untouched.
        provider_hint = (api_url or '').lower()

        if platform == 'douyin':
            parser_cls = DouyinParser
        elif platform == 'tiktok':
            parser_cls = TikTokParser
        elif platform == 'bilibili':
            parser_cls = ParserFactory._match_provider(
                provider_hint,
                (
                    ('mir6', BilibiliMirParser),
                    ('bugpk', BilibiliBugPKParser),
                    ('yaohud', BilibiliYaohuParser),
                ),
                default=BilibiliMirParser,
            )
        elif platform == 'kuaishou':
            parser_cls = ParserFactory._match_provider(
                provider_hint,
                (
                    ('bugpk', KuaishouBugPKParser),
                    ('uctb', KuaishouUctbParser),
                ),
                default=KuaishouBugPKParser,
            )
        elif platform == 'pipixia':
            parser_cls = ParserFactory._match_provider(
                provider_hint,
                (
                    ('bugpk', PipixiaBugPKParser),
                    ('uctb', PipixiaUctbParser),
                ),
                default=PipixiaBugPKParser,
            )
        elif platform == 'weibo':
            parser_cls = ParserFactory._match_provider(
                provider_hint,
                (
                    ('uctb', WeiboUctbParser),
                    ('yaohud', WeiboYaohuParser),
                ),
                default=WeiboUctbParser,
            )
        else:
            raise ValueError(f"不支持的平台: {platform}")

        return parser_cls(api_url, api_key)

    @staticmethod
    def get_parser_for_platform(platform: str):
        """Return a parser for *platform* together with the API record used.

        Queries the enabled, healthy ``ParserAPI`` rows for the platform.
        For bilibili with more than one candidate the API is picked by
        weighted random choice (load balancing); otherwise the first row
        is used.

        Args:
            platform: Platform name; matched case-insensitively.

        Returns:
            A ``(parser, api)`` tuple.

        Raises:
            Exception: If no enabled and healthy API exists for the platform.
        """
        platform_key = platform.lower()

        # All enabled and healthy APIs for this platform.
        apis = ParserAPI.query.filter_by(
            platform=platform_key,
            is_enabled=True,
            health_status=True
        ).all()

        if not apis:
            raise Exception(f"没有可用的{platform}解析接口")

        if platform_key == 'bilibili' and len(apis) > 1:
            # Bilibili: spread load across providers by weight.
            api = ParserFactory._weighted_random_choice(apis)
        else:
            # Other platforms: take the first available API.
            api = apis[0]

        return ParserFactory.create_parser(api), api

    @staticmethod
    def _weighted_random_choice(apis):
        """Pick one entry of *apis* at random, proportionally to its ``weight``.

        Missing (``None``) or negative weights count as zero, and entries
        with zero weight are never picked while any entry has a positive
        weight. If no entry has a positive weight the choice is uniform.

        Args:
            apis: Non-empty sequence of objects with a ``weight`` attribute.

        Returns:
            The selected entry.
        """
        weights = [max(float(api.weight or 0), 0.0) for api in apis]
        if sum(weights) <= 0:
            return random.choice(apis)
        return random.choices(apis, weights=weights, k=1)[0]

    @staticmethod
    def detect_platform(video_url: str) -> str:
        """Detect which platform a video link belongs to.

        Matching is a case-insensitive substring search for known domain
        markers, checked in a fixed platform order.

        Args:
            video_url: The video link (or text containing it).

        Returns:
            One of ``'douyin'``, ``'tiktok'``, ``'bilibili'``, ``'kuaishou'``,
            ``'pipixia'`` or ``'weibo'``.

        Raises:
            ValueError: If no known platform marker is found.
        """
        url_lower = video_url.lower()

        for platform, markers in ParserFactory._PLATFORM_MARKERS:
            if any(marker in url_lower for marker in markers):
                return platform

        raise ValueError("无法识别的视频平台")