优化表情异步补处理的参数提取与下载重试
- 对表情 XML 先做 HTML 实体还原,避免下载地址中的 &amp; 影响请求
- 优先使用 XML 节点提取 aeskey、md5、len 和多种下载地址,正则作为补充兜底
- 异步补处理时按 encrypturl、cdnurl、thumburl 顺序重试下载
- 所有地址失败时记录业务告警,避免无意义异常栈刷屏
This commit is contained in:
@@ -3,6 +3,7 @@ import time
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
import concurrent.futures # 添加线程池支持
|
import concurrent.futures # 添加线程池支持
|
||||||
|
import html
|
||||||
import os
|
import os
|
||||||
import base64
|
import base64
|
||||||
import re
|
import re
|
||||||
@@ -126,26 +127,61 @@ class MessageStorage:
|
|||||||
if not xml_content:
|
if not xml_content:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
aeskey_match = self._aeskey_re.search(xml_content)
|
normalized_xml = html.unescape(xml_content)
|
||||||
if not aeskey_match:
|
aeskey = ""
|
||||||
return {}
|
md5 = ""
|
||||||
|
length = 0
|
||||||
|
urls = []
|
||||||
|
|
||||||
url_match = (
|
try:
|
||||||
self._emoji_cdn_re.search(xml_content)
|
root = ET.fromstring(normalized_xml)
|
||||||
or self._emoji_encrypt_re.search(xml_content)
|
emoji_node = root.find(".//emoji")
|
||||||
or self._emoji_thumb_re.search(xml_content)
|
if emoji_node is not None:
|
||||||
)
|
aeskey = (emoji_node.attrib.get("aeskey") or "").strip()
|
||||||
if not url_match:
|
md5 = (emoji_node.attrib.get("md5") or "").strip()
|
||||||
return {}
|
try:
|
||||||
|
length = int((emoji_node.attrib.get("len") or "0").strip() or 0)
|
||||||
|
except Exception:
|
||||||
|
length = 0
|
||||||
|
urls.extend([
|
||||||
|
(emoji_node.attrib.get("encrypturl") or "").strip(),
|
||||||
|
(emoji_node.attrib.get("cdnurl") or "").strip(),
|
||||||
|
(emoji_node.attrib.get("thumburl") or "").strip(),
|
||||||
|
])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
md5_match = re.search(r'md5="(.*?)"', xml_content)
|
if not aeskey:
|
||||||
length_match = re.search(r'len="(\d+)"', xml_content)
|
aeskey_match = self._aeskey_re.search(normalized_xml)
|
||||||
|
aeskey = aeskey_match.group(1).strip() if aeskey_match else ""
|
||||||
|
|
||||||
|
if not md5:
|
||||||
|
md5_match = re.search(r'md5="(.*?)"', normalized_xml)
|
||||||
|
md5 = md5_match.group(1).strip() if md5_match else ""
|
||||||
|
|
||||||
|
if not length:
|
||||||
|
length_match = re.search(r'len="(\d+)"', normalized_xml)
|
||||||
|
length = int(length_match.group(1)) if length_match else 0
|
||||||
|
|
||||||
|
encrypt_match = self._emoji_encrypt_re.search(normalized_xml)
|
||||||
|
cdn_match = self._emoji_cdn_re.search(normalized_xml)
|
||||||
|
thumb_match = self._emoji_thumb_re.search(normalized_xml)
|
||||||
|
urls.extend([
|
||||||
|
encrypt_match.group(1).strip() if encrypt_match else "",
|
||||||
|
cdn_match.group(1).strip() if cdn_match else "",
|
||||||
|
thumb_match.group(1).strip() if thumb_match else "",
|
||||||
|
])
|
||||||
|
urls = [url for index, url in enumerate(urls) if url and url not in urls[:index]]
|
||||||
|
|
||||||
|
if not aeskey or not urls:
|
||||||
|
return {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"aeskey": aeskey_match.group(1),
|
"aeskey": aeskey,
|
||||||
"url": url_match.group(1),
|
"url": urls[0],
|
||||||
"md5": md5_match.group(1) if md5_match else "",
|
"urls": urls,
|
||||||
"length": int(length_match.group(1)) if length_match else 0,
|
"md5": md5,
|
||||||
|
"length": length,
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _process_emoji_record(self, msg_record: Dict) -> bool:
|
async def _process_emoji_record(self, msg_record: Dict) -> bool:
|
||||||
@@ -163,8 +199,24 @@ class MessageStorage:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
base64_str = await self.client.download_cdn_file(emoji_info["aeskey"], emoji_info["url"])
|
base64_str = None
|
||||||
|
last_error = None
|
||||||
|
for file_url in emoji_info.get("urls", []) or [emoji_info.get("url", "")]:
|
||||||
|
if not file_url:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
base64_str = await self.client.download_cdn_file(emoji_info["aeskey"], file_url)
|
||||||
|
if base64_str:
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
last_error = e
|
||||||
|
logger.warning(
|
||||||
|
f"表情下载地址尝试失败: msg_id={message_id}, url={file_url[:120]}, error={e}"
|
||||||
|
)
|
||||||
if not base64_str:
|
if not base64_str:
|
||||||
|
if last_error:
|
||||||
|
logger.warning(f"表情下载全部地址均失败: msg_id={message_id}, error={last_error}")
|
||||||
|
return False
|
||||||
logger.warning(f"表情下载返回为空: msg_id={message_id}")
|
logger.warning(f"表情下载返回为空: msg_id={message_id}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user