优化表情异步补处理的参数提取与下载重试

- 对表情 XML 先做 HTML 实体还原，避免下载地址中残留的 &amp; 等转义实体影响请求
- 优先使用 XML 节点提取 aeskey、md5、len 和多种下载地址，正则作为补充兜底
- 异步补处理时按 encrypturl、cdnurl、thumburl 顺序重试下载
- 所有地址失败时记录业务告警，避免无意义异常栈刷屏
This commit is contained in:
liuwei
2026-04-03 08:46:30 +08:00
parent 079f363382
commit 4d2e841fb3

View File

@@ -3,6 +3,7 @@ import time
from datetime import datetime, timedelta
import xml.etree.ElementTree as ET
import concurrent.futures # 添加线程池支持
import html
import os
import base64
import re
@@ -126,26 +127,61 @@ class MessageStorage:
if not xml_content:
return {}
aeskey_match = self._aeskey_re.search(xml_content)
if not aeskey_match:
return {}
normalized_xml = html.unescape(xml_content)
aeskey = ""
md5 = ""
length = 0
urls = []
url_match = (
self._emoji_cdn_re.search(xml_content)
or self._emoji_encrypt_re.search(xml_content)
or self._emoji_thumb_re.search(xml_content)
)
if not url_match:
return {}
try:
root = ET.fromstring(normalized_xml)
emoji_node = root.find(".//emoji")
if emoji_node is not None:
aeskey = (emoji_node.attrib.get("aeskey") or "").strip()
md5 = (emoji_node.attrib.get("md5") or "").strip()
try:
length = int((emoji_node.attrib.get("len") or "0").strip() or 0)
except Exception:
length = 0
urls.extend([
(emoji_node.attrib.get("encrypturl") or "").strip(),
(emoji_node.attrib.get("cdnurl") or "").strip(),
(emoji_node.attrib.get("thumburl") or "").strip(),
])
except Exception:
pass
md5_match = re.search(r'md5="(.*?)"', xml_content)
length_match = re.search(r'len="(\d+)"', xml_content)
if not aeskey:
aeskey_match = self._aeskey_re.search(normalized_xml)
aeskey = aeskey_match.group(1).strip() if aeskey_match else ""
if not md5:
md5_match = re.search(r'md5="(.*?)"', normalized_xml)
md5 = md5_match.group(1).strip() if md5_match else ""
if not length:
length_match = re.search(r'len="(\d+)"', normalized_xml)
length = int(length_match.group(1)) if length_match else 0
encrypt_match = self._emoji_encrypt_re.search(normalized_xml)
cdn_match = self._emoji_cdn_re.search(normalized_xml)
thumb_match = self._emoji_thumb_re.search(normalized_xml)
urls.extend([
encrypt_match.group(1).strip() if encrypt_match else "",
cdn_match.group(1).strip() if cdn_match else "",
thumb_match.group(1).strip() if thumb_match else "",
])
urls = [url for index, url in enumerate(urls) if url and url not in urls[:index]]
if not aeskey or not urls:
return {}
return {
"aeskey": aeskey_match.group(1),
"url": url_match.group(1),
"md5": md5_match.group(1) if md5_match else "",
"length": int(length_match.group(1)) if length_match else 0,
"aeskey": aeskey,
"url": urls[0],
"urls": urls,
"md5": md5,
"length": length,
}
async def _process_emoji_record(self, msg_record: Dict) -> bool:
@@ -163,8 +199,24 @@ class MessageStorage:
return False
try:
base64_str = await self.client.download_cdn_file(emoji_info["aeskey"], emoji_info["url"])
base64_str = None
last_error = None
for file_url in emoji_info.get("urls", []) or [emoji_info.get("url", "")]:
if not file_url:
continue
try:
base64_str = await self.client.download_cdn_file(emoji_info["aeskey"], file_url)
if base64_str:
break
except Exception as e:
last_error = e
logger.warning(
f"表情下载地址尝试失败: msg_id={message_id}, url={file_url[:120]}, error={e}"
)
if not base64_str:
if last_error:
logger.warning(f"表情下载全部地址均失败: msg_id={message_id}, error={last_error}")
return False
logger.warning(f"表情下载返回为空: msg_id={message_id}")
return False