diff --git a/utils/wechat/message_to_db.py b/utils/wechat/message_to_db.py index b74813e..c8a38c8 100644 --- a/utils/wechat/message_to_db.py +++ b/utils/wechat/message_to_db.py @@ -3,7 +3,6 @@ import time from datetime import datetime, timedelta import xml.etree.ElementTree as ET import concurrent.futures # 添加线程池支持 -import html import os import base64 import re @@ -127,61 +126,26 @@ class MessageStorage: if not xml_content: return {} - normalized_xml = html.unescape(xml_content) - aeskey = "" - md5 = "" - length = 0 - urls = [] - - try: - root = ET.fromstring(normalized_xml) - emoji_node = root.find(".//emoji") - if emoji_node is not None: - aeskey = (emoji_node.attrib.get("aeskey") or "").strip() - md5 = (emoji_node.attrib.get("md5") or "").strip() - try: - length = int((emoji_node.attrib.get("len") or "0").strip() or 0) - except Exception: - length = 0 - urls.extend([ - (emoji_node.attrib.get("encrypturl") or "").strip(), - (emoji_node.attrib.get("cdnurl") or "").strip(), - (emoji_node.attrib.get("thumburl") or "").strip(), - ]) - except Exception: - pass - - if not aeskey: - aeskey_match = self._aeskey_re.search(normalized_xml) - aeskey = aeskey_match.group(1).strip() if aeskey_match else "" - - if not md5: - md5_match = re.search(r'md5="(.*?)"', normalized_xml) - md5 = md5_match.group(1).strip() if md5_match else "" - - if not length: - length_match = re.search(r'len="(\d+)"', normalized_xml) - length = int(length_match.group(1)) if length_match else 0 - - encrypt_match = self._emoji_encrypt_re.search(normalized_xml) - cdn_match = self._emoji_cdn_re.search(normalized_xml) - thumb_match = self._emoji_thumb_re.search(normalized_xml) - urls.extend([ - encrypt_match.group(1).strip() if encrypt_match else "", - cdn_match.group(1).strip() if cdn_match else "", - thumb_match.group(1).strip() if thumb_match else "", - ]) - urls = [url for index, url in enumerate(urls) if url and url not in urls[:index]] - - if not aeskey or not urls: + aeskey_match = self._aeskey_re.search(xml_content) + if not aeskey_match: return {} + url_match = ( + self._emoji_cdn_re.search(xml_content) + or self._emoji_encrypt_re.search(xml_content) + or self._emoji_thumb_re.search(xml_content) + ) + if not url_match: + return {} + + md5_match = re.search(r'md5="(.*?)"', xml_content) + length_match = re.search(r'len="(\d+)"', xml_content) + return { - "aeskey": aeskey, - "url": urls[0], - "urls": urls, - "md5": md5, - "length": length, + "aeskey": aeskey_match.group(1), + "url": url_match.group(1), + "md5": md5_match.group(1) if md5_match else "", + "length": int(length_match.group(1)) if length_match else 0, } async def _process_emoji_record(self, msg_record: Dict) -> bool: @@ -199,24 +163,8 @@ class MessageStorage: return False try: - base64_str = None - last_error = None - for file_url in emoji_info.get("urls", []) or [emoji_info.get("url", "")]: - if not file_url: - continue - try: - base64_str = await self.client.download_cdn_file(emoji_info["aeskey"], file_url) - if base64_str: - break - except Exception as e: - last_error = e - logger.warning( - f"表情下载地址尝试失败: msg_id={message_id}, url={file_url[:120]}, error={e}" - ) + base64_str = await self.client.download_cdn_file(emoji_info["aeskey"], emoji_info["url"]) if not base64_str: - if last_error: - logger.warning(f"表情下载全部地址均失败: msg_id={message_id}, error={last_error}") - return False logger.warning(f"表情下载返回为空: msg_id={message_id}") return False