import xml.etree.ElementTree as ET
import html
import re
def _clean_text(value: str) -> str:
if not value:
return ""
value = html.unescape(value)
value = re.sub(r"
", "\n", value, flags=re.IGNORECASE)
value = re.sub(r"<[^>]+>", "", value)
return value.strip()
def _extract_first(pattern: str, text: str, default: str = "") -> str:
match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
return match.group(1) if match else default
# Build a short display label for a referenced (quoted) message.
#
# ref_type is compared against string codes such as "3" and "34";
# quoted_content and xml_content are searched for markers and, failing that,
# the cleaned quoted_content itself is returned. The final fallback is the
# placeholder "[消息]".
#
# NOTE(review): this function appears to have lost text inside several string
# literals (everything between a "<" and the following ">"), and with it
# whole branches. As shown it does not parse; restore it from the upstream
# file rather than from this copy.
def _format_referenced_content(ref_type: str, quoted_content: str, xml_content: str) -> str:
cleaned = _clean_text(quoted_content)
# Both inputs are concatenated and lowercased so the substring checks below
# are case-insensitive.
lower_xml = (quoted_content or "") + (xml_content or "")
lower_xml = lower_xml.lower()
# NOTE(review): the literal below is split across two lines; presumably it
# began as a tag-name marker and the text up to a later marker was dropped,
# which would explain why type "3" is paired with the "[表情]" label here.
if ref_type in {"3"} or "
" in lower_xml:
return "[表情]"
# NOTE(review): the next line looks like the fusion of an `if` condition with
# the tail of a `title = _extract_first(...) or _extract_first(...)`
# assignment; `title` has no visible assignment otherwise. TODO confirm.
if ref_type in {"34"} or "(.*?)", quoted_content) or _extract_first(r"(.*?)", xml_content)
title = _clean_text(title)
return f"[链接] {title}" if title else "[链接]"
# No marker matched: prefer the cleaned quoted text, else a generic label.
if cleaned:
return cleaned
return "[消息]"
# Derive media-preview metadata for a referenced message.
#
# Starts from a dict with "reference_type" set to "text" and empty
# "preview_image" / "preview_video_thumb" entries, and returns that dict from
# each visible branch. quoted_content is HTML-unescaped into `payload` before
# it is inspected.
#
# NOTE(review): the branch bodies are not recoverable from this copy -- the
# conditions and the assignments into `preview` have lost the text between
# "<" and ">" and several statements are fused. As shown it does not parse;
# restore it from the upstream file.
def _extract_media_preview(ref_type: str, quoted_content: str) -> dict:
payload = html.unescape(quoted_content or "")
preview = {"reference_type": "text", "preview_image": "", "preview_video_thumb": ""}
# NOTE(review): presumably the image branch for type "3"; the dangling
# `_extract_first(...)` fragments suggest a preview value was chosen from
# alternative patterns -- TODO confirm.
if ref_type in {"3"} or "
", payload)
or _extract_first(r"", payload)
)
return preview
# NOTE(review): branch for type codes "43" and "62"; body lost.
if ref_type in {"43", "62"} or "", payload)
)
return preview
# NOTE(review): the line below ends with " dict:", which looks like the tail
# of the next function's signature (called elsewhere as
# `parse_quote_message`). The rest of this function and that `def` line
# appear to be missing.
if ref_type in {"47", "1048625", "1090519089"} or " dict:
# Body of the function that `format_quote_message` calls as
# `parse_quote_message(xml_content)`. It returns a dict describing a quote
# message: the main text, the quoted user's display name, a display label for
# the quoted content, reference ids, media-preview fields and a combined
# "formatted_message" string.
#
# NOTE(review): the `def` line for this body is not present on its own line
# in this copy, and the regex patterns below have been reduced to "(.*?)" /
# ".*?(.*?)" -- presumably each was wrapped in element tags. Confirm the tag
# names against the upstream file before relying on any of these lookups.
#
# NOTE(review): as written both replace() calls substitute a character with
# itself and are no-ops; presumably the first arguments were escaped
# entities. TODO confirm.
xml_content = xml_content.replace('<', '<').replace('>', '>')
# Each field falls back to a placeholder both when the lookup finds nothing
# and when cleaning leaves an empty string.
main_content = _clean_text(_extract_first(r'(.*?)', xml_content, "[无标题]")) or "[无标题]"
display_name = _clean_text(_extract_first(r'(.*?)', xml_content, "未知用户")) or "未知用户"
quoted_content = _extract_first(r'.*?(.*?)', xml_content)
ref_type = _extract_first(r'.*?(.*?)', xml_content)
reference_svrid = _extract_first(r'.*?(.*?)', xml_content)
# The md5 is looked up in the unescaped quoted content: first as an
# md5="..." attribute, then via a second pattern.
reference_md5 = (
_extract_first(r'md5="(.*?)"', html.unescape(quoted_content or ""))
or _extract_first(r"(.*?)", html.unescape(quoted_content or ""))
)
pretty_reference = _format_referenced_content(ref_type, quoted_content, xml_content)
media_preview = _extract_media_preview(ref_type, quoted_content)
# "quoted_content" in the result holds the display label, not the raw
# extracted content. "formatted_message" appends the quote line only when
# both the display name and the label are non-empty.
return {
"main_content": main_content,
"display_name": display_name,
"quoted_content": pretty_reference,
"reference_svrid": reference_svrid,
"reference_md5": reference_md5,
"reference_type": media_preview.get("reference_type", "text"),
"preview_image": media_preview.get("preview_image", ""),
"preview_video_thumb": media_preview.get("preview_video_thumb", ""),
"formatted_message": f"{main_content}\n引用 {display_name}:{pretty_reference}" if display_name and pretty_reference else main_content
}
def format_quote_message(xml_content):
    """Format a quote (reply) message as display text.

    Args:
        xml_content: The message content in XML form.

    Returns:
        The ``"formatted_message"`` value produced by
        ``parse_quote_message``. If that call fails for any reason, the text
        of the first ``<title>`` element is returned instead, or the
        placeholder ``"[引用消息]"`` when no title can be extracted.
    """
    try:
        return parse_quote_message(xml_content)["formatted_message"]
    except Exception:
        # Deliberate best-effort boundary: a message that cannot be parsed
        # must still produce some display text rather than an error.
        # Only strings can be searched; anything else gets the placeholder.
        if isinstance(xml_content, str):
            # NOTE(review): pattern reconstructed from the stated intent
            # ("extract the title tag content"); the copy under review had
            # lost the tag delimiters. Confirm against upstream.
            match = re.search(r'<title>(.*?)</title>', xml_content)
            if match:
                return match.group(1)
        # Return a simple marker instead of surfacing an error message.
        return "[引用消息]"
# Ad-hoc smoke test: formats one hard-coded sample message and prints the
# result when the module is run as a script.
# NOTE(review): the sample below looks like message XML whose element tags
# were stripped (only the text nodes and an escaped <msgsource> section
# remain); replace it with an intact sample before using this as a check.
if __name__ == '__main__':
strs ="""
那也没事,都是富二代,都是送出国镀个金回家继承家业的 57 0 0 0 0 1 2568355911763278189 45317011307@chatroom wxid_twrbhdxddlud12 水牛🐃 都是富二代。 <msgsource> <sec_msg_node> <alnode> <fr>1</fr> </alnode> </sec_msg_node> <pua>1</pua> <silence>1</silence> <membercount>125</membercount> <signature>N0_V1_DaX/Y3s2|v1_PvH3m56P</signature> <tmp_node> <publisher-id></publisher-id> </tmp_node> </msgsource> 1743491847 maoyijie 0 1
"""
sss = format_quote_message(strs)
print(sss)