"""Turn a quote ("refermsg") message XML payload into display text.

The payload is scanned with regular expressions rather than an XML parser.
The sample payload in the ``__main__`` demo looks like a WeChat quote
message (``wxid_...`` / ``@chatroom`` identifiers).

NOTE(review): in the reviewed copy of this file every ``<...>`` span inside
string literals had been stripped, leaving empty regex patterns and
syntactically invalid code.  Tag names that the surrounding variable names
make obvious (title, displayname, refermsg/content, refermsg/type) were
restored with high confidence.  Literals that were lost entirely are marked
with ``NOTE(review)`` below and must be confirmed against the original file.
"""

import xml.etree.ElementTree as ET
import html
import re

# Referenced-message type codes.  The image/video/emoji/voice sets are the
# ones visible in the reviewed source.
_IMAGE_TYPES = {"3"}
_VIDEO_TYPES = {"43", "62"}
_EMOJI_TYPES = {"47", "1048625", "1090519089"}
_VOICE_TYPES = {"34"}
# NOTE(review): the link/app-message type set was unreadable; "49" is an
# assumption - confirm.
_LINK_TYPES = {"49"}


def _clean_text(value: str) -> str:
    """Return *value* as plain text.

    HTML entities are unescaped, line-break tags become newlines, every other
    tag is removed and surrounding whitespace is stripped.  Falsy input
    (``None`` or ``""``) yields ``""``.
    """
    if not value:
        return ""
    value = html.unescape(value)
    # NOTE(review): this pattern was blank in the reviewed source; a <br>
    # matcher is assumed because the replacement is "\n" and IGNORECASE is set.
    value = re.sub(r"<br\s*/?>", "\n", value, flags=re.IGNORECASE)
    value = re.sub(r"<[^>]+>", "", value)
    return value.strip()


def _extract_first(pattern: str, text: str, default: str = "") -> str:
    """Return group 1 of the first match of *pattern* in *text*.

    The search is case-insensitive and ``.`` also matches newlines.
    *default* is returned when nothing matches.
    """
    match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
    return match.group(1) if match else default


def _extract_attr(name: str, payload: str) -> str:
    """Return the value of the first ``name="..."`` attribute in *payload*."""
    return _extract_first(rf'\b{re.escape(name)}\s*=\s*"([^"]*)"', payload)


def _format_referenced_content(ref_type: str, quoted_content: str, xml_content: str) -> str:
    """Return a short label or the cleaned text for the quoted message.

    Args:
        ref_type: Type code read from the referenced message.
        quoted_content: Raw content of the referenced message.
        xml_content: The whole (already unescaped) quote XML.

    Returns:
        A bracketed placeholder for media and links, the cleaned quoted text
        for plain messages, or ``"[消息]"`` when nothing usable is left.
    """
    cleaned = _clean_text(quoted_content)
    quoted_lower = (quoted_content or "").lower()
    lower_xml = quoted_lower + (xml_content or "").lower()

    # NOTE(review): the marker strings and the image/video/voice labels were
    # unreadable in the reviewed source.  Only the type sets, "[表情]" and
    # the "[链接]" formatting were visible.  Confirm the rest.
    if ref_type in _IMAGE_TYPES or "<img" in lower_xml:
        return "[图片]"
    if ref_type in _VIDEO_TYPES or "<videomsg" in lower_xml:
        return "[视频]"
    if ref_type in _EMOJI_TYPES or "<emoji" in lower_xml:
        return "[表情]"
    if ref_type in _VOICE_TYPES or "<voicemsg" in lower_xml:
        return "[语音]"
    # Only the quoted payload is inspected here: the enclosing quote XML may
    # itself be wrapped in an <appmsg> element, which would make a check on
    # the whole document always true.
    if ref_type in _LINK_TYPES or "<appmsg" in quoted_lower:
        title = (
            _extract_first(r"<title>(.*?)</title>", quoted_content or "")
            or _extract_first(r"<title>(.*?)</title>", xml_content or "")
        )
        title = _clean_text(title)
        return f"[链接] {title}" if title else "[链接]"
    if cleaned:
        return cleaned
    return "[消息]"


def _extract_media_preview(ref_type: str, quoted_content: str) -> dict:
    """Return preview metadata for a quoted media message.

    The result always has the keys ``reference_type``, ``preview_image`` and
    ``preview_video_thumb``.  For non-media messages they keep their defaults
    (``"text"``, ``""``, ``""``).
    """
    payload = html.unescape(quoted_content or "")
    payload_lower = payload.lower()
    preview = {"reference_type": "text", "preview_image": "", "preview_video_thumb": ""}

    # NOTE(review): the marker strings, the attribute patterns and the
    # "image"/"video"/"emoji" values below were unreadable in the reviewed
    # source and are assumptions - confirm against the original file.
    if ref_type in _IMAGE_TYPES or "<img" in payload_lower:
        preview["reference_type"] = "image"
        preview["preview_image"] = (
            _extract_attr("cdnthumburl", payload)
            or _extract_attr("cdnmidimgurl", payload)
        )
        return preview
    if ref_type in _VIDEO_TYPES or "<videomsg" in payload_lower:
        preview["reference_type"] = "video"
        preview["preview_video_thumb"] = _extract_attr("cdnthumburl", payload)
        return preview
    if ref_type in _EMOJI_TYPES or "<emoji" in payload_lower:
        preview["reference_type"] = "emoji"
        preview["preview_image"] = (
            _extract_attr("cdnurl", payload)
            or _extract_attr("thumburl", payload)
        )
        return preview
    return preview


def parse_quote_message(xml_content: str) -> dict:
    """Parse a quote-message XML payload into its display parts.

    Args:
        xml_content: XML text of the quote message.  Escaped angle brackets
            (``&lt;`` / ``&gt;``) are unescaped before matching.

    Returns:
        A dict with the keys ``main_content``, ``display_name``,
        ``quoted_content``, ``reference_type``, ``preview_image``,
        ``preview_video_thumb`` and ``formatted_message``.

    Raises:
        AttributeError: If *xml_content* is not a string (for example ``None``).
    """
    # The referenced message may be embedded in escaped form; unescape the
    # angle brackets so the tag patterns below can see into it.
    xml_content = xml_content.replace('&lt;', '<').replace('&gt;', '>')

    main_content = _clean_text(_extract_first(r'<title>(.*?)</title>', xml_content, "[无标题]")) or "[无标题]"
    display_name = _clean_text(_extract_first(r'<displayname>(.*?)</displayname>', xml_content, "未知用户")) or "未知用户"
    quoted_content = _extract_first(r'<refermsg>.*?<content>(.*?)</content>', xml_content)
    ref_type = _extract_first(r'<refermsg>.*?<type>(.*?)</type>', xml_content)

    pretty_reference = _format_referenced_content(ref_type, quoted_content, xml_content)
    media_preview = _extract_media_preview(ref_type, quoted_content)

    return {
        "main_content": main_content,
        "display_name": display_name,
        "quoted_content": pretty_reference,
        "reference_type": media_preview.get("reference_type", "text"),
        "preview_image": media_preview.get("preview_image", ""),
        "preview_video_thumb": media_preview.get("preview_video_thumb", ""),
        "formatted_message": f"{main_content}\n引用 {display_name}:{pretty_reference}"
        if display_name and pretty_reference
        else main_content,
    }


def format_quote_message(xml_content):
    """Format a quote message as display text.

    Args:
        xml_content: XML text of the quote message.

    Returns:
        The formatted message text.  If parsing fails, the raw content of
        the first title tag is returned when one can be found, otherwise
        the neutral placeholder ``"[引用消息]"``.  This function never raises.
    """
    try:
        return parse_quote_message(xml_content)["formatted_message"]
    except Exception:
        # Deliberate best-effort boundary: callers get a placeholder rather
        # than an error.  If parsing failed, try the bare title tag content.
        if isinstance(xml_content, str):
            match = re.search(r'<title>(.*?)</title>', xml_content)
            if match:
                return match.group(1)
        return "[引用消息]"


if __name__ == '__main__':
    # NOTE(review): the tags of this sample were stripped in the reviewed
    # source; the payload is an abbreviated reconstruction around the
    # surviving values - restore the original sample if available.
    sample_xml = """<msg>
<appmsg appid="" sdkver="0">
<title>那也没事,都是富二代,都是送出国镀个金回家继承家业的</title>
<type>57</type>
<refermsg>
<type>1</type>
<svrid>2568355911763278189</svrid>
<fromusr>45317011307@chatroom</fromusr>
<chatusr>wxid_twrbhdxddlud12</chatusr>
<displayname>水牛🐃</displayname>
<content>都是富二代。</content>
<msgsource>&lt;msgsource&gt;
    &lt;sec_msg_node&gt;
        &lt;alnode&gt;
            &lt;fr&gt;1&lt;/fr&gt;
        &lt;/alnode&gt;
    &lt;/sec_msg_node&gt;
    &lt;pua&gt;1&lt;/pua&gt;
    &lt;silence&gt;1&lt;/silence&gt;
    &lt;membercount&gt;125&lt;/membercount&gt;
    &lt;signature&gt;N0_V1_DaX/Y3s2|v1_PvH3m56P&lt;/signature&gt;
    &lt;tmp_node&gt;
        &lt;publisher-id&gt;&lt;/publisher-id&gt;
    &lt;/tmp_node&gt;
&lt;/msgsource&gt;</msgsource>
<createtime>1743491847</createtime>
</refermsg>
</appmsg>
<fromusername>maoyijie</fromusername>
<scene>0</scene>
<appinfo>
<version>1</version>
</appinfo>
</msg>"""
    print(format_quote_message(sample_xml))