feat: improve quoted message rendering in dashboard

This commit is contained in:
liuwei
2026-04-07 17:35:03 +08:00
parent 3d56b3895a
commit 867f00435c
3 changed files with 115 additions and 28 deletions

View File

@@ -3,6 +3,93 @@ import html
import re
def _clean_text(value: str) -> str:
if not value:
return ""
value = html.unescape(value)
value = re.sub(r"<br\s*/?>", "\n", value, flags=re.IGNORECASE)
value = re.sub(r"<[^>]+>", "", value)
return value.strip()
def _extract_first(pattern: str, text: str, default: str = "") -> str:
match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
return match.group(1) if match else default
def _format_referenced_content(ref_type: str, quoted_content: str, xml_content: str) -> str:
    """Build the short label shown for the message that is being quoted.

    Resolution order:

    1. A recognised ``ref_type`` code decides the label directly.
    2. Otherwise the combined, lower-cased XML is sniffed for media markers.
    3. Otherwise the cleaned quoted text is returned, or ``"[消息]"`` when
       that is empty.

    Args:
        ref_type: Type code extracted from the reference; may be empty.
        quoted_content: Raw content of the referenced message; may be
            ``None`` or empty.
        xml_content: Full XML of the quoting message; may be ``None``.

    Returns:
        A non-empty label such as ``"[图片]"``, ``"[链接] <title>"`` or the
        plain quoted text.
    """
    quoted = quoted_content or ""
    outer = xml_content or ""

    # A declared type is authoritative and must win over marker sniffing.
    type_labels = {
        "3": "[图片]",
        "43": "[视频]",
        "62": "[视频]",
        "47": "[表情]",
        "1048625": "[表情]",
        "1090519089": "[表情]",
        "34": "[语音]",
        "48": "[位置]",
    }
    if ref_type in type_labels:
        return type_labels[ref_type]
    if ref_type == "49":
        title = _extract_first(r"<title>(.*?)</title>", quoted) or _extract_first(r"<title>(.*?)</title>", outer)
        title = _clean_text(title)
        return f"[链接] {title}" if title else "[链接]"

    haystack = (quoted + outer).lower()
    # Video is checked before image: the video thumbnail is also read from
    # "cdnthumburl" elsewhere in this module, so testing that marker first
    # would label videos as images.
    if "<videomsg" in haystack or "cdnvideourl" in haystack:
        return "[视频]"
    if "<img" in haystack or "cdnthumburl" in haystack:
        return "[图片]"
    if "<emoji" in haystack or "<emoticonmd5>" in haystack:
        return "[表情]"
    if "<voicemsg" in haystack:
        return "[语音]"
    if "<location" in haystack:
        return "[位置]"

    cleaned = _clean_text(quoted)
    return cleaned if cleaned else "[消息]"
def _extract_media_preview(ref_type: str, quoted_content: str) -> dict:
    """Classify a quoted message as text/image/video/emoji and pull thumbnails.

    A recognised ``ref_type`` decides the kind; only when it is not
    recognised is the entity-decoded payload sniffed for ``<img``,
    ``<videomsg`` or ``<emoji``.

    Args:
        ref_type: Type code extracted from the reference; may be empty.
        quoted_content: Raw content of the referenced message; may be
            ``None`` or empty.

    Returns:
        A dict with the keys ``reference_type`` (``"text"``, ``"image"``,
        ``"video"`` or ``"emoji"``), ``preview_image`` and
        ``preview_video_thumb``. The two URL fields are ``""`` when nothing
        was found or when they do not apply to the detected kind.
    """
    payload = html.unescape(quoted_content or "")
    lowered = payload.lower()  # computed once for all marker checks
    preview = {"reference_type": "text", "preview_image": "", "preview_video_thumb": ""}

    # Declared type first, marker sniffing only as a fallback.
    if ref_type in {"3"}:
        kind = "image"
    elif ref_type in {"43", "62"}:
        kind = "video"
    elif ref_type in {"47", "1048625", "1090519089"}:
        kind = "emoji"
    elif "<img" in lowered:
        kind = "image"
    elif "<videomsg" in lowered:
        kind = "video"
    elif "<emoji" in lowered:
        kind = "emoji"
    else:
        return preview

    preview["reference_type"] = kind
    if kind == "image":
        # Each URL may appear as an attribute or as a CDATA-wrapped element;
        # the thumbnail is preferred over the mid-size image.
        preview["preview_image"] = (
            _extract_first(r'cdnthumburl="(.*?)"', payload)
            or _extract_first(r"<cdnthumburl><!\[CDATA\[(.*?)\]\]></cdnthumburl>", payload)
            or _extract_first(r'cdnmidimgurl="(.*?)"', payload)
            or _extract_first(r"<cdnmidimgurl><!\[CDATA\[(.*?)\]\]></cdnmidimgurl>", payload)
        )
    elif kind == "video":
        preview["preview_video_thumb"] = (
            _extract_first(r'cdnthumburl="(.*?)"', payload)
            or _extract_first(r"<cdnthumburl><!\[CDATA\[(.*?)\]\]></cdnthumburl>", payload)
        )
    return preview
def parse_quote_message(xml_content: str) -> dict:
    """Parse the XML of a quote (reply) message into display-ready fields.

    Args:
        xml_content: XML of the quoting message. ``&lt;`` / ``&gt;`` are
            decoded up front so that escaped inner XML can be matched by
            the tag patterns below.

    Returns:
        A dict with the keys:

        * ``main_content`` - cleaned text of the first ``<title>``, or
          ``"[无标题]"``.
        * ``display_name`` - cleaned ``<displayname>``, or ``"未知用户"``.
        * ``quoted_content`` - label for the referenced message.
        * ``reference_type`` / ``preview_image`` / ``preview_video_thumb`` -
          media preview information for the referenced message.
        * ``formatted_message`` - ``main_content`` followed by a quote line
          when a reference was found, otherwise ``main_content`` alone.
    """
    xml_content = xml_content.replace('&lt;', '<').replace('&gt;', '>')

    main_content = _clean_text(_extract_first(r'<title>(.*?)</title>', xml_content, "[无标题]")) or "[无标题]"
    display_name = _clean_text(_extract_first(r'<displayname>(.*?)</displayname>', xml_content, "未知用户")) or "未知用户"

    quoted_content = _extract_first(r'<refermsg>.*?<content>(.*?)</content>', xml_content)
    # Strip so the exact-match type sets still work when the XML is
    # pretty-printed with whitespace around the value.
    ref_type = _extract_first(r'<refermsg>.*?<type>(.*?)</type>', xml_content).strip()

    pretty_reference = _format_referenced_content(ref_type, quoted_content, xml_content)
    media_preview = _extract_media_preview(ref_type, quoted_content)

    # display_name and pretty_reference always carry a non-empty fallback, so
    # testing them can never suppress the quote line. Gate on whether a
    # reference was actually extracted from the XML instead.
    has_reference = bool(ref_type or quoted_content.strip())
    if has_reference:
        formatted_message = f"{main_content}\n引用 {display_name}{pretty_reference}"
    else:
        formatted_message = main_content

    return {
        "main_content": main_content,
        "display_name": display_name,
        "quoted_content": pretty_reference,
        "reference_type": media_preview.get("reference_type", "text"),
        "preview_image": media_preview.get("preview_image", ""),
        "preview_video_thumb": media_preview.get("preview_video_thumb", ""),
        "formatted_message": formatted_message,
    }
def format_quote_message(xml_content):
"""
格式化引用消息
@@ -14,31 +101,7 @@ def format_quote_message(xml_content):
格式化后的消息文本
"""
try:
xml_content = xml_content.replace('&lt;', '<').replace('&gt;', '>')
# 使用正则表达式直接提取关键信息避免XML解析问题
title_match = re.search(r'<title>(.*?)</title>', xml_content)
main_content = title_match.group(1) if title_match else "[无标题]"
# 提取引用消息的发送者和内容
display_name_match = re.search(r'<displayname>(.*?)</displayname>', xml_content)
display_name = display_name_match.group(1) if display_name_match else "未知用户"
quoted_content_match = re.search(r'<refermsg>.*?<content>(.*?)</content>', xml_content, re.DOTALL)
quoted_content = quoted_content_match.group(1) if quoted_content_match else ""
# 解码HTML实体
try:
quoted_content = html.unescape(quoted_content)
except:
pass # 如果解码失败,使用原始内容
# 构建格式化的引用消息
if display_name and quoted_content:
formatted_message = f"{main_content}\n引用 {display_name}{quoted_content}"
return formatted_message
return main_content
return parse_quote_message(xml_content)["formatted_message"]
except Exception as e:
# 如果解析失败尝试提取title标签内容
try: