调整xml 获取逻辑。

This commit is contained in:
liuwei
2025-04-22 14:13:17 +08:00
parent da5ce935b9
commit f34c1db686
2 changed files with 72 additions and 20 deletions

View File

@@ -1,9 +1,11 @@
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Dict, Any from typing import Optional, Dict, Any, re
from enum import Enum from enum import Enum
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import json import json
import main
class MessageType(Enum): class MessageType(Enum):
"""消息类型枚举""" """消息类型枚举"""
@@ -55,15 +57,51 @@ class AppMessageType(Enum):
class MessageContent: class MessageContent:
"""消息内容""" """消息内容"""
raw_content: str # 原始内容 raw_content: str # 原始内容
xml_content: Optional[ET.Element] = None # XML内容(如果有) xml_content: str = "" # XML内容(如果有)
clean_content: str = "" # 清理后的内容(去除发信人信息)
sender: str = "" # 发信人wxid
def __post_init__(self): def __post_init__(self):
"""处理XML内容""" """处理XML内容和清理发信人信息"""
if self.raw_content.startswith('<?xml') or self.raw_content.startswith('<msg'): # 处理XML内容
# 清理发信人信息
self.clean_content = self.clean_sender_info(self.raw_content)
if self.clean_content.startswith('<?xml') or self.clean_content.startswith('<msg'):
try: try:
self.xml_content = ET.fromstring(self.raw_content) self.xml_content = self.clean_content
except ET.ParseError: except ET.ParseError:
self.xml_content = None self.xml_content = ""
def clean_sender_info(self, content: str) -> str:
    """Remove one leading "sender:" line from a message body.

    Patterns are tried in order and only the first one that matches is
    applied, so text after the prefix is never stripped a second time
    (for example a message whose own first line ends with a colon).

    When ``self.sender`` is set, prefixes built from it are tried first:
    ``sender:``, ``anything(sender):`` and ``anything<sender>:``. The
    generic fallbacks follow: ``wxid_xxx:``, ``name(id):``, ``name<id>:``
    and finally any first line that ends with a colon.

    Args:
        content: Message text, possibly starting with a sender prefix line.

    Returns:
        The text without the prefix line, with surrounding whitespace
        stripped. Returns ``""`` when ``content`` is empty.
    """
    if not content:
        return ""

    # Local import: guarantees the stdlib regex module is used here no matter
    # what the module-level name ``re`` happens to be bound to.
    import re

    patterns = []
    if self.sender:
        # Escape the id so characters such as '.', '+', '(' or '[' are matched
        # literally instead of being interpreted as regex syntax.
        sender = re.escape(self.sender)
        patterns += [
            rf"^{sender}[:]\s*\n",               # bare id
            rf"^[^\n]+?\({sender}\)[:]\s*\n",    # nickname(id)
            rf"^[^\n]+?<{sender}>[:]\s*\n",      # nickname<id>
        ]
    # Generic fallbacks for prefixes that do not mention the known sender.
    patterns += [
        r'^wxid_[a-zA-Z0-9_]+[:]\s*\n',   # wxid_xxx
        r'^[^:\n]+\([^)]+\)[:]\s*\n',     # nickname(id)
        r'^[^:\n]+<[^>]+>[:]\s*\n',       # nickname<id>
        r'^[^:\n]+[:]\s*\n',              # any other "label:" first line
    ]

    for pattern in patterns:
        stripped, hits = re.subn(pattern, '', content, count=1)
        if hits:
            # A message carries at most one sender prefix; stop here.
            content = stripped
            break

    return content.strip()
@dataclass @dataclass
@@ -129,16 +167,23 @@ class WxMessage:
data = json_data.get("Data", {}) data = json_data.get("Data", {})
to_user = data.get("ToUserName", {}).get("string", "") to_user = data.get("ToUserName", {}).get("string", "")
# 获取原始内容和发信人
content_str = data.get("Content", {}).get("string", "")
sender = data.get("FromUserName", {}).get("string", "")
# 创建MessageContent对象时传入发信人信息
message_content = MessageContent(content_str, sender=sender)
return cls( return cls(
type_name=json_data.get("TypeName", ""), type_name=json_data.get("TypeName", ""),
appid=json_data.get("Appid", ""), appid=json_data.get("Appid", ""),
wxid=json_data.get("Wxid", ""), wxid=json_data.get("Wxid", ""),
msg_id=data.get("MsgId", 0), msg_id=data.get("MsgId", 0),
sender=data.get("FromUserName", {}).get("string", ""), sender=sender,
to_user=to_user, to_user=to_user,
roomid=to_user if to_user.endswith("@chatroom") else "", # 设置room_id roomid=to_user if to_user.endswith("@chatroom") else "",
msg_type=MessageType(data.get("MsgType", 0)), msg_type=MessageType(data.get("MsgType", 0)),
content=MessageContent(data.get("Content", {}).get("string", "")), content=message_content, # 使用包含发信人信息的MessageContent
create_time=data.get("CreateTime", 0), create_time=data.get("CreateTime", 0),
push_content=data.get("PushContent"), push_content=data.get("PushContent"),
new_msg_id=data.get("NewMsgId", 0), new_msg_id=data.get("NewMsgId", 0),
@@ -155,8 +200,8 @@ class WxMessage:
# 处理不同类型的消息内容 # 处理不同类型的消息内容
content_str = "" content_str = ""
if self.msg_type == MessageType.TEXT: if self.msg_type == MessageType.TEXT:
# 文本消息直接显示内容 # 文本消息直接显示清理后的内容
content_str = self.content.raw_content content_str = self.content.clean_content
elif self.msg_type == MessageType.IMAGE: elif self.msg_type == MessageType.IMAGE:
# 图片消息显示图片信息 # 图片消息显示图片信息
img_content = self.get_image_content() img_content = self.get_image_content()
@@ -231,7 +276,6 @@ class WxMessage:
def from_group(self) -> bool:
    """Return True when ``to_user`` ends with the ``@chatroom`` suffix."""
    recipient = self.to_user
    return recipient.endswith("@chatroom")
def is_at(self, wxid) -> bool: def is_at(self, wxid) -> bool:
"""是否被 @:群消息,在 @ 名单里,并且不是 @ 所有人""" """是否被 @:群消息,在 @ 名单里,并且不是 @ 所有人"""
if not self.from_group(): if not self.from_group():
@@ -332,3 +376,11 @@ class WxMessage:
except (AttributeError, ValueError): except (AttributeError, ValueError):
pass pass
return None return None
if __name__ == '__main__':
    # Manual smoke check: build a MessageContent whose text starts with a
    # wxid prefix that differs from the given sender, then print the raw,
    # XML and cleaned views of it in that order.
    demo = MessageContent("""wxid_g6vc38ifs1an22:\n1""", sender="Jyunere")
    for view in (demo.raw_content, demo.xml_content, demo.clean_content):
        print(view)

View File

@@ -96,7 +96,7 @@ def main(chat_type: int):
app_id, error_msg = client.login(app_id=app_id) app_id, error_msg = client.login(app_id=app_id)
if error_msg: if error_msg:
print("登录失败") logger.error("登录失败")
return return
resp = client.set_callback(token, callback_url) resp = client.set_callback(token, callback_url)
@@ -106,7 +106,7 @@ def main(chat_type: int):
if not config.APP_ID: if not config.APP_ID:
# 更新配置文件中的APP_ID # 更新配置文件中的APP_ID
config.update_config('gewechat', 'app_id', app_id) config.update_config('gewechat', 'app_id', app_id)
print(f"已将新的APP_ID: {app_id} 写入配置文件") logger.info(f"已将新的APP_ID: {app_id} 写入配置文件")
# 同时更新当前配置对象中的APP_ID # 同时更新当前配置对象中的APP_ID
config.APP_ID = app_id config.APP_ID = app_id