聊天记录超过30000字符时,按行丢弃最早的记录,只保留末尾不超过30000字符的部分,防止超长

This commit is contained in:
liuwei
2025-03-14 14:59:00 +08:00
parent e4d8a3aca7
commit ab302d77d8

View File

@@ -10,6 +10,23 @@ def compress_chat_data(chat_data_str, time_threshold=5):
:param time_threshold: 同一发信人连续发言间隔小于该值(秒),则合并 :param time_threshold: 同一发信人连续发言间隔小于该值(秒),则合并
:return: 压缩后的聊天数据的长字符串 :return: 压缩后的聊天数据的长字符串
""" """
# 如果字符串长度超过30000则去除前面的聊天记录
if len(chat_data_str) > 30000:
lines = chat_data_str.splitlines()
total_length = 0
cut_index = 0
# 从后往前计算,找到保留哪些行
for i in range(len(lines) - 1, -1, -1):
line_length = len(lines[i]) + 1 # +1 是为了计入换行符
total_length += line_length
if total_length > 30000:
cut_index = i + 1 # 保留这个索引之后的行
break
# 只保留后面的聊天记录
chat_data_str = '\n'.join(lines[cut_index:])
# 解析原始聊天数据为列表 # 解析原始聊天数据为列表
chat_data = [] chat_data = []
for line in chat_data_str.splitlines(): for line in chat_data_str.splitlines():
@@ -25,6 +42,7 @@ def compress_chat_data(chat_data_str, time_threshold=5):
timestamp, sender, content = parts timestamp, sender, content = parts
chat_data.append((timestamp, sender, content)) chat_data.append((timestamp, sender, content))
# No valid chat records were parsed: return early with an empty string
if not chat_data: if not chat_data:
return "" # 如果没有有效数据,返回空字符串 return "" # 如果没有有效数据,返回空字符串
@@ -53,7 +71,7 @@ def compress_chat_data(chat_data_str, time_threshold=5):
# 检查是否需要合并消息 # 检查是否需要合并消息
if (prev_sender == sender and prev_time_obj is not None and if (prev_sender == sender and prev_time_obj is not None and
(time_obj - prev_time_obj).total_seconds() <= time_threshold): (time_obj - prev_time_obj).total_seconds() <= time_threshold):
# 合并消息,更新最后一条消息 # 合并消息,更新最后一条消息
compressed_data[-1] = f"{prev_time_obj.strftime('%H:%M:%S')},{sender},{prev_content} {content}" compressed_data[-1] = f"{prev_time_obj.strftime('%H:%M:%S')},{sender},{prev_content} {content}"
else: else: