聊天记录超过30000字符时，按整行去除最早的内容，仅保留末尾不超过30000字符的部分，防止超长
This commit is contained in:
@@ -10,6 +10,23 @@ def compress_chat_data(chat_data_str, time_threshold=5):
|
|||||||
:param time_threshold: 同一发信人连续发言间隔小于该值(秒),则合并
|
:param time_threshold: 同一发信人连续发言间隔小于该值(秒),则合并
|
||||||
:return: 压缩后的聊天数据的长字符串
|
:return: 压缩后的聊天数据的长字符串
|
||||||
"""
|
"""
|
||||||
|
# 如果字符串长度超过30000,则去除前面的聊天记录
|
||||||
|
if len(chat_data_str) > 30000:
|
||||||
|
lines = chat_data_str.splitlines()
|
||||||
|
total_length = 0
|
||||||
|
cut_index = 0
|
||||||
|
|
||||||
|
# 从后往前计算,找到保留哪些行
|
||||||
|
for i in range(len(lines) - 1, -1, -1):
|
||||||
|
line_length = len(lines[i]) + 1 # +1 是为了计入换行符
|
||||||
|
total_length += line_length
|
||||||
|
if total_length > 30000:
|
||||||
|
cut_index = i + 1 # 保留这个索引之后的行
|
||||||
|
break
|
||||||
|
|
||||||
|
# 只保留后面的聊天记录
|
||||||
|
chat_data_str = '\n'.join(lines[cut_index:])
|
||||||
|
|
||||||
# 解析原始聊天数据为列表
|
# 解析原始聊天数据为列表
|
||||||
chat_data = []
|
chat_data = []
|
||||||
for line in chat_data_str.splitlines():
|
for line in chat_data_str.splitlines():
|
||||||
@@ -25,6 +42,7 @@ def compress_chat_data(chat_data_str, time_threshold=5):
|
|||||||
timestamp, sender, content = parts
|
timestamp, sender, content = parts
|
||||||
chat_data.append((timestamp, sender, content))
|
chat_data.append((timestamp, sender, content))
|
||||||
|
|
||||||
|
# 其余代码保持不变
|
||||||
if not chat_data:
|
if not chat_data:
|
||||||
return "" # 如果没有有效数据,返回空字符串
|
return "" # 如果没有有效数据,返回空字符串
|
||||||
|
|
||||||
@@ -53,7 +71,7 @@ def compress_chat_data(chat_data_str, time_threshold=5):
|
|||||||
|
|
||||||
# 检查是否需要合并消息
|
# 检查是否需要合并消息
|
||||||
if (prev_sender == sender and prev_time_obj is not None and
|
if (prev_sender == sender and prev_time_obj is not None and
|
||||||
(time_obj - prev_time_obj).total_seconds() <= time_threshold):
|
(time_obj - prev_time_obj).total_seconds() <= time_threshold):
|
||||||
# 合并消息,更新最后一条消息
|
# 合并消息,更新最后一条消息
|
||||||
compressed_data[-1] = f"{prev_time_obj.strftime('%H:%M:%S')},{sender},{prev_content} {content}"
|
compressed_data[-1] = f"{prev_time_obj.strftime('%H:%M:%S')},{sender},{prev_content} {content}"
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user