58 lines
1.9 KiB
Python
58 lines
1.9 KiB
Python
def remove_trailing_content(text: str, delimiter: str = '---') -> str:
    """Drop the last occurrence of ``delimiter`` and everything after it.

    Args:
        text: Input text to trim.
        delimiter: Marker to look for; only its last occurrence is used.
            Defaults to ``'---'``.

    Returns:
        The part of ``text`` before the last delimiter, with leading and
        trailing whitespace stripped. ``text`` is returned unchanged (not
        stripped) when the delimiter does not occur, or when ``text`` or
        ``delimiter`` is empty or ``None``.
    """
    # Nothing to cut. An empty delimiter is rejected explicitly because
    # ``str.rfind('')`` reports a match at ``len(text)``, which would strip
    # the text even though no delimiter was actually present.
    if not text or not delimiter:
        return text

    head, found, _tail = text.rpartition(delimiter)
    if not found:
        return text
    return head.strip()
|
||
|
||
import re
|
||
|
||
def remove_grok_render_tags(text: str) -> str:
    """Strip ``<grok:...>`` markup from ``text``.

    Removal happens in three passes (all case-insensitive):

    1. Self-closing tags such as ``<grok:render ... />``.
    2. Paired tags ``<grok:x ...> ... </grok:y>`` together with everything
       between them (the closing tag name is not required to match).
    3. Any remaining unpaired opening or closing ``grok:`` tag.

    Args:
        text: Text that may contain grok render tags.

    Returns:
        The text with grok tags removed. Empty or ``None`` input is returned
        unchanged.
    """
    if not text:
        return text

    # Self-closing tags must go first: otherwise the paired-tag pattern below
    # treats ``<grok:x/>`` as an opener and swallows ordinary text up to the
    # next ``</grok:...>``.
    cleaned = re.sub(r'<\s*grok:[^>]*?/\s*>', '', text, flags=re.IGNORECASE)
    # Paired tags, including their enclosed content.
    cleaned = re.sub(
        r'<\s*grok:[^>]+?>[\s\S]*?<\s*/\s*grok:[^>]+?>',
        '',
        cleaned,
        flags=re.IGNORECASE,
    )
    # Leftover unpaired tags (opening or closing), e.g. from truncated output.
    cleaned = re.sub(r'<\s*/?\s*grok:[^>]+?>', '', cleaned, flags=re.IGNORECASE)
    return cleaned
|
||
|
||
|
||
def remove_reasoning_content(text: str) -> str:
    """Remove thinking/reasoning output from a model response.

    Applies, in order: removal of ``<think>``/``<thinking>``/``<reasoning>``
    blocks, removal of markdown-style reasoning headings, removal of a leading
    labelled reasoning section, removal of grok render tags (via
    ``remove_grok_render_tags``), and finally whitespace normalisation.

    Args:
        text: Raw model output.

    Returns:
        The cleaned text, stripped, with runs of three or more newlines
        collapsed to two. Empty or ``None`` input is returned unchanged.
    """
    if not text:
        return text

    cleaned = text

    # Common reasoning tags, removed together with their content.
    for tag in ('think', 'thinking', 'reasoning'):
        cleaned = re.sub(
            rf'<\s*{tag}\s*>[\s\S]*?<\s*/\s*{tag}\s*>',
            '',
            cleaned,
            flags=re.IGNORECASE,
        )

    # Markdown-style reasoning headings (a line consisting only of an optional
    # '#' prefix, one of the keywords, and an optional ASCII or full-width
    # colon, U+FF1A).
    # NOTE(review): the lookahead stops at the next line containing any
    # non-whitespace character, so in practice this appears to remove only the
    # heading line (plus adjacent blank lines), not the section body. Confirm
    # whether the whole section was meant to be dropped.
    cleaned = re.sub(
        r'^\s*(#{1,6}\s*)?(思考过程|推理过程|分析过程|reasoning|thinking)\s*[:\uff1a]?\s*$[\s\S]*?(?=^\s*(#{1,6}\s*)?\S|\Z)',
        '',
        cleaned,
        flags=re.IGNORECASE | re.MULTILINE,
    )

    # Some models emit "<reasoning label>: ... <final answer label>: ...".
    # The label may be followed by an ASCII or full-width colon (U+FF1A).
    # Without re.MULTILINE the '^' anchors to the start of the text, so this
    # only fires when the (remaining) text begins with such a label; everything
    # up to the first answer/summary marker, '#', or end of text is removed.
    cleaned = re.sub(
        r'^\s*(思考内容|思维链|推理过程|分析过程)\s*[:\uff1a][\s\S]*?(?=(最终答案|总结|摘要|#|\Z))',
        '',
        cleaned,
        flags=re.IGNORECASE,
    )

    cleaned = remove_grok_render_tags(cleaned)
    # Collapse the blank-line gaps left behind by the removals above.
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
    return cleaned
|