Files
abot/utils/string_utils.py

58 lines
1.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
def remove_trailing_content(text: str, delimiter: str = '---') -> str:
    """Drop the last occurrence of *delimiter* and everything after it.

    Args:
        text: Input text. Empty or ``None`` input is returned unchanged.
        delimiter: Marker to search for, ``'---'`` by default. An empty
            delimiter cannot mark a cut point, so *text* is returned
            unchanged in that case.

    Returns:
        The text that precedes the last delimiter, with leading and
        trailing whitespace stripped; *text* itself (not stripped) when
        the delimiter does not occur.
    """
    # Guard falsy input up front: ``None`` has no ``rfind``, and
    # ``str.rfind('')`` "finds" the empty delimiter at ``len(text)``, which
    # would strip the text as an accidental side effect.
    if not text or not delimiter:
        return text

    index = text.rfind(delimiter)
    if index == -1:
        return text
    return text[:index].strip()
import re
def remove_grok_render_tags(text: str) -> str:
    """Remove ``<grok:...>`` markup from *text* (case-insensitive).

    Three passes are applied, in this order:

    1. Self-closing tags such as ``<grok:card/>``.
    2. Paired tags together with everything between them, e.g.
       ``<grok:render ...>...</grok:render>``.
    3. Any opening or closing ``grok:`` tag that is still left over.

    Args:
        text: Text that may contain grok tags. Empty or ``None`` input is
            returned unchanged.

    Returns:
        The text with the grok markup removed. Surrounding whitespace is
        left untouched.
    """
    if not text:
        return text

    # Self-closing tags go first. Otherwise the paired pattern below would
    # read ``<grok:x/>`` as an opening tag and swallow all text up to the
    # next ``</grok:...>`` that happens to appear later.
    cleaned = re.sub(r'<\s*grok:[^>]*?/\s*>', '', text, flags=re.IGNORECASE)
    # Paired tags, lazily matched so each pair is removed on its own.
    cleaned = re.sub(
        r'<\s*grok:[^>]+?>[\s\S]*?<\s*/\s*grok:[^>]+?>',
        '',
        cleaned,
        flags=re.IGNORECASE,
    )
    # Leftovers: an opening tag without a partner, or a stray closing tag.
    cleaned = re.sub(r'<\s*/?\s*grok:[^>]+?>', '', cleaned, flags=re.IGNORECASE)
    return cleaned
def remove_reasoning_content(text: str) -> str:
    """Strip model "thinking"/reasoning output, keeping the displayable result.

    The following are removed, in order:

    1. ``<think>``, ``<thinking>`` and ``<reasoning>`` elements together
       with their content (case-insensitive).
    2. Stand-alone label lines such as ``## 思考过程`` or ``Reasoning:``
       (optional markdown heading marks, optional trailing colon).
    3. A block at the very start of the text that begins with
       ``思考内容`` / ``思维链`` / ``推理过程`` / ``分析过程`` plus a colon,
       up to (not including) the first ``最终答案``, ``总结``, ``摘要`` or
       ``#``, or to the end of the text when none of them occurs.
    4. ``<grok:...>`` markup, via ``remove_grok_render_tags``.

    Finally, runs of three or more newlines are collapsed to two and the
    result is stripped.

    Both the ASCII colon and the full-width colon (U+FF1A) are accepted
    after a label.

    Args:
        text: Raw model output. Empty or ``None`` input is returned
            unchanged.

    Returns:
        The cleaned text.
    """
    if not text:
        return text

    cleaned = text

    # Common reasoning tags; each tag name is handled in its own pass.
    for tag in ('think', 'thinking', 'reasoning'):
        cleaned = re.sub(
            rf'<\s*{tag}\s*>[\s\S]*?<\s*/\s*{tag}\s*>',
            '',
            cleaned,
            flags=re.IGNORECASE,
        )

    # Common markdown reasoning section labels.
    # NOTE(review): the lazy body stops at the lookahead's first match, which
    # is the next non-blank line, so in practice only the label line (and
    # adjacent blank lines) is removed while the paragraph below it is kept.
    # Confirm whether the whole section was meant to be dropped.
    cleaned = re.sub(
        r'^\s*(#{1,6}\s*)?(思考过程|推理过程|分析过程|reasoning|thinking)'
        r'\s*[:\uFF1A]?\s*$[\s\S]*?(?=^\s*(#{1,6}\s*)?\S|\Z)',
        '',
        cleaned,
        flags=re.IGNORECASE | re.MULTILINE,
    )

    # Some models emit "思考内容: ... 最终答案: ...".
    # NOTE(review): without re.MULTILINE the ``^`` only matches at the start
    # of the text, so this applies only when the text opens with the label.
    cleaned = re.sub(
        r'^\s*(思考内容|思维链|推理过程|分析过程)\s*[:\uFF1A]'
        r'[\s\S]*?(?=(最终答案|总结|摘要|#|\Z))',
        '',
        cleaned,
        flags=re.IGNORECASE,
    )

    cleaned = remove_grok_render_tags(cleaned)
    # Collapse the blank-line gaps left behind by the removals above.
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
    return cleaned