# abot/utils/string_utils.py

def remove_trailing_content(text: str, delimiter: str = '---') -> str:
    """
    Drop the last occurrence of a delimiter and everything after it.

    Args:
        text: Input text. Falsy input (``None`` or ``''``) is returned as is.
        delimiter: Marker to search for; defaults to ``'---'``.

    Returns:
        The part of ``text`` before the last ``delimiter`` with leading and
        trailing whitespace stripped, or ``text`` unchanged (not stripped)
        when the delimiter does not occur.
    """
    # Same falsy-input guard as the other helpers in this module, so a
    # missing (None) response is passed through instead of raising
    # AttributeError on ``.rfind``.
    if not text:
        return text

    cut_at = text.rfind(delimiter)
    if cut_at == -1:
        return text
    return text[:cut_at].strip()

import re

def remove_grok_render_tags(text: str) -> str:
    """Remove ``<grok:...>`` markup from ``text``.

    Two passes are applied, both case-insensitive:

    1. Paired elements (``<grok:x ...> ... </grok:y>``) are removed together
       with everything between the opening and the nearest closing tag.
    2. Any remaining single ``grok:`` tag is removed on its own: unpaired
       openers, self-closing tags and orphan closing tags.

    Args:
        text: Text that may contain grok tags. Falsy input is returned as is.

    Returns:
        The text with grok tags removed. Surrounding whitespace is not
        touched.
    """
    if not text:
        return text

    # The (?<!/) lookbehind rejects a self-closing tag as the opener of a
    # pair; otherwise "<grok:a/> keep me <grok:b>x</grok:b>" would lose the
    # ordinary text sitting between the two tags.
    without_blocks = re.sub(
        r'<\s*grok:[^>]+(?<!/)>[\s\S]*?<\s*/\s*grok:[^>]+?>',
        '',
        text,
        flags=re.IGNORECASE,
    )

    # The optional leading slash also strips closing tags whose opener was
    # missing, so no "</grok:...>" residue is left behind.
    return re.sub(
        r'<\s*/?\s*grok:[^>]+?/?>',
        '',
        without_blocks,
        flags=re.IGNORECASE,
    )


def remove_reasoning_content(text: str) -> str:
    """Strip model "thinking"/reasoning output, keeping the displayable result.

    The following substitutions run in order:

    1. Paired ``<think>``, ``<thinking>`` and ``<reasoning>`` elements are
       removed together with their content (case-insensitive).
    2. A line consisting only of a reasoning heading (optionally prefixed
       with 1-6 ``#`` and optionally followed by a colon) is removed.
    3. If the text starts with a labelled reasoning block such as
       "思考内容：", it is removed up to the first answer marker.
    4. Grok tags are removed via ``remove_grok_render_tags``.
    5. Runs of three or more newlines collapse to two and the result is
       stripped.

    Args:
        text: Raw model output. Falsy input is returned as is.

    Returns:
        The cleaned text.
    """
    if not text:
        return text

    # Common XML-like reasoning tags, removed together with their content.
    tag_block_patterns = (
        r'<\s*think\s*>[\s\S]*?<\s*/\s*think\s*>',
        r'<\s*thinking\s*>[\s\S]*?<\s*/\s*thinking\s*>',
        r'<\s*reasoning\s*>[\s\S]*?<\s*/\s*reasoning\s*>',
    )
    result = text
    for tag_pattern in tag_block_patterns:
        result = re.sub(tag_pattern, '', result, flags=re.IGNORECASE)

    # Common markdown-style reasoning headings.
    # NOTE(review): the '#' prefix inside the lookahead is optional, so the
    # lazy body stops at the next non-blank line. In effect only the heading
    # line (plus adjacent blank lines) is removed and the section body stays.
    # Confirm whether removing the whole section was intended.
    heading_pattern = (
        r'^\s*(#{1,6}\s*)?(思考过程|推理过程|分析过程|reasoning|thinking)\s*[:：]?\s*$[\s\S]*?(?=^\s*(#{1,6}\s*)?\S|\Z)'
    )
    result = re.sub(
        heading_pattern, '', result, flags=re.IGNORECASE | re.MULTILINE
    )

    # Some models emit "thinking content: ... final answer: ...".
    # Without re.MULTILINE the '^' anchors at the start of the whole text
    # only. The match runs to the first of the markers 最终答案 / 总结 /
    # 摘要 / '#', or to the end of the text when none of them follows.
    # NOTE(review): with no marker present this deletes all remaining text.
    labelled_pattern = (
        r'^\s*(思考内容|思维链|推理过程|分析过程)\s*[:：][\s\S]*?(?=(最终答案|总结|摘要|#|\Z))'
    )
    result = re.sub(labelled_pattern, '', result, flags=re.IGNORECASE)

    result = remove_grok_render_tags(result)

    # Collapse the blank-line runs left behind by the removals above.
    collapsed = re.sub(r'\n{3,}', '\n\n', result)
    return collapsed.strip()