feat(ai): clean reasoning content from replies

This commit is contained in:
liuwei
2026-04-07 09:23:48 +08:00
parent 496463c442
commit 51fe971cda
4 changed files with 73 additions and 10 deletions

View File

@@ -21,7 +21,7 @@ from utils.revoke.message_auto_revoke import MessageAutoRevoke
from utils.robot_cmd.robot_command import Feature, PermissionStatus, GroupBotManager
from utils.decorator.points_decorator import plugin_points_cost
from utils.media_downloader import MediaDownloader
from utils.string_utils import remove_trailing_content, remove_grok_render_tags
from utils.string_utils import remove_reasoning_content, remove_trailing_content, remove_grok_render_tags
from wechat_ipad import WechatAPIClient
from wechat_ipad.models.message import MessageType
import aiohttp
@@ -250,6 +250,12 @@ class DifyPlugin(MessagePluginInterface):
response: str, roomid: str) -> Tuple[bool, str]:
"""发送响应消息的辅助方法"""
try:
if response and not os.path.isfile(response):
response = remove_reasoning_content(response)
response = remove_trailing_content(response)
response = remove_grok_render_tags(response)
response = re.sub(r'\n{3,}', '\n\n', response).strip()
# 判断是否为本地文件路径
if os.path.isfile(response):
# 如果是文件路径,使用发送文件方法
@@ -549,6 +555,12 @@ class DifyPlugin(MessagePluginInterface):
# 获取token使用情况
total_tokens = response_data.get("data", {}).get("total_tokens", 0)
if answer and not os.path.isfile(answer):
answer = remove_reasoning_content(answer)
answer = remove_trailing_content(answer)
answer = remove_grok_render_tags(answer)
answer = re.sub(r'\n{3,}', '\n\n', answer).strip()
# 更新会话历史
self.conversations[session_id].append({
"role": "user",

View File

@@ -6,7 +6,10 @@ enabled = true
[api]
api_key = "app-McGLzBhBjeBCSEi7n83MtuTo"
api_url = "http://192.168.2.240/v1/chat-messages"
response_mode = "streaming"
response_mode = "blocking"
connect_timeout_seconds = 10
request_timeout_seconds = 180
retry_delays_seconds = [10, 20]
[output]
output_dir = "output"

View File

@@ -21,7 +21,7 @@ from utils.decorator.rate_limit_decorator import group_feature_rate_limit
from utils.markdown_to_image import convert_md_str_to_image
from utils.revoke.message_auto_revoke import MessageAutoRevoke
from utils.robot_cmd.robot_command import GroupBotManager, PermissionStatus
from utils.string_utils import remove_trailing_content
from utils.string_utils import remove_reasoning_content, remove_trailing_content
from utils.wechat.contact_manager import ContactManager
from utils.wechat.message_to_db import MessageStorage
from wechat_ipad import WechatAPIClient
@@ -84,7 +84,10 @@ class MessageSummaryPlugin(MessagePluginInterface):
api_config = self._config.get("api", {})
self._api_key = api_config.get("api_key", "app-McGLzBhBjeBCSEi7n83MtuTo")
self._api_url = api_config.get("api_url", "http://192.168.2.240/v1/chat-messages")
self._response_mode = api_config.get("response_mode", "streaming")
self._response_mode = api_config.get("response_mode", "blocking")
self._connect_timeout_seconds = int(api_config.get("connect_timeout_seconds", 10))
self._request_timeout_seconds = int(api_config.get("request_timeout_seconds", 180))
self._retry_delays_seconds = api_config.get("retry_delays_seconds", [10, 20])
self.message_storage = MessageStorage()
db_manager = context.get("db_manager")
if db_manager:
@@ -275,6 +278,16 @@ class MessageSummaryPlugin(MessagePluginInterface):
tokens_info = f"\n\n【tokens】输入: {prompt_tokens} 生成: {completion_tokens} 总: {total_tokens}"
return answer + tokens_info
def _clean_summary_output(self, answer: str) -> str:
    """Strip reasoning traces and unrelated trailing text from a summary.

    Args:
        answer: Raw answer text returned by the summary API.

    Returns:
        The answer after reasoning removal, trailing-content removal,
        blank-line collapsing and stripping. A falsy ``answer`` is
        returned unchanged.
    """
    if answer:
        without_reasoning = remove_reasoning_content(answer)
        trimmed = remove_trailing_content(without_reasoning)
        # Collapse runs of three or more newlines into a single blank line.
        return re.sub(r'\n{3,}', '\n\n', trimmed).strip()
    return answer
def _get_revoke_manager(self) -> Optional[MessageAutoRevoke]:
"""优先使用消息上下文中的撤回器,定时任务场景则懒初始化一个"""
if self.revoke:
@@ -357,12 +370,15 @@ class MessageSummaryPlugin(MessagePluginInterface):
"Accept": "text/event-stream" if self._response_mode == "streaming" else "application/json"
}
max_retries = 3
retry_delays = [2, 4]
max_retries = len(self._retry_delays_seconds) + 1
for attempt in range(1, max_retries + 1):
try:
custom_timeout = ClientTimeout(total=None, connect=10, sock_read=300)
custom_timeout = ClientTimeout(
total=None,
connect=self._connect_timeout_seconds,
sock_read=self._request_timeout_seconds
)
conn = aiohttp.TCPConnector(keepalive_timeout=60) # 保持连接活跃
async with aiohttp.ClientSession(connector=conn, timeout=custom_timeout) as session:
async with session.post(self._api_url, headers=headers, json=data) as response:
@@ -381,8 +397,7 @@ class MessageSummaryPlugin(MessagePluginInterface):
# 提取回答内容
answer = response_data.get("answer", "")
# 去除广告内容pollinations.ai 的广告
# answer = remove_trailing_content(answer)
answer = self._clean_summary_output(answer)
spath = ""
# 提取token使用情况
metadata = response_data.get("metadata", {})
@@ -420,7 +435,7 @@ class MessageSummaryPlugin(MessagePluginInterface):
self.LOG.error(f"处理总结时出现未知错误: attempt={attempt}/{max_retries}, error={e}")
if attempt < max_retries:
delay = retry_delays[attempt - 1] if attempt - 1 < len(retry_delays) else retry_delays[-1]
delay = self._retry_delays_seconds[attempt - 1]
self.LOG.warning(f"群总结生成失败,准备重试: attempt={attempt}/{max_retries}, delay={delay}s")
await asyncio.sleep(delay)

View File

@@ -22,3 +22,36 @@ def remove_grok_render_tags(text: str) -> str:
cleaned = re.sub(r'<\s*grok:[^>]+?>[\s\S]*?<\s*/\s*grok:[^>]+?>', '', text, flags=re.IGNORECASE)
cleaned = re.sub(r'<\s*grok:[^>]+?/?>', '', cleaned, flags=re.IGNORECASE)
return cleaned
def remove_reasoning_content(text: str) -> str:
    """Remove model "thinking"/reasoning output, keeping only the displayable reply.

    Cleaning steps, applied in order:

    1. Paired ``<think>``, ``<thinking>`` and ``<reasoning>`` blocks.
    2. Everything up to a leftover closing tag that has no opening tag.
    3. A Markdown heading that names a reasoning section, together with its
       body, when another Markdown heading follows it.
    4. Any remaining line that consists only of a reasoning label.
    5. A leading "label: ..." prefix at the very start of the text, up to the
       first answer marker (最终答案 / 总结 / 摘要 / ``#``), or the rest of the
       text when no marker is present.
    6. grok render tags, then blank-line collapsing and stripping.

    Args:
        text: Raw model reply.

    Returns:
        The cleaned reply. Falsy input is returned unchanged.
    """
    if not text:
        return text

    cleaned = text

    # Paired reasoning tags, e.g. <think>...</think>.
    for tag in ('think', 'thinking', 'reasoning'):
        cleaned = re.sub(
            rf'<\s*{tag}\s*>[\s\S]*?<\s*/\s*{tag}\s*>',
            '',
            cleaned,
            flags=re.IGNORECASE,
        )

    # After the paired pass any closing tag still present has no matching
    # opening tag in front of it (some chat templates pre-fill the opening
    # tag, so the reply only carries the closing one). Treat everything up
    # to the first such tag as reasoning.
    cleaned = re.sub(
        r'\A[\s\S]*?<\s*/\s*(?:think|thinking|reasoning)\s*>',
        '',
        cleaned,
        count=1,
        flags=re.IGNORECASE,
    )

    section_label = r'(?:思考过程|推理过程|分析过程|reasoning|thinking)'
    # Accept both the ASCII and the full-width colon after a label.
    colon = r'[::]'

    # A Markdown heading naming a reasoning section: drop the heading and its
    # body up to the next Markdown heading. The lookahead must require the
    # '#' marker; if it is optional, the very next non-blank line ends the
    # match and the reasoning body is left in the reply.
    cleaned = re.sub(
        r'^\s*#{1,6}\s*' + section_label + r'\s*' + colon + r'?\s*$'
        r'[\s\S]*?(?=^\s*#{1,6}\s*\S)',
        '',
        cleaned,
        flags=re.IGNORECASE | re.MULTILINE,
    )

    # Remaining label-only lines (plain labels, or headings with no later
    # heading to delimit the section): drop just the label line, so the text
    # that follows is never erased without a clear boundary.
    cleaned = re.sub(
        r'^\s*(?:#{1,6}\s*)?' + section_label + r'\s*' + colon + r'?\s*$\n?',
        '',
        cleaned,
        flags=re.IGNORECASE | re.MULTILINE,
    )

    # Some models emit "思考内容:...最终答案:..." at the very start of the
    # reply. No MULTILINE here, so '^' only matches the start of the text.
    cleaned = re.sub(
        r'^\s*(?:思考内容|思维链|推理过程|分析过程)\s*' + colon
        + r'[\s\S]*?(?=最终答案|总结|摘要|#|\Z)',
        '',
        cleaned,
        flags=re.IGNORECASE,
    )

    cleaned = remove_grok_render_tags(cleaned)
    # Collapse runs of three or more newlines into a single blank line.
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
    return cleaned