feat(ai): clean reasoning content from replies
This commit is contained in:
@@ -21,7 +21,7 @@ from utils.revoke.message_auto_revoke import MessageAutoRevoke
|
||||
from utils.robot_cmd.robot_command import Feature, PermissionStatus, GroupBotManager
|
||||
from utils.decorator.points_decorator import plugin_points_cost
|
||||
from utils.media_downloader import MediaDownloader
|
||||
from utils.string_utils import remove_trailing_content, remove_grok_render_tags
|
||||
from utils.string_utils import remove_reasoning_content, remove_trailing_content, remove_grok_render_tags
|
||||
from wechat_ipad import WechatAPIClient
|
||||
from wechat_ipad.models.message import MessageType
|
||||
import aiohttp
|
||||
@@ -250,6 +250,12 @@ class DifyPlugin(MessagePluginInterface):
|
||||
response: str, roomid: str) -> Tuple[bool, str]:
|
||||
"""发送响应消息的辅助方法"""
|
||||
try:
|
||||
if response and not os.path.isfile(response):
|
||||
response = remove_reasoning_content(response)
|
||||
response = remove_trailing_content(response)
|
||||
response = remove_grok_render_tags(response)
|
||||
response = re.sub(r'\n{3,}', '\n\n', response).strip()
|
||||
|
||||
# 判断是否为本地文件路径
|
||||
if os.path.isfile(response):
|
||||
# 如果是文件路径,使用发送文件方法
|
||||
@@ -549,6 +555,12 @@ class DifyPlugin(MessagePluginInterface):
|
||||
# 获取token使用情况
|
||||
total_tokens = response_data.get("data", {}).get("total_tokens", 0)
|
||||
|
||||
if answer and not os.path.isfile(answer):
|
||||
answer = remove_reasoning_content(answer)
|
||||
answer = remove_trailing_content(answer)
|
||||
answer = remove_grok_render_tags(answer)
|
||||
answer = re.sub(r'\n{3,}', '\n\n', answer).strip()
|
||||
|
||||
# 更新会话历史
|
||||
self.conversations[session_id].append({
|
||||
"role": "user",
|
||||
|
||||
@@ -6,7 +6,10 @@ enabled = true
|
||||
[api]
|
||||
api_key = "app-McGLzBhBjeBCSEi7n83MtuTo"
|
||||
api_url = "http://192.168.2.240/v1/chat-messages"
|
||||
response_mode = "streaming"
|
||||
response_mode = "blocking"
|
||||
connect_timeout_seconds = 10
|
||||
request_timeout_seconds = 180
|
||||
retry_delays_seconds = [10, 20]
|
||||
|
||||
[output]
|
||||
output_dir = "output"
|
||||
|
||||
@@ -21,7 +21,7 @@ from utils.decorator.rate_limit_decorator import group_feature_rate_limit
|
||||
from utils.markdown_to_image import convert_md_str_to_image
|
||||
from utils.revoke.message_auto_revoke import MessageAutoRevoke
|
||||
from utils.robot_cmd.robot_command import GroupBotManager, PermissionStatus
|
||||
from utils.string_utils import remove_trailing_content
|
||||
from utils.string_utils import remove_reasoning_content, remove_trailing_content
|
||||
from utils.wechat.contact_manager import ContactManager
|
||||
from utils.wechat.message_to_db import MessageStorage
|
||||
from wechat_ipad import WechatAPIClient
|
||||
@@ -84,7 +84,10 @@ class MessageSummaryPlugin(MessagePluginInterface):
|
||||
api_config = self._config.get("api", {})
|
||||
self._api_key = api_config.get("api_key", "app-McGLzBhBjeBCSEi7n83MtuTo")
|
||||
self._api_url = api_config.get("api_url", "http://192.168.2.240/v1/chat-messages")
|
||||
self._response_mode = api_config.get("response_mode", "streaming")
|
||||
self._response_mode = api_config.get("response_mode", "blocking")
|
||||
self._connect_timeout_seconds = int(api_config.get("connect_timeout_seconds", 10))
|
||||
self._request_timeout_seconds = int(api_config.get("request_timeout_seconds", 180))
|
||||
self._retry_delays_seconds = api_config.get("retry_delays_seconds", [10, 20])
|
||||
self.message_storage = MessageStorage()
|
||||
db_manager = context.get("db_manager")
|
||||
if db_manager:
|
||||
@@ -275,6 +278,16 @@ class MessageSummaryPlugin(MessagePluginInterface):
|
||||
tokens_info = f"\n\n【tokens】输入: {prompt_tokens} 生成: {completion_tokens} 总: {total_tokens}"
|
||||
return answer + tokens_info
|
||||
|
||||
def _clean_summary_output(self, answer: str) -> str:
    """Clean a summary answer by dropping reasoning and trailing content.

    The answer is passed through ``remove_reasoning_content`` and then
    ``remove_trailing_content``; afterwards runs of three or more newlines
    are collapsed to one blank line and surrounding whitespace is stripped.

    Args:
        answer: Raw answer text returned by the model.

    Returns:
        The cleaned text. Falsy input (``None`` or ``""``) is returned
        unchanged.
    """
    if answer:
        trimmed = remove_trailing_content(remove_reasoning_content(answer))
        return re.sub(r'\n{3,}', '\n\n', trimmed).strip()
    return answer
|
||||
|
||||
def _get_revoke_manager(self) -> Optional[MessageAutoRevoke]:
|
||||
"""优先使用消息上下文中的撤回器,定时任务场景则懒初始化一个"""
|
||||
if self.revoke:
|
||||
@@ -357,12 +370,15 @@ class MessageSummaryPlugin(MessagePluginInterface):
|
||||
"Accept": "text/event-stream" if self._response_mode == "streaming" else "application/json"
|
||||
}
|
||||
|
||||
max_retries = 3
|
||||
retry_delays = [2, 4]
|
||||
max_retries = len(self._retry_delays_seconds) + 1
|
||||
|
||||
for attempt in range(1, max_retries + 1):
|
||||
try:
|
||||
custom_timeout = ClientTimeout(total=None, connect=10, sock_read=300)
|
||||
custom_timeout = ClientTimeout(
|
||||
total=None,
|
||||
connect=self._connect_timeout_seconds,
|
||||
sock_read=self._request_timeout_seconds
|
||||
)
|
||||
conn = aiohttp.TCPConnector(keepalive_timeout=60) # 保持连接活跃
|
||||
async with aiohttp.ClientSession(connector=conn, timeout=custom_timeout) as session:
|
||||
async with session.post(self._api_url, headers=headers, json=data) as response:
|
||||
@@ -381,8 +397,7 @@ class MessageSummaryPlugin(MessagePluginInterface):
|
||||
|
||||
# 提取回答内容
|
||||
answer = response_data.get("answer", "")
|
||||
# 去除广告内容pollinations.ai 的广告
|
||||
# answer = remove_trailing_content(answer)
|
||||
answer = self._clean_summary_output(answer)
|
||||
spath = ""
|
||||
# 提取token使用情况
|
||||
metadata = response_data.get("metadata", {})
|
||||
@@ -420,7 +435,7 @@ class MessageSummaryPlugin(MessagePluginInterface):
|
||||
self.LOG.error(f"处理总结时出现未知错误: attempt={attempt}/{max_retries}, error={e}")
|
||||
|
||||
if attempt < max_retries:
|
||||
delay = retry_delays[attempt - 1] if attempt - 1 < len(retry_delays) else retry_delays[-1]
|
||||
delay = self._retry_delays_seconds[attempt - 1]
|
||||
self.LOG.warning(f"群总结生成失败,准备重试: attempt={attempt}/{max_retries}, delay={delay}s")
|
||||
await asyncio.sleep(delay)
|
||||
|
||||
|
||||
@@ -22,3 +22,36 @@ def remove_grok_render_tags(text: str) -> str:
|
||||
cleaned = re.sub(r'<\s*grok:[^>]+?>[\s\S]*?<\s*/\s*grok:[^>]+?>', '', text, flags=re.IGNORECASE)
|
||||
cleaned = re.sub(r'<\s*grok:[^>]+?/?>', '', cleaned, flags=re.IGNORECASE)
|
||||
return cleaned
|
||||
|
||||
|
||||
def remove_reasoning_content(text: str) -> str:
    """Remove model "thinking"/reasoning output, keeping the displayable answer.

    Cleaning steps, applied in order:

    1. Drop ``<think>``, ``<thinking>`` and ``<reasoning>`` elements together
       with their content (case-insensitive, whitespace inside the tag
       delimiters tolerated).
    2. Drop a reasoning *section* introduced by a line that consists only of a
       reasoning label (optionally a markdown heading, optionally followed by
       an ASCII or full-width colon). The section runs up to the next markdown
       heading or the next line starting with a final-answer label
       (最终答案 / 总结 / 摘要). When no such boundary follows, only the label
       line itself is removed so the rest of the reply is never discarded.
    3. Drop an inline ``label: ...`` reasoning prefix at the very start of the
       text, up to the first final-answer marker, ``#``, or the end of the
       text when no marker is present.
    4. Strip grok render tags, collapse runs of three or more newlines to a
       single blank line and trim surrounding whitespace.

    Args:
        text: Raw model output.

    Returns:
        The cleaned text. Falsy input (``None`` or ``""``) is returned
        unchanged.
    """
    if not text:
        return text

    cleaned = text

    # Common reasoning tags; processed one tag name at a time, in this order.
    for tag in ("think", "thinking", "reasoning"):
        cleaned = re.sub(
            rf'<\s*{tag}\s*>[\s\S]*?<\s*/\s*{tag}\s*>',
            '',
            cleaned,
            flags=re.IGNORECASE,
        )

    # Markdown-style reasoning section.
    # The previous lookahead accepted *any* non-blank line as the end of the
    # section, so the lazy body stopped after a single newline and only the
    # heading was stripped. The section now ends at the next markdown heading
    # or final-answer label; without such a boundary we fall back to removing
    # just the label line (plus its newline).
    # \uff1a is the full-width colon commonly used in Chinese text.
    cleaned = re.sub(
        r'^\s*(#{1,6}\s*)?(思考过程|推理过程|分析过程|reasoning|thinking)\s*[:\uff1a]?\s*$'
        r'(?:[\s\S]*?(?=^[^\S\n]*(?:#{1,6}\s*\S|最终答案|总结|摘要))|\n?)',
        '',
        cleaned,
        flags=re.IGNORECASE | re.MULTILINE,
    )

    # Some models emit "<reasoning label>: ... <final answer label>: ...".
    # No MULTILINE flag here: the label must open the text. If no marker
    # follows, \Z makes the match extend to the end of the text.
    cleaned = re.sub(
        r'^\s*(思考内容|思维链|推理过程|分析过程)\s*[:\uff1a][\s\S]*?(?=(最终答案|总结|摘要|#|\Z))',
        '',
        cleaned,
        flags=re.IGNORECASE,
    )

    cleaned = remove_grok_render_tags(cleaned)
    # Removals can leave large vertical gaps; normalise them.
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
    return cleaned
|
||||
|
||||
Reference in New Issue
Block a user