From 96432039ccc73e9f66f950fb2afe7bdd39eec7c1 Mon Sep 17 00:00:00 2001
From: liuwei <liuwei@wdtrgf.com.cn>
Date: Tue, 7 Apr 2026 16:17:58 +0800
Subject: [PATCH] add retry for xiaoniu api requests

---
 plugins/ai_auto_response/config.toml   |  2 ++
 plugins/ai_auto_response/llm_client.py | 50 ++++++++++++++++----------
 2 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/plugins/ai_auto_response/config.toml b/plugins/ai_auto_response/config.toml
index e311567..a535572 100644
--- a/plugins/ai_auto_response/config.toml
+++ b/plugins/ai_auto_response/config.toml
@@ -18,6 +18,8 @@ timeout_seconds = 45
 temperature = 0.35
 max_tokens = 120
 stream = true
+max_retries = 3
+retry_delay_seconds = 1.0
 
 [mode]
 group_default_mode = "social"
diff --git a/plugins/ai_auto_response/llm_client.py b/plugins/ai_auto_response/llm_client.py
index ffd193d..6ab0725 100644
--- a/plugins/ai_auto_response/llm_client.py
+++ b/plugins/ai_auto_response/llm_client.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+import time
 from typing import Dict, List, Optional
 
 import requests
@@ -18,6 +19,8 @@ class LLMClient:
         self.temperature = float(self.config.get("temperature", 0.7))
         self.max_tokens = int(self.config.get("max_tokens", 500))
         self.stream = bool(self.config.get("stream", True))
+        self.max_retries = max(int(self.config.get("max_retries", 3) or 3), 1)
+        self.retry_delay_seconds = float(self.config.get("retry_delay_seconds", 1.0) or 1.0)
         self.last_error = ""
 
     def chat(
@@ -61,25 +64,34 @@ class LLMClient:
         if self.api_key:
             headers["Authorization"] = f"Bearer {self.api_key}"
 
-        try:
-            if self.stream:
-                return self._chat_streaming(payload, headers)
-            response = requests.post(
-                f"{self.base_url}/{self.endpoint}",
-                json=payload,
-                headers=headers,
-                timeout=self.timeout_seconds,
-            )
-            response.raise_for_status()
-            data = response.json()
-            text = self._extract_text(data)
-            if text:
-                return text
-            self.last_error = f"empty_model_output:{self.model}"
-            return ""
-        except Exception as exc:
-            self.last_error = f"request_failed:{exc}"
-            return ""
+        for attempt in range(1, self.max_retries + 1):
+            try:
+                if self.stream:
+                    text = self._chat_streaming(payload, headers)
+                else:
+                    text = self._chat_non_streaming(payload, headers)
+                if text:
+                    return text
+            except Exception as exc:
+                self.last_error = f"request_failed:attempt_{attempt}:{exc}"
+            if attempt < self.max_retries:
+                time.sleep(self.retry_delay_seconds * attempt)
+        return ""
+
+    def _chat_non_streaming(self, payload: Dict, headers: Dict[str, str]) -> str:
+        """Send a single POST request and return the text extracted from its JSON reply.
+
+        Returns "" (recording ``empty_model_output`` in ``self.last_error``) when
+        nothing is extracted; HTTP-status and decoding errors propagate to the caller.
+        """
+        url = f"{self.base_url}/{self.endpoint}"
+        reply = requests.post(url, json=payload, headers=headers, timeout=self.timeout_seconds)
+        reply.raise_for_status()
+        extracted = self._extract_text(reply.json())
+        if not extracted:
+            self.last_error = f"empty_model_output:{self.model}"
+            return ""
+        return extracted
 
     def _chat_streaming(self, payload: Dict, headers: Dict[str, str]) -> str:
         chunks: List[str] = []