diff --git a/utils/video_utils.py b/utils/video_utils.py
new file mode 100644
index 0000000..c593c93
--- /dev/null
+++ b/utils/video_utils.py
@@ -0,0 +1,48 @@
+import os
+
+import cv2
+
+from loguru import logger
+
+
+def _get_first_frame(video_path, output_path):
+    """
+    Extract the first frame of a video and save it as an image.
+    Failures are logged and signalled by returning None.
+
+    :param video_path: path of the video file to read
+    :param output_path: path the frame image is written to
+    :return: absolute path of the written image, or None on any failure
+    """
+    cap = None
+    try:
+        logger.info(f"开始提取视频首帧: {video_path}")
+        # Open the video file.
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            logger.error(f"无法打开视频: {video_path}")
+            return None
+
+        # Decode the first frame only.
+        ret, frame = cap.read()
+        if not ret:
+            logger.error("无法读取视频帧")
+            return None
+
+        # cv2.imwrite reports most failures by returning False rather than
+        # raising, so its result must be checked explicitly.
+        if not cv2.imwrite(output_path, frame):
+            logger.error(f"保存首帧图片失败: {output_path}")
+            return None
+
+        logger.info(f"首帧已保存为: {output_path}")
+        return os.path.abspath(output_path)
+
+    except Exception as e:
+        logger.error(f"提取视频首帧时出错: {e}")
+        return None
+    finally:
+        # Single release point; cap stays None when VideoCapture() itself
+        # raised, and calling release() on None would mask the failure.
+        if cap is not None:
+            cap.release()
diff --git a/wechat_ipad/client/message.py b/wechat_ipad/client/message.py
index 5b4c013..7fcacf2 100644
--- a/wechat_ipad/client/message.py
+++ b/wechat_ipad/client/message.py
@@ -1,6 +1,7 @@
 import asyncio
 import base64
 import os
+import time
 from asyncio import Future
 from asyncio import Queue, sleep
 from io import BytesIO
@@ -14,6 +15,7 @@ from pymediainfo import MediaInfo
 import pysilk
 from pydub import AudioSegment
 
+from utils.video_utils import _get_first_frame
 from wechat_ipad import UserLoggedOut
 from wechat_ipad.client.base import WechatAPIClientBase
 
@@ -214,8 +216,7 @@ class MessageMixin(WechatAPIClientBase):
                     ValueError: 视频或图片参数都为空或都不为空时
                     根据error_handler处理错误
                 """
-        if not image:
-            image = Path(os.path.join(Path(__file__).resolve().parent, "fallback.png"))
+
         # get video base64 and duration
         if isinstance(video, str):
             vid_base64 = video
@@ -235,6 +236,9 @@ class MessageMixin(WechatAPIClientBase):
             file_len = len(video_bytes)
             vid_base64 = base64.b64encode(video_bytes).decode()
             media_info = MediaInfo.parse(video_path)
+            first_frame = _get_first_frame(video_path, f"dify_frame_{int(time.time())}.jpg")
+            if first_frame:
+                image = first_frame
         else:
             raise ValueError("video should be str, bytes, or path")
             # 获取视频时长
@@ -246,7 +250,8 @@ class MessageMixin(WechatAPIClientBase):
         if duration is None:
             duration = 1
             self.logging.error(f"无法从视频文件获取时长: {video}")
-
+        if not image:
+            image = Path(os.path.join(Path(__file__).resolve().parent, "fallback.png"))
         # get image base64
         if isinstance(image, str):
             image_base64 = image