From 211624eec9de088bb37e3e51b1fdf411a9d1fdcf Mon Sep 17 00:00:00 2001
From: liuwei <liuwei@wdtrgf.com.cn>
Date: Mon, 26 May 2025 14:17:36 +0800
Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=B5=8F=E8=A7=88=E5=99=A8?=
 =?UTF-8?q?=E8=B7=AF=E5=BE=84=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 utils/markdown_to_image.py | 71 +++++++++++++++++++++++---------------
 1 file changed, 44 insertions(+), 27 deletions(-)
diff --git a/utils/markdown_to_image.py b/utils/markdown_to_image.py
index 81c0fa1..4244835 100644
--- a/utils/markdown_to_image.py
+++ b/utils/markdown_to_image.py
@@ -1,12 +1,11 @@
+import subprocess
+
 import markdown
 from playwright.async_api import async_playwright
 import os
 import asyncio
 
-def playwright_browser_installed():
-    """检查 Playwright 的 Chromium 浏览器是否已安装"""
-    chromium_path = os.path.expanduser("~/.cache/ms-playwright/chromium")
-    return os.path.exists(chromium_path)
+
 # linux 下需要安装字体
 # sudo apt-get install -y fonts-noto-cjk fonts-noto-cjk-extra
 # sudo apt-get install -y fonts-noto-color-emoji fonts-noto-cjk fonts-wqy-microhei
@@ -161,47 +160,64 @@ def md_str_to_html(md_content, output_html):
         f.write('</body></html>')
 
 
-# 使用 Playwright 将 HTML 渲染并截图（异步版）
+def check_chromium_installed(path):  # True only when `path` is an existing regular file...
+    return os.path.isfile(path) and os.access(path, os.X_OK)  # ...that os.access reports as executable (X_OK)
+
+
 async def html_to_image(html_file, output_image):
     """
     使用 Playwright 加载 HTML 文件并截图（异步）。
     """
     try:
         async with async_playwright() as p:
-            # Windows 系统
-            if os.name == 'nt':
+            browser_path = None
+
+            if os.name == 'nt':  # Windows
                 possible_chrome_paths = [
                     r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe",
                     r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe",
                     r"C:\Program Files\Google\Chrome\Application\chrome.exe",
                     r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
                 ]
-                browser_path = next((p for p in possible_chrome_paths if os.path.exists(p)), None)
-                if browser_path:
-                    print(f"找到浏览器路径: {browser_path}")
-                    browser = await p.chromium.launch(executable_path=browser_path)
-                else:
-                    print("未找到 Chrome，尝试使用 Playwright 自带浏览器")
-                    if not playwright_browser_installed():
-                        print("Playwright 浏览器未安装，正在安装...")
-                        import subprocess
-                        subprocess.run(["playwright", "install", "chromium"], check=True)
-                        print("安装完成")
-                    browser = await p.chromium.launch()
-            else:
-                if not playwright_browser_installed():
-                    print("Playwright 浏览器未安装，正在安装...")
-                    import subprocess
-                    subprocess.run(["playwright", "install", "chromium"], check=True)
-                    print("安装完成")
-                browser = await p.chromium.launch()
+                for path in possible_chrome_paths:
+                    if check_chromium_installed(path):
+                        browser_path = path
+                        print(f"找到浏览器路径: {browser_path}")
+                        break
+            else:  # Linux
+                import glob
+                user_home = os.path.expanduser("~")
+                glob_pattern = os.path.join(user_home, ".cache", "ms-playwright", "chromium-*", "chrome-linux",
+                                            "chrome")
+                chrome_paths = glob.glob(glob_pattern)
+                browser_path = None
+                for path in sorted(chrome_paths, reverse=True):  # 按版本名排序，最新优先
+                    if check_chromium_installed(path):
+                        browser_path = path
+                        print(f"找到 Playwright Chromium 路径: {browser_path}")
+                        break
 
+            if not browser_path:
+                print("未找到已安装的 Chromium 浏览器，尝试使用 Playwright 默认安装")
+                try:
+                    print("正在安装 Playwright 浏览器...")
+                    subprocess.run(["playwright", "install", "chromium"], check=True)
+                    print("Playwright 浏览器安装完成")
+                except Exception as install_error:
+                    print(f"安装 Playwright 浏览器失败: {install_error}")
+
+                browser = await p.chromium.launch()  # 使用默认路径
+            else:
+                browser = await p.chromium.launch(executable_path=browser_path)
+
+            # 业务逻辑不变
             page = await browser.new_page()
             await page.goto(f'file://{os.path.abspath(html_file)}')
             await page.set_viewport_size({"width": 750, "height": 800})
             await page.wait_for_timeout(500)
             await page.screenshot(path=output_image, full_page=True)
             await browser.close()
+
     except Exception as e:
         print(f"浏览器操作失败: {e}")
         if "Executable doesn't exist" in str(e):
@@ -221,9 +237,10 @@ async def convert_md_str_to_image(md_content, output_image):
     print(f"图片已生成：{output_image}")
     return os.path.abspath(output_image)
 
+
 # 示例使用
 if __name__ == "__main__":
     # 示例 Markdown 字符串（包含中文和 Emoji）
     md_content = """### Python的类型\n1. **数字类型**：\n    - **整数（int）**：用于表示整数，例如`5`、`-10`。在Python 3中，整数的长度不受限制，可以表示任意大小的整数。\n    - **浮点数（float）**：用于表示带有小数部分的数字，例如`3.14`、`-0.5`。它采用IEEE 754标准来存储，可能会存在精度问题。\n    - **复数（complex）**：由实数部分和虚数部分组成，例如`3 + 4j`，其中`j`表示虚数单位。\n2. **序列类型**：\n    - **字符串（str）**：用于表示文本，是不可变的字符序列，例如`'Hello, World!'`、`\"Python\"`。可以通过索引和切片操作访问其中的字符。\n    - **列表（list）**：是可变的有序序列，可以包含不同类型的元素，例如`[1, 'apple', 3.14]`。支持添加、删除、修改元素等操作。\n    - **元组（tuple）**：与列表类似，但它是不可变的有序序列，例如`(1, 'apple', 3.14)`。一旦创建，其元素不能被修改。\n3. **映射类型**：\n    - **字典（dict）**：是一种无序的键值对集合，用于存储和检索数据，例如`{'name': 'Alice', 'age': 25}`。键必须是唯一且不可变的，值可以是任意类型。\n4. **集合类型**：\n    - **集合（set）**：是一个无序的不重复元素集合，例如`{1, 2, 3}`。常用于去重和数学集合运算，如并集、交集、差集等。\n    - **冻结集合（frozenset）**：与集合类似，但它是不可变的，一旦创建就不能修改。\n\n### 将时间戳转换为str\n在Python中，可以使用`datetime`模块来完成时间戳转换为字符串的操作。以下是示例代码：\n\n```python\nimport datetime\n\n\n# 假设时间戳为秒级时间戳\ntimestamp = 1672531200\n# 将时间戳转换为datetime对象\ndt = datetime.datetime.fromtimestamp(timestamp)\n# 将datetime对象格式化为字符串\nstr_time = dt.strftime('%Y-%m-%d %H:%M:%S')\nprint(str_time)\n```\n\n在上述代码中：\n1. `import datetime`导入`datetime`模块。\n2. `datetime.datetime.fromtimestamp(timestamp)`将时间戳（这里假设为秒级时间戳）转换为`datetime`对象。\n3. `dt.strftime('%Y-%m-%d %H:%M:%S')`使用`strftime`方法将`datetime`对象格式化为指定格式的字符串，`%Y`表示四位数的年份，`%m`表示两位数的月份，`%d`表示两位数的日期，`%H`表示24小时制的小时，`%M`表示分钟，`%S`表示秒。\n\n如果时间戳是毫秒级的，需要先将其转换为秒级，例如：\n\n```python\nimport datetime\n\n\n# 假设时间戳为毫秒级时间戳\ntimestamp = 1672531200000\n# 将毫秒级时间戳转换为秒级\ntimestamp_seconds = timestamp / 1000\n# 将时间戳转换为datetime对象\ndt = datetime.datetime.fromtimestamp(timestamp_seconds)\n# 将datetime对象格式化为字符串\nstr_time = dt.strftime('%Y-%m-%d %H:%M:%S')\nprint(str_time)\n"""
     spath = asyncio.run(convert_md_str_to_image(md_content, "output.png"))
-    print(spath)
\ No newline at end of file
+    print(spath)