"""Render a Markdown string to a PNG image via HTML and headless Chromium.

The pipeline is: Markdown -> HTML file (python-markdown) -> screenshot
(Playwright / Chromium).

On Linux, CJK and emoji fonts must be installed for correct rendering:

    sudo apt-get install -y fonts-noto-cjk fonts-noto-cjk-extra
    sudo apt-get install -y fonts-noto-color-emoji fonts-noto-cjk fonts-wqy-microhei
"""
import asyncio
import glob
import os
import subprocess
import time
import uuid
from pathlib import Path

import aiofiles
import markdown
from loguru import logger
from playwright.async_api import async_playwright

# Stylesheet embedded into every generated page. The font stack lists common
# CJK and emoji fonts so that Chinese text and emoji render with real glyphs.
_PAGE_STYLE = """<style>
body {
    font-family: "Noto Sans CJK SC", "WenQuanYi Micro Hei", "Microsoft YaHei",
                 "PingFang SC", "Noto Color Emoji", "Segoe UI Emoji", sans-serif;
    line-height: 1.6;
    padding: 20px;
    overflow-wrap: break-word;
}
pre, code { font-family: "DejaVu Sans Mono", Consolas, monospace; }
pre { white-space: pre-wrap; }
table { border-collapse: collapse; }
th, td { border: 1px solid #ccc; padding: 4px 8px; }
</style>"""

# Chrome locations probed on Windows, in priority order.
_WINDOWS_CHROME_PATHS = (
    r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe",
    r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe",
    r"C:\Program Files\Google\Chrome\Application\chrome.exe",
    r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
)


def _wrap_html_document(body_html):
    """Return a complete UTF-8 HTML document around an HTML fragment."""
    css = _PAGE_STYLE
    # The explicit charset matters: the page is loaded from a file:// URL, so
    # there is no HTTP header to tell the browser the encoding.
    return f'''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
{css}
</head>
<body>
{body_html}
</body>
</html>
'''


async def md_str_to_html(md_content, output_html):
    """Convert a Markdown string to a styled HTML file.

    :param md_content: Markdown source text.
    :param output_html: Path of the HTML file to write.
    :raises ValueError: If the file is empty when read back.
    :raises Exception: Any error from writing or re-reading the file is
        logged and re-raised unchanged.
    """
    # 'extra' enables tables, fenced code, etc.; 'codehilite' marks up code.
    html_content = markdown.markdown(md_content, extensions=['extra', 'codehilite'])
    full_html = _wrap_html_document(html_content)

    # Plain synchronous write with flush + fsync so the browser process is
    # guaranteed to see the complete file.
    try:
        with open(output_html, 'w', encoding='utf-8') as f:
            f.write(full_html)
            f.flush()
            os.fsync(f.fileno())
    except Exception as e:
        logger.error(f"写入HTML文件失败: {e}")
        raise

    # Read the file back to verify that something was actually written.
    try:
        with open(output_html, 'r', encoding='utf-8') as f:
            content = f.read()
        if not content:
            raise ValueError("HTML文件写入后为空")
    except Exception as e:
        logger.error(f"验证HTML文件失败: {e}")
        raise

    # Short conservative pause kept from the original implementation.
    await asyncio.sleep(0.5)


def check_chromium_installed(path):
    """Return True if *path* is an existing, executable file."""
    return os.path.isfile(path) and os.access(path, os.X_OK)


def _chromium_revision(chrome_path):
    """Return the numeric revision of a ``chromium-<rev>/chrome-linux/chrome`` path.

    Returns -1 when the directory name has no numeric suffix, so such entries
    sort after every real revision.
    """
    install_dir = Path(chrome_path).parent.parent.name
    suffix = install_dir.rpartition("-")[2]
    return int(suffix) if suffix.isdecimal() else -1


def _find_browser_executable():
    """Locate a usable Chrome/Chromium binary, or return None if none is found."""
    if os.name == 'nt':  # Windows
        candidates = list(_WINDOWS_CHROME_PATHS)
        # Also probe the current user's per-user Chrome install. It is added
        # last so the established priority order is unchanged.
        local_app_data = os.environ.get("LOCALAPPDATA")
        if local_app_data:
            candidates.append(
                os.path.join(local_app_data, "Google", "Chrome", "Application", "chrome.exe")
            )
        for path in candidates:
            if check_chromium_installed(path):
                logger.debug(f"找到浏览器路径: {path}")
                return path
        return None

    # Non-Windows: look for Chromium builds downloaded by Playwright.
    user_home = os.path.expanduser("~")
    glob_pattern = os.path.join(
        user_home, ".cache", "ms-playwright", "chromium-*", "chrome-linux", "chrome"
    )
    chrome_paths = glob.glob(glob_pattern)
    # Newest revision first. Compare revisions numerically: a plain string
    # sort would rank "chromium-901522" above "chromium-1091".
    newest_first = sorted(
        chrome_paths, key=lambda p: (_chromium_revision(p), p), reverse=True
    )
    for path in newest_first:
        if check_chromium_installed(path):
            logger.debug(f"找到 Playwright Chromium 路径: {path}")
            return path
    return None


def _install_playwright_chromium():
    """Run ``playwright install chromium``; raises CalledProcessError on failure."""
    subprocess.run(["playwright", "install", "chromium"], check=True)


async def html_to_image(html_file, output_image):
    """Load an HTML file in headless Chromium and save a full-page screenshot.

    :param html_file: Path of the HTML file to render.
    :param output_image: Path of the image file to write.
    :raises FileNotFoundError: If *html_file* does not exist.
    :raises PermissionError: If *html_file* is not readable.
    :raises RuntimeError: If no image file exists after the screenshot call.
    :raises Exception: Any Playwright error is logged and re-raised.
    """
    if not os.path.exists(html_file):
        raise FileNotFoundError(f"HTML文件不存在: {html_file}")
    if not os.access(html_file, os.R_OK):
        raise PermissionError(f"HTML文件不可读: {html_file}")

    try:
        async with async_playwright() as p:
            browser_path = _find_browser_executable()

            if not browser_path:
                logger.debug("未找到已安装的 Chromium 浏览器，尝试使用 Playwright 默认安装")
                try:
                    logger.debug("正在安装 Playwright 浏览器...")
                    # Run the blocking installer in a worker thread so the
                    # event loop stays responsive while it downloads.
                    loop = asyncio.get_running_loop()
                    await loop.run_in_executor(None, _install_playwright_chromium)
                    logger.debug("Playwright 浏览器安装完成")
                except Exception as install_error:
                    # Best effort: fall through and let launch() report the
                    # definitive error if no browser is available.
                    logger.debug(f"安装 Playwright 浏览器失败: {install_error}")
                browser = await p.chromium.launch()  # Playwright's default browser
            else:
                browser = await p.chromium.launch(executable_path=browser_path)

            page = None
            try:
                page = await browser.new_page()

                # Build a proper file URI: as_uri() yields "file:///C:/..." on
                # Windows and percent-encodes characters such as spaces, '#'
                # and '%' that would otherwise corrupt the URL.
                file_url = Path(os.path.abspath(html_file)).as_uri()
                logger.debug(f"正在加载文件: {file_url}")

                # Generous timeout with a relaxed wait condition.
                await page.goto(file_url, timeout=120000, wait_until='domcontentloaded')

                # Give the page time to finish loading.
                await page.wait_for_timeout(2000)

                await page.set_viewport_size({"width": 750, "height": 800})

                # Wait again so the layout settles at the new viewport size.
                await page.wait_for_timeout(1000)

                await page.screenshot(path=output_image, full_page=True)

                if not os.path.exists(output_image):
                    raise RuntimeError(f"截图失败，输出文件不存在: {output_image}")

                logger.debug(f"截图成功生成: {output_image}")

            except Exception as e:
                logger.error(f"截图过程中发生错误: {e}")
                # Remove a possibly incomplete image so callers never see it.
                if os.path.exists(output_image):
                    try:
                        os.remove(output_image)
                        logger.debug(f"已删除不完整的截图文件: {output_image}")
                    except Exception as cleanup_error:
                        logger.warning(f"清理不完整文件失败: {cleanup_error}")
                raise
            finally:
                # Close the browser even if closing the page fails.
                try:
                    if page:
                        await page.close()
                finally:
                    await browser.close()

    except Exception as e:
        logger.error(f"浏览器操作失败: {e}")
        if "Executable doesn't exist" in str(e):
            logger.error("请运行 'playwright install' 命令安装必要的浏览器组件")
        raise


def _remove_if_exists(path):
    """Delete *path* (a ``Path``) if it currently exists."""
    if path.exists():
        os.remove(str(path))


async def convert_md_str_to_image(md_content: str, output_image: str, max_retries: int = 3) -> str:
    """Convert a Markdown string to an image, retrying on failure.

    Files are written under ``<current working directory>/temp/md2image``.

    Args:
        md_content (str): Markdown source text.
        output_image (str): Output image file name, joined onto the temp
            directory above.
        max_retries (int): Maximum number of attempts. Defaults to 3.

    Returns:
        str: Absolute path of the generated image file.

    Raises:
        FileNotFoundError: If the temp directory cannot be created.
        ValueError: If ``md_content`` is empty.
        RuntimeError: If every attempt fails; the last error is chained as
            the cause.
    """
    if not md_content:
        raise ValueError("Markdown content cannot be empty")

    project_root_path = Path(os.getcwd()).resolve()

    temp_dir = project_root_path / "temp" / "md2image"
    try:
        temp_dir.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Failed to create temp directory: {e}")
        raise FileNotFoundError(f"Could not create temp directory: {temp_dir}") from e

    # A second-resolution timestamp alone is not unique: two calls within the
    # same second would share one HTML file and delete each other's copy in
    # the per-attempt cleanup below. The random suffix prevents that.
    timestamp = int(time.time())
    temp_html_filename = f"temp_output_{timestamp}_{uuid.uuid4().hex}.html"
    temp_html_path = temp_dir / temp_html_filename
    output_image_path = temp_dir / output_image

    # output_image may contain sub-directories.
    output_image_path.parent.mkdir(parents=True, exist_ok=True)

    last_error = None

    for attempt in range(max_retries):
        try:
            logger.debug(f"尝试第 {attempt + 1}/{max_retries} 次生成图片")

            # Start each attempt from a clean slate.
            _remove_if_exists(temp_html_path)
            _remove_if_exists(output_image_path)

            await md_str_to_html(md_content, str(temp_html_path))

            # Conservative pause kept from the original implementation.
            await asyncio.sleep(1.0)

            if not os.path.exists(str(temp_html_path)):
                raise FileNotFoundError(f"HTML文件不存在: {temp_html_path}")

            # Sanity-check the HTML: a very short file indicates a problem.
            with open(str(temp_html_path), 'r', encoding='utf-8') as f:
                html_content = f.read()
            if len(html_content) < 100:
                raise ValueError(f"HTML文件内容异常，长度仅为: {len(html_content)}")

            logger.debug(f"HTML文件验证通过，大小: {len(html_content)} 字符")

            await html_to_image(str(temp_html_path), str(output_image_path))

            if not os.path.exists(str(output_image_path)):
                raise RuntimeError(f"图片文件生成失败，文件不存在: {output_image_path}")

            # An image below 1 KB is treated as a failed render.
            image_size = os.path.getsize(str(output_image_path))
            if image_size < 1024:
                raise RuntimeError(f"生成的图片文件异常，大小仅为: {image_size} bytes")

            logger.debug(f"图片已成功生成：{output_image_path}，大小: {image_size} bytes")
            # NOTE: the temporary HTML file is left on disk after a
            # successful run; only failed attempts clean it up.
            return str(output_image_path.resolve())

        except Exception as e:
            last_error = e
            logger.warning(f"第 {attempt + 1} 次尝试失败: {e}")

            # Remove whatever the failed attempt left behind.
            try:
                _remove_if_exists(temp_html_path)
                _remove_if_exists(output_image_path)
            except Exception as cleanup_error:
                logger.warning(f"清理临时文件失败: {cleanup_error}")

            # Linearly increasing back-off before the next attempt.
            if attempt < max_retries - 1:
                wait_time = (attempt + 1) * 2
                logger.debug(f"等待 {wait_time} 秒后重试...")
                await asyncio.sleep(wait_time)

    # Every attempt failed.
    logger.error(f"经过 {max_retries} 次尝试后仍然失败")
    raise RuntimeError(
        f"图片生成失败，已重试 {max_retries} 次。最后错误: {last_error}"
    ) from last_error


# Example usage
if __name__ == "__main__":
    # Sample Markdown string containing Chinese text and emoji.
    md_content = """#🌟「4KED康复训练群 - 05-30 总结」🌟

## 📊 今日数据快报
- **总消息数**：📩 约300条
- **最活跃时段**：🔥 09:00-10:00 （📈 50条/小时）
- **聊天时段**：🕒 08:28 - 16:16

## 🌌 话题总结

### 1️⃣ 【车辆保险费用上涨】 ⭐⭐⭐⭐⭐

🕒 **聊天时段**：11:33 - 13:16 （👥 6人参与）

🔍 **话题回顾**：本次讨论围绕 **车辆保险费用上涨** 展开，堪称今日群聊的"流量担当"。一开始，[@Summer✊] 抛出了一个爆炸性问题："今年车辆保费居然比去年贵"，瞬间点燃了大家的热情。随后，[@火鸡味锅巴] 表示支持，提出了 **保险改革导致价格上涨**，认为 **保险公司收益未达预期，保费自然水涨船高**，并举了一个让人信服的例子 **自己的保险从8K+只返了170元**。然而，[@达文西] 却持相反意见，抛出 **可以不买车损险**，强调 **认真开车就能省下大头费用**，还顺手甩出一句调侃"车损是大头"。讨论的高潮出现在 [@啊菜] 的加入，他不仅提出了 **进口车保险确实更贵**，还分享了一段 **奥迪比雷车贵是合理的对比**，让整个话题从抱怨上升到了品牌差异的讨论层面。大家你一言我一语，气氛热烈得像是开了一场线上辩论会！

👍 **金句回顾**："保的少了，保价贵了，主打的就是个减量加价" —— [@火鸡味锅巴]

📌 **额外信息**：讨论中提及了 **保险改革和统一保费政策**，有兴趣的可以去深入研究一下。

### 2️⃣ 【幼儿园六一活动攀比】 ⭐⭐⭐⭐

🕒 **聊天时段**：15:17 - 15:25 （👥 5人参与）

🔍 **高能讨论**：本话题的火花由 [@暗香] 无意间点燃，他随口提到 **幼儿园六一活动零食大礼包攀比**，没想到立刻引发了一场头脑风暴。[@水牛] 率先下场，详细分析了 **老师组织活动的问题**，从 **统一准备没新意** 到 **自己准备变攀比**，娓娓道来，最后得出一个令人拍案叫绝的结论："这种事情就是老师不会搞"。紧接着，[@Summer✊] 不甘示弱，掏出了 **幽默建议** 作为佐证，比如 **带两瓶拉菲或者直接带钱把同学东西全买了**，让讨论瞬间变得硬核起来。然而，[@互联网赵括] 却用一贯的幽默风格插话："带15升哇哈哈"，搭配一个搞笑表情"猪头"，把严肃的气氛冲淡了不少，引得大家纷纷刷屏"哈哈哈"。

📌 **实用干货**：这次聊出了不少好东西，比如推荐了 **编五彩绳作为活动创意**，实测可用，建议收藏！

### 3️⃣ 【手工制作高达模型的痛苦】 ⭐⭐⭐⭐

🕒 **聊天时段**：09:10 - 09:29 （👥 5人参与）

🔍 **讨论亮点**：这次讨论围绕 **手工制作高达模型的痛苦经历** 展开，简直是群聊中的一场"思想盛宴"。一开始，大家还在轻松闲聊，但 [@火鸡味锅巴] 突然抛出了一个独特的视角："深刻体会了胶佬的痛苦，涂不完的热熔胶"，瞬间让话题升温。他还详细补充了 **制作过程中的各种困难**，比如 **热熔胶烫手、时间紧迫、还要上色**，逻辑清晰得让人不得不服。随后，[@清风] 表示认同，补充了 **可以优化制作，比如加LED灯光**，并提到自己如果参与必然"大杀四方"。而 [@Summer✊] 则提出了疑问："你真弄啊"，引发了一轮新的讨论。大家围绕 **制作难度** 和 **创意想法** 你来我往，聊得不亦乐乎。

👍 **精华总结**："太不容易了，时间又紧，明年请假得了" —— [@火鸡味锅巴]

### 4️⃣ 【谈恋爱风险与个性妹子】 ⭐⭐⭐

🕒 **聊天时段**：13:39 - 14:00 （👥 5人参与）

🔍 **精彩瞬间**：这次讨论的焦点是 **谈恋爱的风险**，一开始只是 [@T T] 的随口一问："现在的男生要谈个恋爱风险蛮高"，没想到却掀起了一波热议。[@互联网赵括] 率先响应，提出了 **有个性的妹子通常不差**，并分享了一个真实案例："我印象里比较有个性的姑娘不会长得太差"，让大家对问题有了更直观的理解。随后，[@火鸡味锅巴] 提出了完全不同的 **观点**，理由是 **何必因为一棵树放弃一片森林**，还顺带调侃了一句："谈恋爱干嘛，互相满足生理需求不就好了"。讨论中，[@Y] 还搬出了搞笑补充 **榜一大哥的调侃**，试图证明 **恋爱风险确实高**，这让话题从日常闲聊上升到了"情感高度"。虽然最后大家没达成一致，但这场唇枪舌剑真是精彩纷呈！

### 5️⃣ 【水费欠款离谱事件】 ⭐⭐⭐

🕒 **聊天时段**：10:34 - 10:38 （👥 5人参与）

🔍 **讨论小结**：相比前面的话题，这次的 **水费欠款事件** 显得轻松不少，但依然趣味横生。话题从 [@雨的回忆] 的一句"买的房子原房东欠了2万多吨水费" 开始，聊着聊着就跑到了 **如何处理欠款的搞笑讨论**。比如，有人提到 **催前房东交钱**，[@互联网赵括] 立马接梗，分享了一段 **调侃原房东可能是干屠宰或发电的**，比如 **"拿来发电我都信"**，笑点密集，群里瞬间刷屏了一堆"哈哈"表情。 [@火鸡味锅巴] 还不忘补刀："欠了多少我也不知道"，让这场讨论成了名副其实的"欢乐场"。虽然话题不算深刻，但这种轻松的氛围也让大家放松了不少。

## 🎖️ 今日荣誉榜

🏆 **群聊 MVP**：[@火鸡味锅巴]

👑 **获奖理由**：

✅ 发起 3 个热门话题，贡献 5 个表情包/段子

✅ **创新贡献**："高达模型制作痛苦心得"（已申请专利 🎉）

✨ *本总结由 AI 自动生成，快来看看你今天是不是最靓的崽！🔥*"""

    spath = asyncio.run(convert_md_str_to_image(md_content, "output.png"))
    print(spath)