feat(douyu): add daily danmu report pipeline
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
import subprocess
|
||||
import time
|
||||
import markdown
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
@@ -10,8 +9,69 @@ import asyncio
|
||||
import re
|
||||
from loguru import logger
|
||||
|
||||
try:
|
||||
import markdown
|
||||
except ImportError:
|
||||
markdown = None
|
||||
|
||||
# Chinese keywords, in order: group, group name, time, date, members, messages,
# statistics, summary, source, generated, record.
# NOTE(review): presumably used to recognise metadata lines of a report;
# the consumer of this list is not visible here -- confirm against its usage.
META_KEYWORDS = ["群", "群名", "时间", "日期", "成员", "消息", "统计", "总结", "来源", "生成", "记录"]
|
||||
|
||||
def _simple_markdown_to_html(md_content: str) -> str:
|
||||
lines = str(md_content or "").splitlines()
|
||||
html_parts = []
|
||||
in_ul = False
|
||||
paragraph_lines = []
|
||||
|
||||
def flush_paragraph():
|
||||
nonlocal paragraph_lines
|
||||
if paragraph_lines:
|
||||
text = " ".join(item.strip() for item in paragraph_lines if item.strip())
|
||||
if text:
|
||||
html_parts.append(f"<p>{text}</p>")
|
||||
paragraph_lines = []
|
||||
|
||||
def close_ul():
|
||||
nonlocal in_ul
|
||||
if in_ul:
|
||||
html_parts.append("</ul>")
|
||||
in_ul = False
|
||||
|
||||
for raw_line in lines:
|
||||
line = raw_line.rstrip()
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
flush_paragraph()
|
||||
close_ul()
|
||||
continue
|
||||
if stripped.startswith("# "):
|
||||
flush_paragraph()
|
||||
close_ul()
|
||||
html_parts.append(f"<h1>{stripped[2:].strip()}</h1>")
|
||||
continue
|
||||
if stripped.startswith("## "):
|
||||
flush_paragraph()
|
||||
close_ul()
|
||||
html_parts.append(f"<h2>{stripped[3:].strip()}</h2>")
|
||||
continue
|
||||
if stripped.startswith("### "):
|
||||
flush_paragraph()
|
||||
close_ul()
|
||||
html_parts.append(f"<h3>{stripped[4:].strip()}</h3>")
|
||||
continue
|
||||
if stripped.startswith("- "):
|
||||
flush_paragraph()
|
||||
if not in_ul:
|
||||
html_parts.append("<ul>")
|
||||
in_ul = True
|
||||
html_parts.append(f"<li>{stripped[2:].strip()}</li>")
|
||||
continue
|
||||
close_ul()
|
||||
paragraph_lines.append(stripped)
|
||||
|
||||
flush_paragraph()
|
||||
close_ul()
|
||||
return "\n".join(html_parts)
|
||||
|
||||
|
||||
async def safe_close_browser(browser, timeout: float = 4.0) -> None:
|
||||
if not browser:
|
||||
@@ -105,7 +165,10 @@ def _split_hero(html_body: str):
|
||||
|
||||
|
||||
async def md_str_to_html_content(md_content):
|
||||
html_body = markdown.markdown(md_content, extensions=['extra', 'codehilite'])
|
||||
if markdown is not None:
|
||||
html_body = markdown.markdown(md_content, extensions=['extra', 'codehilite'])
|
||||
else:
|
||||
html_body = _simple_markdown_to_html(md_content)
|
||||
hero_title, hero_meta, remain_html, hero_enabled = _split_hero(html_body)
|
||||
|
||||
css = """
|
||||
|
||||
Reference in New Issue
Block a user