Initial commit: add all skills files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1579  minimax-pdf/scripts/cover.py  (new file)
      File diff suppressed because it is too large
200  minimax-pdf/scripts/fill_inspect.py  (new file)
@@ -0,0 +1,200 @@
#!/usr/bin/env python3
"""
fill_inspect.py — Inspect form fields in an existing PDF.

Usage:
    python3 fill_inspect.py --input form.pdf
    python3 fill_inspect.py --input form.pdf --out fields.json

Outputs a JSON summary of every fillable field: name, type, current value,
allowed values (for checkboxes / dropdowns), and page number.

Exit codes: 0 success, 1 bad args / file not found, 2 dep missing, 3 read error
"""
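
# Example output for a hypothetical two-field form — the shape matches the dict
# built by inspect() below; the field names are illustrative, not from a real file:
#
#   {
#     "status": "ok",
#     "has_fields": true,
#     "field_count": 2,
#     "fields": [
#       {"name": "FirstName", "type": "text", "value": null, "page": 1},
#       {"name": "Agree", "type": "checkbox", "value": null,
#        "states": ["/Off", "/Yes"], "checked_value": "/Yes", "page": 1}
#     ]
#   }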

import argparse
import json
import sys
import importlib.util
import os


def ensure_deps():
    if importlib.util.find_spec("pypdf") is None:
        import subprocess
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--break-system-packages", "-q", "pypdf"]
        )


ensure_deps()
from pypdf import PdfReader
from pypdf.generic import ArrayObject


# ── Field type resolution ──────────────────────────────────────────────────────
def _field_type(field) -> str:
    ft = field.get("/FT")
    if ft is None:
        return "unknown"
    ft = str(ft)
    if ft == "/Tx":
        return "text"
    if ft == "/Btn":
        ff = int(field.get("/Ff", 0))
        return "radio" if ff & (1 << 15) else "checkbox"
    if ft == "/Ch":
        ff = int(field.get("/Ff", 0))
        return "dropdown" if ff & (1 << 17) else "listbox"
    if ft == "/Sig":
        return "signature"
    return "unknown"


def _field_value(field) -> str | None:
    v = field.get("/V")
    return str(v) if v is not None else None


def _field_options(field, ftype: str) -> dict:
    extra = {}
    if ftype in ("checkbox",):
        ap = field.get("/AP")
        if ap and "/N" in ap:
            states = [str(k) for k in ap["/N"]]
            extra["states"] = states
            checked = next((s for s in states if s != "/Off"), None)
            if checked:
                extra["checked_value"] = checked
    if ftype in ("dropdown", "listbox"):
        opt = field.get("/Opt")
        if opt:
            choices = []
            for item in opt:
                if isinstance(item, (list, ArrayObject)) and len(item) >= 2:
                    choices.append({"value": str(item[0]), "label": str(item[1])})
                else:
                    choices.append({"value": str(item), "label": str(item)})
            extra["choices"] = choices
    if ftype == "radio":
        kids = field.get("/Kids")
        if kids:
            values = []
            for kid in kids:
                ap = kid.get("/AP")
                if ap and "/N" in ap:
                    for k in ap["/N"]:
                        if str(k) != "/Off":
                            values.append(str(k))
            extra["radio_values"] = values
    return extra


def _walk_fields(fields, page_map: dict, parent_name: str = "") -> list:
    """Recursively collect all leaf fields."""
    result = []
    for field in fields:
        name = str(field.get("/T", ""))
        full = f"{parent_name}.{name}" if parent_name else name

        kids = field.get("/Kids")
        # Kids that have /T are sub-fields (groups), not widget annotations
        if kids:
            named_kids = [k for k in kids if "/T" in k]
            if named_kids:
                result.extend(_walk_fields(named_kids, page_map, full))
                continue

        ftype = _field_type(field)
        if ftype == "unknown":
            continue

        entry = {
            "name": full,
            "type": ftype,
            "value": _field_value(field),
        }
        entry.update(_field_options(field, ftype))

        # Page lookup via /P indirect reference
        p_ref = field.get("/P")
        if p_ref and hasattr(p_ref, "idnum"):
            entry["page"] = page_map.get(p_ref.idnum, "?")

        result.append(entry)
    return result


def inspect(pdf_path: str) -> dict:
    try:
        reader = PdfReader(pdf_path)
    except Exception as e:
        return {"status": "error", "error": str(e)}

    # Build page-number lookup: {object_id: 1-based page number}
    page_map = {}
    for i, page in enumerate(reader.pages):
        if hasattr(page, "indirect_reference") and page.indirect_reference:
            page_map[page.indirect_reference.idnum] = i + 1

    acroform = reader.trailer.get("/Root", {}).get("/AcroForm")
    if acroform is None or "/Fields" not in acroform:
        return {
            "status": "ok",
            "has_fields": False,
            "field_count": 0,
            "fields": [],
            "note": "This PDF has no fillable form fields.",
        }

    fields = _walk_fields(list(acroform["/Fields"]), page_map)

    return {
        "status": "ok",
        "has_fields": bool(fields),
        "field_count": len(fields),
        "fields": fields,
    }


def main():
    parser = argparse.ArgumentParser(description="Inspect PDF form fields")
    parser.add_argument("--input", required=True, help="PDF file to inspect")
    parser.add_argument("--out", default="", help="Write JSON to file (optional)")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(json.dumps({"status": "error", "error": f"File not found: {args.input}"}),
              file=sys.stderr)
        sys.exit(1)

    result = inspect(args.input)

    output = json.dumps(result, indent=2, ensure_ascii=False)

    if args.out:
        with open(args.out, "w") as f:
            f.write(output)

    print(output)

    # Human-readable summary
    if result["status"] == "ok" and result["has_fields"]:
        print(f"\n── Fields in {args.input} ──────────────────────────────",
              file=sys.stderr)
        for f in result["fields"]:
            pg = f" p.{f['page']}" if "page" in f else ""
            val = f" = {f['value']}" if f.get("value") else ""
            extra = ""
            if "choices" in f:
                extra = f" [{', '.join(c['value'] for c in f['choices'][:4])}{'…' if len(f['choices'])>4 else ''}]"
            elif "states" in f:
                extra = f" {f['states']}"
            print(f"  {f['type']:12} {f['name']}{pg}{val}{extra}", file=sys.stderr)
        print("", file=sys.stderr)


if __name__ == "__main__":
    main()
242  minimax-pdf/scripts/fill_write.py  (new file)
@@ -0,0 +1,242 @@
#!/usr/bin/env python3
"""
fill_write.py — Write values into PDF form fields.

Usage:
    # From a JSON data file
    python3 fill_write.py --input form.pdf --data values.json --out filled.pdf

    # Inline JSON
    python3 fill_write.py --input form.pdf --out filled.pdf \
        --values '{"FirstName": "Jane", "Agree": "true"}'

values format:
    {
      "FieldName": "text value",    # text field
      "CheckBox1": "true",          # checkbox (true / false)
      "Dropdown1": "OptionValue",   # dropdown (must match an existing choice value)
      "Radio1": "/Choice2"          # radio (must match a radio value)
    }

Exit codes: 0 success, 1 bad args, 2 dep missing, 3 read/write error, 4 validation error
"""
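
# On success the script prints a JSON report shaped like the `result` dict
# assembled in fill() below — a sketch with illustrative values:
#
#   {
#     "status": "ok",
#     "out": "filled.pdf",
#     "filled_count": 2,
#     "filled_fields": ["FirstName", "Agree"],
#     "size_kb": 84
#   }
#
# plus optional "validation_errors" and "not_found" keys when inputs don't match.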

import argparse
import json
import os
import sys
import importlib.util


def ensure_deps():
    if importlib.util.find_spec("pypdf") is None:
        import subprocess
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--break-system-packages", "-q", "pypdf"]
        )


ensure_deps()
from pypdf import PdfReader, PdfWriter
from pypdf.generic import ArrayObject, NameObject, TextStringObject, BooleanObject


# ── Field helpers ─────────────────────────────────────────────────────────────
def _field_type(field) -> str:
    ft = str(field.get("/FT", ""))
    if ft == "/Tx":
        return "text"
    if ft == "/Btn":
        ff = int(field.get("/Ff", 0))
        return "radio" if ff & (1 << 15) else "checkbox"
    if ft == "/Ch":
        ff = int(field.get("/Ff", 0))
        return "dropdown" if ff & (1 << 17) else "listbox"
    return "unknown"


def _get_checkbox_on_value(field) -> str:
    """Return the /AP /N key that means 'checked' (anything except /Off)."""
    ap = field.get("/AP")
    if ap and "/N" in ap:
        for k in ap["/N"]:
            if str(k) != "/Off":
                return str(k)
    return "/Yes"


def _get_dropdown_values(field) -> list[str]:
    opt = field.get("/Opt")
    if not opt:
        return []
    values = []
    for item in opt:
        try:
            if isinstance(item, (list, ArrayObject)) and len(item) >= 1:
                values.append(str(item[0]))
            else:
                values.append(str(item))
        except Exception:
            values.append(str(item))
    return values


# ── Walk + fill ───────────────────────────────────────────────────────────────
def _walk_and_fill(fields, data: dict, filled: list, errors: list, parent: str = ""):
    for field in fields:
        name = str(field.get("/T", ""))
        full = f"{parent}.{name}" if parent else name

        # Recurse into named groups
        kids = field.get("/Kids")
        if kids:
            named = [k for k in kids if "/T" in k]
            if named:
                _walk_and_fill(named, data, filled, errors, full)
                continue

        if full not in data:
            continue

        value = data[full]
        ftype = _field_type(field)

        if ftype == "text":
            field.update({
                NameObject("/V"): TextStringObject(str(value)),
                NameObject("/DV"): TextStringObject(str(value)),
            })
            filled.append(full)

        elif ftype == "checkbox":
            truthy = str(value).lower() in ("true", "1", "yes", "on")
            on_val = _get_checkbox_on_value(field)
            pdf_val = on_val if truthy else "/Off"
            field.update({
                NameObject("/V"): NameObject(pdf_val),
                NameObject("/AS"): NameObject(pdf_val),
            })
            filled.append(full)

        elif ftype in ("dropdown", "listbox"):
            allowed = _get_dropdown_values(field)
            if allowed and str(value) not in allowed:
                errors.append({
                    "field": full,
                    "error": f"Value '{value}' not in allowed choices: {allowed}"
                })
                continue
            field.update({NameObject("/V"): TextStringObject(str(value))})
            filled.append(full)

        elif ftype == "radio":
            # Radio value must start with /
            pdf_val = str(value) if str(value).startswith("/") else f"/{value}"
            field.update({
                NameObject("/V"): NameObject(pdf_val),
                NameObject("/AS"): NameObject(pdf_val),
            })
            filled.append(full)

        else:
            errors.append({"field": full, "error": f"Unsupported field type: {ftype}"})


def fill(pdf_path: str, out_path: str, data: dict) -> dict:
    try:
        reader = PdfReader(pdf_path)
    except Exception as e:
        return {"status": "error", "error": str(e)}

    writer = PdfWriter()
    writer.clone_document_from_reader(reader)

    acroform = writer._root_object.get("/AcroForm")  # type: ignore[attr-defined]
    if acroform is None or "/Fields" not in acroform:
        return {
            "status": "error",
            "error": "This PDF has no fillable form fields.",
            "hint": "Run fill_inspect.py first to confirm the PDF has fields.",
        }

    # Enable appearance regeneration so viewers show the new values
    acroform.update({NameObject("/NeedAppearances"): BooleanObject(True)})

    filled: list[str] = []
    errors: list[dict] = []
    _walk_and_fill(list(acroform["/Fields"]), data, filled, errors)

    # Warn about requested fields that were never found
    not_found = [k for k in data if k not in filled and not any(e["field"] == k for e in errors)]

    try:
        os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
        with open(out_path, "wb") as f:
            writer.write(f)
    except Exception as e:
        return {"status": "error", "error": f"Write failed: {e}"}

    result = {
        "status": "ok",
        "out": out_path,
        "filled_count": len(filled),
        "filled_fields": filled,
        "size_kb": os.path.getsize(out_path) // 1024,
    }
    if errors:
        result["validation_errors"] = errors
    if not_found:
        result["not_found"] = not_found
        result["hint"] = "Run fill_inspect.py to see all available field names."
    return result


def main():
    parser = argparse.ArgumentParser(description="Fill PDF form fields")
    parser.add_argument("--input", required=True, help="Input PDF with form fields")
    parser.add_argument("--out", required=True, help="Output PDF path")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--data", help="Path to JSON file with field values")
    group.add_argument("--values", help="Inline JSON string with field values")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(json.dumps({"status": "error", "error": f"File not found: {args.input}"}),
              file=sys.stderr)
        sys.exit(1)

    # Load data
    try:
        if args.data:
            with open(args.data) as f:
                data = json.load(f)
        else:
            data = json.loads(args.values)
    except Exception as e:
        print(json.dumps({"status": "error", "error": f"JSON parse error: {e}"}),
              file=sys.stderr)
        sys.exit(1)

    result = fill(args.input, args.out, data)
    print(json.dumps(result, indent=2, ensure_ascii=False))

    if result["status"] == "ok":
        print("\n── Fill complete ───────────────────────────────────────",
              file=sys.stderr)
        print(f"  Output : {result['out']}", file=sys.stderr)
        print(f"  Filled : {result['filled_count']} field(s)", file=sys.stderr)
        if result.get("validation_errors"):
            print("  Errors :", file=sys.stderr)
            for e in result["validation_errors"]:
                print(f"    • {e['field']}: {e['error']}", file=sys.stderr)
        if result.get("not_found"):
            print(f"  Not found: {result['not_found']}", file=sys.stderr)
        print("", file=sys.stderr)
    else:
        sys.exit(3)


if __name__ == "__main__":
    main()
491  minimax-pdf/scripts/make.sh  (new file)
@@ -0,0 +1,491 @@
#!/usr/bin/env bash
# make.sh — minimax-pdf unified CLI
# Usage: bash make.sh <command> [options]
#
# Commands:
#   check                       Verify all dependencies
#   fix                         Auto-install missing dependencies
#   run --title T --type TYPE   Full pipeline → output.pdf
#       --out FILE              Output path (default: output.pdf)
#       --author A --date D
#       --subtitle S
#       --abstract A            Optional abstract text for cover
#       --cover-image URL       Optional cover image URL/path
#       --content FILE          Path to content.json (optional)
#   fill --input f.pdf          Inspect or fill PDF form fields
#   reformat --input doc.md     Parse an existing doc → apply design → PDF
#   demo                        Build a full-featured demo to demo.pdf
#
# Document types:
#   report proposal resume portfolio academic general
#   minimal stripe diagonal frame editorial
#   magazine darkroom terminal poster
#
# Content block types:
#   h1 h2 h3 body bullet numbered callout table
#   image figure code math chart flowchart bibliography
#   divider caption pagebreak spacer
#
# Exit codes: 0 success, 1 usage error, 2 dep missing, 3 runtime error
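#
# Example invocations (illustrative file names):
#   bash make.sh check
#   bash make.sh run --title "Q3 Report" --type report --content report.json --out q3.pdf
#   bash make.sh reformat --input notes.md --title "Field Notes" --type minimal --out notes.pdf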

set -euo pipefail
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PY="python3"
NODE="node"

# ── Colour helpers ─────────────────────────────────────────────────────────────
# %b (not %s) so "\n" in messages is expanded rather than printed literally
red()    { printf '\033[0;31m%b\033[0m\n' "$*"; }
green()  { printf '\033[0;32m%b\033[0m\n' "$*"; }
yellow() { printf '\033[0;33m%b\033[0m\n' "$*"; }
bold()   { printf '\033[1m%b\033[0m\n' "$*"; }

# ── check ──────────────────────────────────────────────────────────────────────
cmd_check() {
  local ok=true
  bold "Checking dependencies..."

  # Python
  if command -v python3 &>/dev/null; then
    green "  ✓ python3 $(python3 --version 2>&1 | awk '{print $2}')"
  else
    red "  ✗ python3 not found"
    ok=false
  fi

  # reportlab
  if python3 -c "import reportlab" 2>/dev/null; then
    green "  ✓ reportlab"
  else
    yellow "  ⚠ reportlab not installed (run: make.sh fix)"
    ok=false
  fi

  # pypdf
  if python3 -c "import pypdf" 2>/dev/null; then
    green "  ✓ pypdf"
  else
    yellow "  ⚠ pypdf not installed (run: make.sh fix)"
    ok=false
  fi

  # Node.js
  if command -v node &>/dev/null; then
    green "  ✓ node $(node --version)"
  else
    red "  ✗ node not found — cover rendering unavailable"
    ok=false
  fi

  # Playwright
  if node -e "require('playwright')" 2>/dev/null || \
     node -e "require(require('child_process').execSync('npm root -g').toString().trim()+'/playwright')" 2>/dev/null; then
    green "  ✓ playwright"
  else
    yellow "  ⚠ playwright not found (run: make.sh fix)"
    ok=false
  fi

  # matplotlib (optional — required for math/chart/flowchart; degrades gracefully)
  if python3 -c "import matplotlib" 2>/dev/null; then
    green "  ✓ matplotlib (math, chart, flowchart blocks enabled)"
  else
    yellow "  ⚠ matplotlib not installed — math/chart/flowchart blocks degrade to text (run: make.sh fix)"
  fi

  if $ok; then
    green "\nAll dependencies satisfied."
    exit 0
  else
    yellow "\nSome dependencies missing. Run: bash make.sh fix"
    exit 2
  fi
}

# ── fix ────────────────────────────────────────────────────────────────────────
cmd_fix() {
  bold "Installing missing dependencies..."
  local rc=0

  # Python packages — only report success if one of the installs worked
  if command -v python3 &>/dev/null; then
    if python3 -m pip install --break-system-packages -q reportlab pypdf matplotlib 2>/dev/null \
       || python3 -m pip install -q reportlab pypdf matplotlib 2>/dev/null; then
      green "  ✓ Python packages installed (reportlab, pypdf, matplotlib)"
    else
      yellow "  pip install failed — try: pip install reportlab pypdf matplotlib"
      rc=3
    fi
  fi

  # Playwright
  if command -v npm &>/dev/null; then
    npm install -g playwright --silent 2>/dev/null && \
      npx playwright install chromium --silent 2>/dev/null && \
      green "  ✓ Playwright + Chromium installed" || \
      { yellow "  playwright install failed — try manually"; rc=3; }
  else
    yellow "  npm not found — cannot install Playwright automatically"
    rc=2
  fi

  if [[ $rc -eq 0 ]]; then
    green "\nAll dependencies installed. Run: bash make.sh check"
  fi
  exit $rc
}

# ── run ────────────────────────────────────────────────────────────────────────
cmd_run() {
  local title="Untitled Document"
  local type="general"
  local author=""
  local date=""
  local subtitle=""
  local abstract=""
  local cover_image=""
  local accent=""
  local cover_bg=""
  local content_file=""
  local out="output.pdf"
  local workdir
  workdir="$(mktemp -d)"

  # Parse options
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --title)       title="$2"; shift 2 ;;
      --type)        type="$2"; shift 2 ;;
      --author)      author="$2"; shift 2 ;;
      --date)        date="$2"; shift 2 ;;
      --subtitle)    subtitle="$2"; shift 2 ;;
      --abstract)    abstract="$2"; shift 2 ;;
      --cover-image) cover_image="$2"; shift 2 ;;
      --accent)      accent="$2"; shift 2 ;;
      --cover-bg)    cover_bg="$2"; shift 2 ;;
      --content)     content_file="$2"; shift 2 ;;
      --out)         out="$2"; shift 2 ;;
      *) echo "Unknown option: $1"; exit 1 ;;
    esac
  done

  bold "Building: $title"
  echo "  Type   : $type"
  echo "  Output : $out"

  # Step 1: tokens
  echo ""
  bold "Step 1/4 Generating design tokens..."
  local accent_args=()
  [[ -n "$accent" ]] && accent_args+=(--accent "$accent")
  [[ -n "$cover_bg" ]] && accent_args+=(--cover-bg "$cover_bg")
  $PY "$SCRIPTS/palette.py" \
    --title "$title" --type "$type" \
    --author "$author" --date "$date" \
    --out "$workdir/tokens.json" \
    "${accent_args[@]+"${accent_args[@]}"}"

  # Inject optional cover fields into tokens.json
  if [[ -n "$abstract" || -n "$cover_image" ]]; then
    PDF_ABSTRACT="$abstract" PDF_COVER_IMAGE="$cover_image" PDF_TOKENS="$workdir/tokens.json" \
    $PY - <<'PYEOF'
import json, os
with open(os.environ["PDF_TOKENS"]) as f:
    t = json.load(f)
abstract = os.environ.get("PDF_ABSTRACT", "")
cover_image = os.environ.get("PDF_COVER_IMAGE", "")
if abstract:
    t["abstract"] = abstract
if cover_image:
    t["cover_image"] = cover_image
with open(os.environ["PDF_TOKENS"], "w") as f:
    json.dump(t, f, indent=2)
PYEOF
  fi

  cat "$workdir/tokens.json" | $PY -c "
import json,sys
t=json.load(sys.stdin)
print(f'  Mood    : {t[\"mood\"]}')
print(f'  Pattern : {t[\"cover_pattern\"]}')
print(f'  Fonts   : {t[\"font_display\"]} / {t[\"font_body\"]}')"

  # Step 2: cover HTML + render
  echo ""
  bold "Step 2/4 Rendering cover..."
  local subtitle_args=()
  [[ -n "$subtitle" ]] && subtitle_args=(--subtitle "$subtitle")
  $PY "$SCRIPTS/cover.py" \
    --tokens "$workdir/tokens.json" \
    --out "$workdir/cover.html" \
    "${subtitle_args[@]+"${subtitle_args[@]}"}"

  $NODE "$SCRIPTS/render_cover.js" \
    --input "$workdir/cover.html" \
    --out "$workdir/cover.pdf"
  green "  ✓ Cover rendered"

  # Step 3: body
  echo ""
  bold "Step 3/4 Rendering body pages..."
  if [[ -z "$content_file" ]]; then
    # Generate a minimal placeholder body
    cat > "$workdir/content.json" <<'JSON'
[
  {"type":"h1", "text":"Document Body"},
  {"type":"body", "text":"Replace this with your content.json file using --content path/to/content.json"},
  {"type":"body", "text":"See the content.json schema in the skill README for the full list of supported block types: h1, h2, h3, body, bullet, callout, table, pagebreak, spacer."}
]
JSON
    content_file="$workdir/content.json"
    yellow "  No content file provided — using placeholder body."
  fi

  $PY "$SCRIPTS/render_body.py" \
    --tokens "$workdir/tokens.json" \
    --content "$content_file" \
    --out "$workdir/body.pdf"
  green "  ✓ Body rendered"

  # Step 4: merge
  echo ""
  bold "Step 4/4 Merging and QA..."
  $PY "$SCRIPTS/merge.py" \
    --cover "$workdir/cover.pdf" \
    --body "$workdir/body.pdf" \
    --out "$out" \
    --title "$title"

  # Cleanup
  rm -rf "$workdir"
}

# ── fill ──────────────────────────────────────────────────────────────────────
cmd_fill() {
  local input="" out="" values="" data_file="" inspect_only=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --input)   input="$2"; shift 2 ;;
      --out)     out="$2"; shift 2 ;;
      --values)  values="$2"; shift 2 ;;
      --data)    data_file="$2"; shift 2 ;;
      --inspect) inspect_only=true; shift ;;
      *) echo "Unknown option: $1"; exit 1 ;;
    esac
  done

  if [[ -z "$input" ]]; then
    echo "Usage: make.sh fill --input form.pdf [--out filled.pdf] [--values '{...}'] [--data values.json] [--inspect]"
    exit 1
  fi

  if $inspect_only || [[ -z "$out" && -z "$values" && -z "$data_file" ]]; then
    bold "Inspecting form fields in: $input"
    $PY "$SCRIPTS/fill_inspect.py" --input "$input"
    return
  fi

  bold "Filling form: $input → $out"

  # Pass value arguments as an array so inline JSON containing spaces
  # survives word-splitting intact
  local val_args=()
  if [[ -n "$values" ]]; then val_args=(--values "$values"); fi
  if [[ -n "$data_file" ]]; then val_args=(--data "$data_file"); fi

  $PY "$SCRIPTS/fill_write.py" --input "$input" --out "$out" "${val_args[@]+"${val_args[@]}"}"
}
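
# Example (hypothetical form and data files):
#   bash make.sh fill --input tax_form.pdf --inspect
#   bash make.sh fill --input tax_form.pdf --data answers.json --out tax_form_filled.pdf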

# ── reformat ───────────────────────────────────────────────────────────────────
cmd_reformat() {
  local input="" title="Reformatted Document" type="general"
  local author="" date="" out="output.pdf" subtitle=""
  local tmpdir
  tmpdir="$(mktemp -d)"

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --input)    input="$2"; shift 2 ;;
      --title)    title="$2"; shift 2 ;;
      --type)     type="$2"; shift 2 ;;
      --author)   author="$2"; shift 2 ;;
      --date)     date="$2"; shift 2 ;;
      --subtitle) subtitle="$2"; shift 2 ;;
      --out)      out="$2"; shift 2 ;;
      *) echo "Unknown option: $1"; exit 1 ;;
    esac
  done

  if [[ -z "$input" ]]; then
    echo "Usage: make.sh reformat --input source.md --title T --type TYPE --out output.pdf"
    exit 1
  fi

  bold "Parsing: $input"
  $PY "$SCRIPTS/reformat_parse.py" --input "$input" --out "$tmpdir/content.json"
  green "  ✓ Parsed to content.json"

  bold "Applying design and building PDF..."
  local sub_args=()
  [[ -n "$subtitle" ]] && sub_args=(--subtitle "$subtitle")

  cmd_run \
    --title "$title" --type "$type" \
    --author "$author" --date "$date" \
    --content "$tmpdir/content.json" \
    --out "$out" \
    "${sub_args[@]+"${sub_args[@]}"}"

  rm -rf "$tmpdir"
}

# ── demo ──────────────────────────────────────────────────────────────────────
cmd_demo() {
  local tmpdir
  tmpdir="$(mktemp -d)"

  cat > "$tmpdir/content.json" <<'JSON'
[
  {"type":"h1", "text":"Executive Summary"},
  {"type":"body", "text":"This document was generated by minimax-pdf — a skill for creating visually polished PDFs. Every design decision is rooted in the document type and content, not a generic template."},
  {"type":"callout", "text":"Key insight: design tokens flow from palette.py through every renderer, keeping cover and body visually consistent."},

  {"type":"h1", "text":"How It Works"},
  {"type":"h2", "text":"The Token Pipeline"},
  {"type":"body", "text":"The palette.py script infers a color palette and typography pair from the document type. These tokens are written to tokens.json and consumed by every downstream script."},
  {"type":"numbered","text":"palette.py generates color tokens, font selection, and the cover pattern"},
  {"type":"numbered","text":"cover.py renders the cover HTML using the selected pattern"},
  {"type":"numbered","text":"render_cover.js uses Playwright to convert the HTML cover to PDF"},
  {"type":"numbered","text":"render_body.py builds inner pages from content.json using ReportLab"},
  {"type":"numbered","text":"merge.py combines cover + body and runs final QA checks"},

  {"type":"h2", "text":"Cover Patterns"},
  {"type":"table",
   "headers": ["Pattern", "Document type", "Visual character"],
   "rows": [
     ["fullbleed", "report, general", "Deep background · dot-grid texture"],
     ["split", "proposal", "Left dark panel · right dot-grid"],
     ["typographic", "resume, academic", "Oversized display type · first-word accent"],
     ["atmospheric", "portfolio", "Dark bg · radial glow · dot-grid"],
     ["magazine", "magazine", "Cream bg · centered · hero image"],
     ["darkroom", "darkroom", "Navy bg · centered · grayscale image"],
     ["terminal", "terminal", "Near-black · grid lines · monospace"],
     ["poster", "poster", "White · thick sidebar · oversized title"]
   ]
  },

  {"type":"h1", "text":"Data Visualisation"},
  {"type":"h2", "text":"Performance Metrics (Chart)"},
  {"type":"body", "text":"Charts are rendered natively using matplotlib with a color palette derived from the document accent. No external chart services or image files required."},
  {"type":"chart",
   "chart_type": "bar",
   "title": "Quarterly Performance",
   "labels": ["Q1", "Q2", "Q3", "Q4"],
   "datasets": [
     {"label": "Revenue", "values": [120, 145, 132, 178]},
     {"label": "Expenses", "values": [95, 108, 99, 122]}
   ],
   "y_label": "USD (thousands)",
   "caption": "Quarterly revenue vs. expenses"
  },

  {"type":"h2", "text":"Market Share (Pie Chart)"},
  {"type":"chart",
   "chart_type": "pie",
   "labels": ["Product A", "Product B", "Product C", "Other"],
   "datasets": [{"values": [42, 28, 18, 12]}],
   "caption": "Annual market share by product line"
  },

  {"type":"pagebreak"},

  {"type":"h1", "text":"Mathematics"},
  {"type":"body", "text":"Display math is rendered via matplotlib mathtext — no LaTeX binary installation required. Inline references use standard [N] notation in body text."},
  {"type":"math", "text":"E = mc^2", "label":"(1)"},
  {"type":"math", "text":"\\int_0^\\infty e^{-x^2}\\,dx = \\frac{\\sqrt{\\pi}}{2}", "label":"(2)"},
  {"type":"math", "text":"\\sum_{n=1}^{\\infty} \\frac{1}{n^2} = \\frac{\\pi^2}{6}", "caption":"Basel problem (Euler, 1734)"},

  {"type":"h1", "text":"Process Flow"},
  {"type":"body", "text":"Flowcharts are drawn directly using matplotlib patches — no Graphviz or external tools needed. Supported node shapes: rect, diamond, oval, parallelogram."},
  {"type":"flowchart",
   "nodes": [
     {"id":"start", "label":"Start", "shape":"oval"},
     {"id":"input", "label":"Receive Input", "shape":"parallelogram"},
     {"id":"valid", "label":"Valid?", "shape":"diamond"},
     {"id":"proc", "label":"Process Data", "shape":"rect"},
     {"id":"err", "label":"Return Error", "shape":"rect"},
     {"id":"out", "label":"Return Result", "shape":"parallelogram"},
     {"id":"end", "label":"End", "shape":"oval"}
   ],
   "edges": [
     {"from":"start", "to":"input"},
     {"from":"input", "to":"valid"},
     {"from":"valid", "to":"proc", "label":"Yes"},
     {"from":"valid", "to":"err", "label":"No"},
     {"from":"proc", "to":"out"},
     {"from":"err", "to":"end"},
     {"from":"out", "to":"end"}
   ],
   "caption": "Data validation and processing flow"
  },

  {"type":"h1", "text":"Code Example"},
  {"type":"code", "language":"python",
   "text":"# Design token pipeline\ntokens = palette.build_tokens(\n    title=\"Annual Report\",\n    doc_type=\"report\",\n    author=\"J. Smith\",\n    date=\"March 2026\",\n)\nhtml = cover.render(tokens)\npdf = render_cover(html)"},

  {"type":"h1", "text":"Design Principles"},
  {"type":"body", "text":"The aesthetic system is documented in design/design.md. The core rule: every design decision must be rooted in the document content and purpose. A color chosen because it fits the content will always outperform a color chosen because it seems safe."},
  {"type":"h2", "text":"Restraint over decoration"},
  {"type":"body", "text":"The page is done when there is nothing left to remove. Accent color appears on section rules only — not on headings, not on bullets. No card components, no drop shadows."},
  {"type":"callout", "text":"A PDF passes the quality bar when a designer would not be embarrassed to hand it to a client."},

  {"type":"pagebreak"},
  {"type":"bibliography",
   "title": "References",
   "items": [
     {"id":"1","text":"Bringhurst, R. (2004). The Elements of Typographic Style (3rd ed.). Hartley & Marks."},
     {"id":"2","text":"Cairo, A. (2016). The Truthful Art: Data, Charts, and Maps for Communication. New Riders."},
     {"id":"3","text":"Hochuli, J. & Kinross, R. (1996). Designing Books: Practice and Theory. Hyphen Press."}
   ]
  }
]
JSON

  cmd_run \
    --title "minimax-pdf demo" \
    --type "report" \
    --author "minimax-pdf skill" \
    --date "$(date '+%B %Y')" \
    --subtitle "A demonstration of the token-based design pipeline" \
    --content "$tmpdir/content.json" \
    --out "demo.pdf"

  rm -rf "$tmpdir"
}

# ── dispatch ───────────────────────────────────────────────────────────────────
main() {
  if [[ $# -lt 1 ]]; then
    bold "minimax-pdf — make.sh"
    echo ""
    echo "Usage: bash make.sh <command> [options]"
    echo ""
    echo "Commands:"
    echo "  check                        Verify all dependencies"
    echo "  fix                          Auto-install missing deps"
    echo "  run --title T --type TYPE    CREATE: full pipeline → PDF"
    echo "      [--author A] [--date D] [--subtitle S]"
    echo "      [--abstract A] [--cover-image URL]"
    echo "      [--accent #HEX] [--cover-bg #HEX]"
    echo "      [--content content.json] [--out output.pdf]"
    echo "  fill --input f.pdf           FILL: inspect or fill form fields"
    echo "  reformat --input doc.md      REFORMAT: parse doc → apply design → PDF"
    echo "  demo                         Build a full-featured demo PDF"
    exit 0
  fi

  case "$1" in
    check)    cmd_check ;;
    fix)      cmd_fix ;;
    run)      shift; cmd_run "$@" ;;
    fill)     shift; cmd_fill "$@" ;;
    reformat) shift; cmd_reformat "$@" ;;
    demo)     cmd_demo ;;
    *) echo "Unknown command: $1"; exit 1 ;;
  esac
}

main "$@"
112  minimax-pdf/scripts/merge.py  (new file)
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
merge.py — Merge cover.pdf + body.pdf → final.pdf and print a QA report.

Usage:
    python3 merge.py --cover cover.pdf --body body.pdf --out final.pdf
    python3 merge.py --cover cover.pdf --body body.pdf --out final.pdf --title "My Report"

Exit codes: 0 success, 1 bad args/missing file, 2 missing dep, 3 merge error
"""
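
# A successful run prints the QA report built in merge() below — a sketch with
# illustrative numbers:
#
#   {"status": "ok", "out": "final.pdf", "total_pages": 12,
#    "cover_pages": 1, "body_pages": 11, "size_kb": 412}
#
# plus an optional "warnings" list when the page-count or file-size checks fire.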

import argparse
import importlib.util
import json
import os
import sys


def ensure_deps():
    if importlib.util.find_spec("pypdf") is None:
        import subprocess
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--break-system-packages", "-q", "pypdf"]
        )


ensure_deps()

from pypdf import PdfWriter, PdfReader


def merge(cover_path: str, body_path: str, out_path: str, title: str = "") -> dict:
    writer = PdfWriter()

    for fpath, label in [(cover_path, "cover"), (body_path, "body")]:
        if not os.path.exists(fpath):
            return {"status": "error", "error": f"{label} file not found: {fpath}"}
        reader = PdfReader(fpath)
        for page in reader.pages:
            writer.add_page(page)

    # Set PDF metadata
    if title:
        writer.add_metadata({"/Title": title})

    os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
    with open(out_path, "wb") as f:
        writer.write(f)

    size_kb = os.path.getsize(out_path) // 1024
    total_pages = len(writer.pages)

    # ── QA checks ─────────────────────────────────────────────────────────────
    warnings = []

    # Page count sanity
    cover_pages = len(PdfReader(cover_path).pages)
    body_pages = len(PdfReader(body_path).pages)
    if cover_pages != 1:
        warnings.append(f"Cover PDF has {cover_pages} pages (expected 1)")

    # File size sanity
    if size_kb < 20:
        warnings.append(f"Output is very small ({size_kb} KB) — may have blank pages")
    if size_kb > 50_000:
        warnings.append(f"Output is very large ({size_kb} KB) — consider compressing images")

    report = {
        "status": "ok",
        "out": out_path,
        "total_pages": total_pages,
        "cover_pages": cover_pages,
        "body_pages": body_pages,
        "size_kb": size_kb,
    }
    if warnings:
        report["warnings"] = warnings

    return report


def main():
    parser = argparse.ArgumentParser(description="Merge cover + body PDFs")
    parser.add_argument("--cover", required=True)
    parser.add_argument("--body", required=True)
    parser.add_argument("--out", required=True)
    parser.add_argument("--title", default="")
    args = parser.parse_args()

    result = merge(args.cover, args.body, args.out, args.title)

    if result["status"] == "error":
        print(json.dumps(result), file=sys.stderr)
        sys.exit(3)

    print(json.dumps(result))

    # Human-readable QA summary
    print("\n── Build complete ──────────────────────────────────────")
    print(f"  Output : {result['out']}")
    print(f"  Pages  : {result['total_pages']} total (1 cover + {result['body_pages']} body)")
    print(f"  Size   : {result['size_kb']} KB")
    if result.get("warnings"):
        print("  ⚠ Warnings:")
        for w in result["warnings"]:
            print(f"    • {w}")
    else:
        print("  ✓ No issues detected")
    print("────────────────────────────────────────────────────────\n")


if __name__ == "__main__":
    main()
521  minimax-pdf/scripts/palette.py  (new file)
@@ -0,0 +1,521 @@
#!/usr/bin/env python3
"""
palette.py — Infer design tokens from document metadata.

Usage:
    python3 palette.py --title "AI Trends 2025" --type report --out tokens.json
    python3 palette.py --title "John Doe Resume" --type resume --out tokens.json
    python3 palette.py --meta meta.json --out tokens.json

Outputs tokens.json consumed by all downstream scripts.
Cover fonts are loaded via Google Fonts @import in the cover HTML (no local caching).
Body fonts always use ReportLab system fonts (Times-Bold / Helvetica).
Exit codes: 0 success, 1 bad args, 3 write error
"""
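
# For example, the first Usage line above emits a tokens.json whose keys mirror
# the dict returned by build_tokens() below — a partial sketch:
#
#   {
#     "title": "AI Trends 2025", "doc_type": "report",
#     "cover_bg": "#1B2A38", "accent": "#3B6D8A",
#     "cover_pattern": "fullbleed", "mood": "authoritative",
#     "font_display": "Playfair Display", "font_body": "IBM Plex Sans",
#     ...
#   }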

import argparse
import json
import sys


# ── Palette library ────────────────────────────────────────────────────────────
# Each entry: cover colors + cover_pattern + mood
PALETTES = {
    "report": {
        # Charcoal blue-grey cover; muted steel blue accent — authoritative, not flashy
        "cover_bg": "#1B2A38",
        "accent": "#3B6D8A",
        "accent_lt": "#E6EFF5",
        "text_light": "#EDE9E2",
        "page_bg": "#FAFAF8",
        "dark": "#1A1E24",
        "body_text": "#2C2C30",
        "muted": "#7A7A84",
        "cover_pattern": "fullbleed",
        "mood": "authoritative",
    },
    "proposal": {
        # Dark charcoal cover; slate grey-blue accent — confident, understated
        "cover_bg": "#22272E",
        "accent": "#4E6070",
        "accent_lt": "#EAECEE",
        "text_light": "#EDE9E2",
        "page_bg": "#FAFAF7",
        "dark": "#18191E",
        "body_text": "#28282E",
        "muted": "#7A7870",
        "cover_pattern": "split",
        "mood": "confident",
    },
    "resume": {
        # White; deep navy accent — clean and unambiguous
        "cover_bg": "#FFFFFF",
        "accent": "#1C3557",
        "accent_lt": "#E8EEF5",
        "text_light": "#FFFFFF",
        "page_bg": "#FFFFFF",
        "dark": "#111111",
        "body_text": "#222222",
        "muted": "#888888",
        "cover_pattern": "typographic",
        "mood": "clean",
    },
    "portfolio": {
        # Near-black charcoal; cool slate grey accent — subdued professional
        "cover_bg": "#191C20",
        "accent": "#6A7A88",
        "accent_lt": "#EAECEE",
        "text_light": "#EDE9E4",
        "page_bg": "#F8F8F8",
        "dark": "#18191E",
        "body_text": "#28282E",
        "muted": "#8A8A96",
        "cover_pattern": "atmospheric",
        "mood": "expressive",
    },
    "academic": {
        # Warm white; classic navy accent — scholarly standard
        "cover_bg": "#F5F4F0",
        "accent": "#2A436A",
        "accent_lt": "#E6EBF4",
        "text_light": "#FFFFFF",
        "page_bg": "#F5F4F0",
        "dark": "#1A1A28",
        "body_text": "#1E1E2A",
        "muted": "#686877",
        "cover_pattern": "typographic",
        "mood": "scholarly",
    },
    "general": {
        # Dark slate; muted steel accent — neutral, no-nonsense
        "cover_bg": "#1F2329",
        "accent": "#4A6070",
        "accent_lt": "#E6EAEC",
        "text_light": "#EEEBE5",
        "page_bg": "#F8F6F2",
        "dark": "#1A1A1A",
        "body_text": "#2C2C2C",
        "muted": "#888888",
        "cover_pattern": "fullbleed",
        "mood": "neutral",
    },
    # ── Extended types — each uses a distinct new cover pattern ─────────────────
    "minimal": {
        # Warm off-white; dark neutral grey — truly restrained, no color signal
        "cover_bg": "#F7F6F4",
        "accent": "#4A4A4A",
        "accent_lt": "#EBEBEA",
        "text_light": "#F7F6F4",
        "page_bg": "#F7F6F4",
        "dark": "#111111",
        "body_text": "#222222",
        "muted": "#999999",
        "cover_pattern": "minimal",
        "mood": "restrained",
    },
    "stripe": {
        # Near-black; charcoal slate accent — structured, no-nonsense
        "cover_bg": "#1E222A",
        "accent": "#4A5568",
        "accent_lt": "#EAECEE",
        "text_light": "#FFFFFF",
        "page_bg": "#F8F8F7",
        "dark": "#0E1117",
        "body_text": "#262630",
        "muted": "#888898",
        "cover_pattern": "stripe",
        "mood": "bold",
    },
    "diagonal": {
        # Deep navy; muted slate-blue accent — dignified, controlled
        "cover_bg": "#1A2535",
        "accent": "#3D5A72",
        "accent_lt": "#E4EBF0",
        "text_light": "#EEF0F5",
        "page_bg": "#F8FAFC",
        "dark": "#0F1A2A",
        "body_text": "#1E2C3A",
        "muted": "#7A8A96",
        "cover_pattern": "diagonal",
        "mood": "dynamic",
    },
    "frame": {
        # Warm parchment; dark muted brown — classical, formal
        "cover_bg": "#F5F2EC",
        "accent": "#5C4A38",
        "accent_lt": "#EAE5DE",
        "text_light": "#F5F2EC",
        "page_bg": "#F5F2EC",
        "dark": "#2A1E14",
        "body_text": "#2C2018",
        "muted": "#9A8A78",
        "cover_pattern": "frame",
        "mood": "classical",
    },
    "editorial": {
        # White; deep burgundy accent — editorial weight without the shout
        "cover_bg": "#FFFFFF",
        "accent": "#7A2B36",
        "accent_lt": "#EEE4E5",
        "text_light": "#FFFFFF",
        "page_bg": "#FFFFFF",
        "dark": "#0A0A0A",
        "body_text": "#1A1A1A",
        "muted": "#777777",
        "cover_pattern": "editorial",
        "mood": "editorial",
    },
    # ── New patterns (v2) ────────────────────────────────────────────────────────
    "magazine": {
        # Warm linen; deep navy accent — formal publication standard
        "cover_bg": "#F0EEE9",
        "accent": "#1C3557",
        "accent_lt": "#E4EBF3",
        "text_light": "#FFFFFF",
        "page_bg": "#F0EEE9",
        "dark": "#0D1A2B",
        "body_text": "#2A2A2A",
        "muted": "#888888",
        "cover_pattern": "magazine",
        "mood": "magazine",
    },
    "darkroom": {
        # Deep navy; muted steel-blue accent — premium, controlled
        "cover_bg": "#151C27",
        "accent": "#3D5A7A",
        "accent_lt": "#E2EBF2",
        "text_light": "#EDE9E2",
        "page_bg": "#F7F7F5",
        "dark": "#0A1018",
        "body_text": "#2C2C2C",
        "muted": "#8A9AB0",
        "cover_pattern": "darkroom",
        "mood": "darkroom",
    },
    "terminal": {
        # Near-black; forest green accent — technical, serious (not neon)
        "cover_bg": "#0D1117",
        "accent": "#3D7A5C",
        "accent_lt": "#E2EEE8",
        "text_light": "#E6EDF3",
        "page_bg": "#F8F8F6",
        "dark": "#010409",
        "body_text": "#2C2C2C",
        "muted": "#5A7A6A",
        "cover_pattern": "terminal",
        "mood": "terminal",
    },
    "poster": {
        # White; near-black accent sidebar — stark, unambiguous
        "cover_bg": "#FFFFFF",
        "accent": "#0A0A0A",
        "accent_lt": "#EBEBEA",
        "text_light": "#FFFFFF",
        "page_bg": "#FFFFFF",
        "dark": "#0A0A0A",
        "body_text": "#1A1A1A",
        "muted": "#888888",
        "cover_pattern": "poster",
        "mood": "poster",
    },
}

# ── Font pairs — CSS names for cover HTML, ReportLab names for body ─────────────
# cover uses Google Fonts via @import (no local disk caching needed)
# body always uses system fonts via ReportLab
FONT_PAIRS = {
    "authoritative": {
        "display_css": "Playfair Display",
        "body_css": "IBM Plex Sans",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Playfair+Display:wght@700;900&family=IBM+Plex+Sans:ital,wght@0,400;0,600;1,400&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "confident": {
        "display_css": "Syne",
        "body_css": "Nunito Sans",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Syne:wght@600;800&family=Nunito+Sans:wght@400;600;700&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "clean": {
        "display_css": "DM Serif Display",
        "body_css": "DM Sans",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=DM+Serif+Display&family=DM+Sans:wght@300;400;500&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "expressive": {
        "display_css": "Fraunces",
        "body_css": "Inter",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Fraunces:ital,wght@0,700;0,900;1,900&family=Inter:wght@300;400;500&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "scholarly": {
        "display_css": "EB Garamond",
        "body_css": "Source Sans 3",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=EB+Garamond:ital,wght@0,400;0,700;1,400&family=Source+Sans+3:wght@400;600&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "neutral": {
        "display_css": "Outfit",
        "body_css": "Outfit",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;700;900&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "restrained": {
        "display_css": "Cormorant Garamond",
        "body_css": "Jost",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Cormorant+Garamond:ital,wght@0,300;0,600;1,300&family=Jost:wght@300;400;500&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "bold": {
        "display_css": "Barlow Condensed",
        "body_css": "Barlow",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Barlow+Condensed:wght@700;900&family=Barlow:wght@400;500;600&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "dynamic": {
        "display_css": "Montserrat",
        "body_css": "Montserrat",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Montserrat:ital,wght@0,300;0,700;0,900;1,400&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "classical": {
        "display_css": "Cormorant",
        "body_css": "Crimson Pro",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Cormorant:ital,wght@0,400;0,700;1,400&family=Crimson+Pro:wght@400;600&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "editorial": {
        "display_css": "Bebas Neue",
        "body_css": "Libre Franklin",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Bebas+Neue"
            "&family=Libre+Franklin:ital,wght@0,400;0,700;1,400&display=swap"
        ),
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    # ── New moods (v2) ───────────────────────────────────────────────────────────
    "magazine": {
        "display_css": "Playfair Display",
        "body_css": "EB Garamond",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Playfair+Display"
            ":ital,wght@0,700;0,900;1,700"
            "&family=EB+Garamond:ital,wght@0,400;0,600;1,400&display=swap"
        ),
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "darkroom": {
        "display_css": "Playfair Display",
        "body_css": "EB Garamond",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Playfair+Display"
            ":ital,wght@0,700;0,900;1,700"
            "&family=EB+Garamond:ital,wght@0,400;0,600;1,400&display=swap"
        ),
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "terminal": {
        "display_css": "Space Mono",
        "body_css": "Space Mono",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Space+Mono"
            ":ital,wght@0,400;0,700;1,400&display=swap"
        ),
        "display_rl": "Courier-Bold",
        "body_rl": "Courier",
        "body_b_rl": "Courier-Bold",
    },
    "poster": {
        "display_css": "Barlow Condensed",
        "body_css": "Courier Prime",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Barlow+Condensed"
            ":wght@700;900"
            "&family=Courier+Prime:ital,wght@0,400;0,700;1,400&display=swap"
        ),
        "display_rl": "Times-Bold",
        "body_rl": "Courier",
        "body_b_rl": "Courier-Bold",
    },
}

SYSTEM_FALLBACK = {
    "display_css": "Georgia",
    "body_css": "Arial",
    "gfonts_import": "",
    "display_rl": "Times-Bold",
    "body_rl": "Helvetica",
    "body_b_rl": "Helvetica-Bold",
}


# ── Colour helpers ──────────────────────────────────────────────────────────────
def _hex_to_rgb(h: str) -> tuple:
    h = h.lstrip("#")
    return int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)


def _lighten(hex_color: str, factor: float = 0.09) -> str:
    """Blend hex_color toward white (factor = accent weight, 0=white, 1=full color)."""
    r, g, b = _hex_to_rgb(hex_color)
    return "#{:02X}{:02X}{:02X}".format(
        round(r * factor + 255 * (1 - factor)),
        round(g * factor + 255 * (1 - factor)),
        round(b * factor + 255 * (1 - factor)),
    )
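
# Worked example: _lighten("#3B6D8A") with the default factor 0.09 keeps 9% of the
# colour and blends in 91% white — (59, 109, 138) → (237, 242, 244) → "#EDF2F4".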
|
||||
|
||||
# ── Token assembly ─────────────────────────────────────────────────────────────
|
||||
def build_tokens(
|
||||
title: str,
|
||||
doc_type: str,
|
||||
author: str = "",
|
||||
date: str = "",
|
||||
accent_override: str = "",
|
||||
cover_bg_override: str = "",
|
||||
) -> dict:
|
||||
palette = PALETTES.get(doc_type, PALETTES["general"]).copy()
|
||||
mood = palette["mood"]
|
||||
font_pair = FONT_PAIRS.get(mood, SYSTEM_FALLBACK)
|
||||
|
||||
# Apply caller-supplied overrides before token assembly
|
||||
if accent_override:
|
||||
palette["accent"] = accent_override
|
||||
palette["accent_lt"] = _lighten(accent_override, 0.09)
|
||||
if cover_bg_override:
|
||||
palette["cover_bg"] = cover_bg_override
|
||||
|
||||
tokens = {
|
||||
# Identity
|
||||
"title": title,
|
||||
"author": author,
|
||||
"date": date,
|
||||
"doc_type": doc_type,
|
||||
|
||||
# Palette
|
||||
"cover_bg": palette["cover_bg"],
|
||||
"accent": palette["accent"],
|
||||
"accent_lt": palette["accent_lt"],
|
||||
"text_light": palette["text_light"],
|
||||
"page_bg": palette["page_bg"],
|
||||
"dark": palette["dark"],
|
||||
"body_text": palette["body_text"],
|
||||
"muted": palette["muted"],
|
||||
"cover_pattern": palette["cover_pattern"],
|
||||
"mood": mood,
|
||||
|
||||
# Typography — CSS names for cover HTML (loaded via Google Fonts @import)
|
||||
"font_display": font_pair["display_css"],
|
||||
"font_body": font_pair["body_css"],
|
||||
"gfonts_import": font_pair["gfonts_import"],
|
||||
|
||||
# Typography — ReportLab system font names for body pages
|
||||
"font_display_rl": font_pair["display_rl"],
|
||||
"font_body_rl": font_pair["body_rl"],
|
||||
"font_body_b_rl": font_pair["body_b_rl"],
|
||||
|
||||
# Legacy keys (kept so render_body.py's register_fonts is a no-op)
|
||||
"font_heading": font_pair["display_rl"],
|
||||
"font_body_b": font_pair["body_b_rl"],
|
||||
"font_paths": {},
|
||||
|
||||
# Type scale (pt)
|
||||
"size_display": 54,
|
||||
"size_h1": 22,
|
||||
"size_h2": 15,
|
||||
"size_h3": 11.5,
|
||||
"size_body": 10.5,
|
||||
"size_caption": 8.5,
|
||||
"size_meta": 8,
|
||||
|
||||
# Layout (pt, 1cm ≈ 28.35pt)
|
||||
"margin_left": 79, # 2.8cm
|
||||
"margin_right": 79,
|
||||
"margin_top": 79,
|
||||
"margin_bottom": 71, # 2.5cm
|
||||
"section_gap": 26,
|
||||
"para_gap": 8,
|
||||
"line_gap": 17,
|
||||
}
|
||||
return tokens


# ── CLI ───────────────────────────────────────────────────────────────────────
def main():
    parser = argparse.ArgumentParser(description="Generate design tokens from document metadata")
    parser.add_argument("--title", default="Untitled Document")
    parser.add_argument("--type", default="general",
                        choices=list(PALETTES.keys()),
                        help="Document type: " + ", ".join(PALETTES.keys()))
    parser.add_argument("--author", default="")
    parser.add_argument("--date", default="")
    parser.add_argument("--meta", help="JSON file with title/type/author/date keys")
    parser.add_argument("--accent", default="",
                        help="Override accent colour (hex, e.g. #2D6A8F). "
                             "accent_lt is auto-derived by lightening toward white.")
    parser.add_argument("--cover-bg", default="",
                        help="Override cover background colour (hex).")
    parser.add_argument("--out", default="tokens.json")
    args = parser.parse_args()

    if args.meta:
        try:
            with open(args.meta) as f:
                meta = json.load(f)
            args.title = meta.get("title", args.title)
            args.type = meta.get("type", args.type)
            args.author = meta.get("author", args.author)
            args.date = meta.get("date", args.date)
        except Exception as e:
            print(json.dumps({"status": "error", "error": str(e)}), file=sys.stderr)
            sys.exit(1)

    tokens = build_tokens(
        args.title, args.type, args.author, args.date,
        accent_override=args.accent,
        cover_bg_override=args.cover_bg,  # argparse maps --cover-bg to args.cover_bg
    )

    try:
        with open(args.out, "w") as f:
            json.dump(tokens, f, indent=2)
    except Exception as e:
        print(json.dumps({"status": "error", "error": str(e)}), file=sys.stderr)
        sys.exit(3)

    print(json.dumps({
        "status": "ok",
        "out": args.out,
        "mood": tokens["mood"],
        "pattern": tokens["cover_pattern"],
        "fonts": f'{tokens["font_display"]} / {tokens["font_body"]}',
    }))


if __name__ == "__main__":
    main()
374
minimax-pdf/scripts/reformat_parse.py
Normal file
@@ -0,0 +1,374 @@
#!/usr/bin/env python3
"""
reformat_parse.py — Convert an existing document into content.json,
then hand off to the CREATE pipeline (render_body.py).

Supported input formats:
  .md / .txt — Markdown / plain text
  .pdf       — Extract text from an existing PDF (best-effort; layout is largely lost)
  .json      — Pass-through if already in content.json format

Usage:
  python3 reformat_parse.py --input doc.md --out content.json
  python3 reformat_parse.py --input old.pdf --out content.json
  python3 reformat_parse.py --input data.json --out content.json

Then pipe into the CREATE pipeline:
  python3 render_body.py --tokens tokens.json --content content.json --out body.pdf

Or use make.sh reformat, which does both steps:
  bash make.sh reformat --input doc.md --type report --title "My Report" --out output.pdf

Exit codes: 0 success, 1 bad args / unsupported format, 2 dep missing, 3 parse error
"""

import argparse
import json
import os
import re
import sys
import importlib.util
from pathlib import Path


def ensure_deps():
    missing = []
    if importlib.util.find_spec("pypdf") is None:
        missing.append("pypdf")
    if missing:
        import subprocess
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--break-system-packages", "-q"] + missing
        )


ensure_deps()
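
# Note: --break-system-packages sidesteps PEP 668's "externally managed
# environment" guard, so this auto-install assumes a disposable container or
# CI environment rather than a system Python you need to keep pristine.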


# ── Markdown / plain text parser ───────────────────────────────────────────────
def parse_markdown(text: str) -> list:
    """
    Convert Markdown to content.json blocks.
    Supports: # headings, **bold**, bullet lists, > blockquotes (→ callout),
    | tables |, plain paragraphs.
    """
    blocks = []
    lines = text.splitlines()
    i = 0

    def flush_para(buf: list):
        t = " ".join(buf).strip()
        if t:
            blocks.append({"type": "body", "text": _md_inline(t)})

    para_buf = []

    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        # Blank line — flush paragraph buffer
        if not stripped:
            flush_para(para_buf)
            para_buf = []
            i += 1
            continue

        # ATX headings: # ## ###
        m = re.match(r'^(#{1,3})\s+(.*)', stripped)
        if m:
            flush_para(para_buf)
            para_buf = []
            level = len(m.group(1))
            htype = {1: "h1", 2: "h2", 3: "h3"}.get(level, "h3")
            blocks.append({"type": htype, "text": _md_inline(m.group(2))})
            i += 1
            continue

        # Display math block: $$expr$$ on one line, or opening $$ ... closing $$
        if stripped.startswith("$$"):
            flush_para(para_buf)
            para_buf = []
            inline_expr = stripped[2:].rstrip("$").strip()
            if inline_expr:
                # Single-line: $$E = mc^2$$
                blocks.append({"type": "math", "text": inline_expr})
                i += 1
            else:
                # Multi-line: opening $$ alone, then expression lines, then closing $$
                math_lines = []
                i += 1
                while i < len(lines) and lines[i].strip() != "$$":
                    math_lines.append(lines[i])
                    i += 1
                if i < len(lines):
                    i += 1  # skip closing $$
                blocks.append({"type": "math", "text": "\n".join(math_lines).strip()})
            continue

        # Fenced code block: ``` or ~~~
        if stripped.startswith("```") or stripped.startswith("~~~"):
            flush_para(para_buf)
            para_buf = []
            fence = stripped[:3]
            code_lines = []
            i += 1
            while i < len(lines) and not lines[i].strip().startswith(fence):
                code_lines.append(lines[i])
                i += 1
            if i < len(lines):
                i += 1  # skip closing fence
            blocks.append({"type": "code", "text": "\n".join(code_lines)})
            continue

        # Blockquote → callout
        if stripped.startswith(">"):
            flush_para(para_buf)
            para_buf = []
            qt = re.sub(r'^>\s*', '', stripped)
            blocks.append({"type": "callout", "text": _md_inline(qt)})
            i += 1
            continue

        # Unordered bullet: -, *, +
        if re.match(r'^[-*+]\s+', stripped):
            flush_para(para_buf)
            para_buf = []
            text_part = re.sub(r'^[-*+]\s+', '', stripped)
            blocks.append({"type": "bullet", "text": _md_inline(text_part)})
            i += 1
            continue

        # Ordered list: 1. 2. etc. → numbered (preserves counter in render_body)
        if re.match(r'^\d+\.\s+', stripped):
            flush_para(para_buf)
            para_buf = []
            text_part = re.sub(r'^\d+\.\s+', '', stripped)
            blocks.append({"type": "numbered", "text": _md_inline(text_part)})
            i += 1
            continue

        # Table: | col | col |
        if stripped.startswith("|"):
            flush_para(para_buf)
            para_buf = []
            table_lines = []
            while i < len(lines) and lines[i].strip().startswith("|"):
                table_lines.append(lines[i].strip())
                i += 1
            # Remove separator rows (|---|---|)
            data_rows = [r for r in table_lines if not re.match(r'^\|[-:| ]+\|$', r)]
            parsed = []
            for row in data_rows:
                cells = [c.strip() for c in row.strip("|").split("|")]
                parsed.append(cells)
            if len(parsed) >= 2:
                blocks.append({
                    "type": "table",
                    "headers": parsed[0],
                    "rows": parsed[1:],
                })
            elif len(parsed) == 1:
                # Single row — treat as paragraph
                blocks.append({"type": "body", "text": " | ".join(parsed[0])})
            continue

        # Horizontal rule → spacer
        if re.match(r'^[-*_]{3,}$', stripped):
            flush_para(para_buf)
            para_buf = []
            blocks.append({"type": "spacer", "pt": 16})
            i += 1
            continue

        # Plain text → accumulate into paragraph
        para_buf.append(stripped)
        i += 1

    flush_para(para_buf)
    return blocks
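
# A few illustrative input → block mappings (not exhaustive — see the branches above):
#
#   "## Results"          → {"type": "h2", "text": "Results"}
#   "> Check the caveat." → {"type": "callout", "text": "Check the caveat."}
#   "1. first step"       → {"type": "numbered", "text": "first step"}
#   "$$E = mc^2$$"        → {"type": "math", "text": "E = mc^2"}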


def _md_inline(text: str) -> str:
    """Convert inline Markdown to ReportLab XML markup."""
    # Bold: **text** or __text__ (must run before the single-marker italic rules)
    text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
    text = re.sub(r'__(.+?)__', r'<b>\1</b>', text)
    # Italic: *text* or _text_
    text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)
    text = re.sub(r'_(.+?)_', r'<i>\1</i>', text)
    # Inline code: `code`
    text = re.sub(r'`(.+?)`', r'<font name="Courier">\1</font>', text)
    # Strip markdown links, keep text
    text = re.sub(r'\[(.+?)\]\(.+?\)', r'\1', text)
    return text


# ── PDF text extractor ─────────────────────────────────────────────────────────
def parse_pdf(pdf_path: str) -> list:
    """
    Extract text from an existing PDF and convert to content.json blocks.
    Best-effort: extraction loses most formatting, so headings are recovered
    heuristically from line shape via parse_plain().
    """
    from pypdf import PdfReader

    reader = PdfReader(pdf_path)
    all_text = []

    for page in reader.pages:
        text = page.extract_text()
        if text:
            all_text.append(text.strip())

    full_text = "\n\n".join(all_text)

    # Treat extracted PDF text as plain text / light markdown
    # (most PDFs lose formatting — we do our best)
    return parse_plain(full_text)


def parse_plain(text: str) -> list:
    """
    Heuristic plain-text parser.
    Short ALL-CAPS or title-case lines → headings.
    Everything else → paragraphs.
    """
    blocks = []
    paragraphs = re.split(r'\n{2,}', text.strip())

    for para in paragraphs:
        para = para.strip()
        if not para:
            continue

        lines = para.splitlines()

        # Single short line that looks like a heading
        if len(lines) == 1 and len(para) < 80:
            if para.isupper() or re.match(r'^[A-Z][^.!?]*$', para):
                blocks.append({"type": "h1", "text": para.title()})
                continue

        # Bullet lists
        if lines[0].startswith(("- ", "• ", "* ")):
            for line in lines:
                text_part = re.sub(r'^[-•*]\s+', '', line.strip())
                if text_part:
                    blocks.append({"type": "bullet", "text": text_part})
            continue

        # Regular paragraph
        blocks.append({"type": "body", "text": " ".join(lines)})

    return blocks
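
# Illustrative behaviour of the heading heuristic (single short lines only):
#
#   "EXECUTIVE SUMMARY"   → {"type": "h1", "text": "Executive Summary"}
#   "Quarterly results"   → {"type": "h1", "text": "Quarterly Results"}
#   "It was a good year." → body text (sentence punctuation fails the regex)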


# ── Pass-through validator ─────────────────────────────────────────────────────
VALID_TYPES = {"h1", "h2", "h3", "body", "bullet", "numbered", "callout", "table",
               "image", "code", "math", "divider", "caption", "pagebreak", "spacer"}


def validate_content_json(data: list) -> tuple[list, list]:
    """Return (valid_blocks, warnings)."""
    valid, warnings = [], []
    for i, block in enumerate(data):
        if not isinstance(block, dict):
            warnings.append(f"Block {i}: not a dict, skipped")
            continue
        btype = block.get("type")
        if btype not in VALID_TYPES:
            warnings.append(f"Block {i}: unknown type '{btype}', kept as-is")
        valid.append(block)
    return valid, warnings
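
# e.g. validate_content_json([{"type": "h1", "text": "Hi"}, {"type": "wibble"}, "oops"])
# keeps both dicts (the unknown "wibble" type with a warning) and skips the bare
# string with a second warning.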


# ── Dispatcher ─────────────────────────────────────────────────────────────────
def parse_file(input_path: str) -> tuple[list, list]:
    """Return (blocks, warnings)."""
    ext = Path(input_path).suffix.lower()

    if ext in (".md", ".txt", ".markdown"):
        with open(input_path, encoding="utf-8", errors="replace") as f:
            text = f.read()
        blocks = parse_markdown(text)
        return blocks, []

    if ext == ".pdf":
        blocks = parse_pdf(input_path)
        return blocks, ["PDF text extraction is best-effort — review content.json before rendering"]

    if ext == ".json":
        with open(input_path) as f:
            data = json.load(f)
        if isinstance(data, list):
            return validate_content_json(data)
        # Maybe it's a meta-wrapper {"content": [...]}
        if isinstance(data, dict) and "content" in data:
            return validate_content_json(data["content"])
        return [], ["JSON file does not contain a list of content blocks"]

    return [], [f"Unsupported file type: {ext}. Supported: .md .txt .pdf .json"]


# ── CLI ────────────────────────────────────────────────────────────────────────
def main():
    parser = argparse.ArgumentParser(description="Parse a document into content.json")
    parser.add_argument("--input", required=True, help="Input file (.md, .txt, .pdf, .json)")
    parser.add_argument("--out", default="content.json", help="Output content.json path")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(json.dumps({"status": "error", "error": f"File not found: {args.input}"}),
              file=sys.stderr)
        sys.exit(1)

    try:
        blocks, warnings = parse_file(args.input)
    except Exception as e:
        import traceback
        print(json.dumps({"status": "error", "error": str(e),
                          "trace": traceback.format_exc()}), file=sys.stderr)
        sys.exit(3)

    if not blocks:
        print(json.dumps({
            "status": "error",
            "error": "No content blocks extracted",
            "warnings": warnings,
        }), file=sys.stderr)
        sys.exit(3)

    with open(args.out, "w", encoding="utf-8") as f:
        json.dump(blocks, f, indent=2, ensure_ascii=False)

    result = {
        "status": "ok",
        "out": args.out,
        "block_count": len(blocks),
        "warnings": warnings,
    }
    print(json.dumps(result, indent=2))

    print(f"\n── Parsed {args.input} ─────────────────────────────────────",
          file=sys.stderr)
    print(f"  Blocks : {len(blocks)}", file=sys.stderr)

    type_counts: dict = {}
    for b in blocks:
        type_counts[b.get("type", "?")] = type_counts.get(b.get("type", "?"), 0) + 1
    for t, n in sorted(type_counts.items()):
        print(f"  {t:12} × {n}", file=sys.stderr)

    if warnings:
        print("  Warnings:", file=sys.stderr)
        for w in warnings:
            print(f"   ⚠ {w}", file=sys.stderr)
    print(f"\n  Next: bash make.sh run --content {args.out} --title '...' --type ...",
          file=sys.stderr)
    print("", file=sys.stderr)


if __name__ == "__main__":
    main()
1052
minimax-pdf/scripts/render_body.py
Normal file
File diff suppressed because it is too large
Load Diff
111
minimax-pdf/scripts/render_cover.js
Normal file
@@ -0,0 +1,111 @@
#!/usr/bin/env node
/**
 * render_cover.js — Render cover.html → cover.pdf via Playwright.
 *
 * Usage:
 *   node render_cover.js --input cover.html --out cover.pdf
 *   node render_cover.js --input cover.html --out cover.pdf --wait 1200
 *
 * Exit codes: 0 success, 1 bad args, 2 dependency missing, 3 render error
 */

const path = require("path");
const fs = require("fs");

function usage() {
  console.error("Usage: node render_cover.js --input <file.html> --out <file.pdf> [--wait <ms>]");
  process.exit(1);
}

// ── Arg parsing ────────────────────────────────────────────────────────────────
const args = process.argv.slice(2);
let inputFile = null, outFile = null, waitMs = 800;

for (let i = 0; i < args.length; i++) {
  if (args[i] === "--input" && args[i + 1]) { inputFile = args[++i]; }
  else if (args[i] === "--out" && args[i + 1]) { outFile = args[++i]; }
  else if (args[i] === "--wait" && args[i + 1]) { waitMs = parseInt(args[++i], 10); }
}

if (!inputFile || !outFile) usage();
if (!fs.existsSync(inputFile)) {
  console.error(JSON.stringify({ status: "error", error: `File not found: ${inputFile}` }));
  process.exit(1);
}

// ── Playwright loader (tolerates global npm installs) ─────────────────────────
function loadPlaywright() {
  const { execSync } = require("child_process");
  try { return require("playwright"); } catch (_) {}
  try {
    const root = execSync("npm root -g", { stdio: ["ignore", "pipe", "ignore"] }).toString().trim();
    return require(path.join(root, "playwright"));
  } catch (_) {}
  console.error(JSON.stringify({
    status: "error",
    error: "playwright not found",
    hint: "Run: npm install -g playwright && npx playwright install chromium"
  }));
  process.exit(2);
}

// ── Main ───────────────────────────────────────────────────────────────────────
(async () => {
  const { chromium } = loadPlaywright();

  let browser;
  try {
    browser = await chromium.launch();
  } catch (e) {
    // Chromium binary missing — try installing
    const { spawnSync } = require("child_process");
    const r = spawnSync("npx", ["playwright", "install", "chromium"], { stdio: "inherit", shell: true });
    if (r.status !== 0) {
      console.error(JSON.stringify({
        status: "error",
        error: "Chromium not installed and auto-install failed",
        hint: "Run: npx playwright install chromium"
      }));
      process.exit(2);
    }
    browser = await chromium.launch();
  }

  try {
    const page = await browser.newPage();
    const fileUrl = "file://" + path.resolve(inputFile);
    await page.goto(fileUrl);
    await page.waitForTimeout(waitMs); // let CSS + any JS settle

    await page.pdf({
      path: outFile,
      width: "794px",
      height: "1123px",
      printBackground: true,
    });
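
    // 794 × 1123 px is A4 (210 × 297 mm) at the CSS default of 96 px/inch
    // (210 mm ≈ 793.7 px, 297 mm ≈ 1122.5 px), so the cover should line up
    // with the A4 body pages when the PDFs are stitched together downstream.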

    await browser.close();

    // Basic sanity: output file must exist and be > 5 KB
    const stat = fs.statSync(outFile);
    if (stat.size < 5000) {
      console.error(JSON.stringify({
        status: "error",
        error: "Output PDF is suspiciously small — cover may be blank",
        hint: "Check cover.html for render errors"
      }));
      process.exit(3);
    }

    console.log(JSON.stringify({
      status: "ok",
      out: outFile,
      size_kb: Math.round(stat.size / 1024),
    }));

  } catch (e) {
    if (browser) await browser.close().catch(() => {});
    console.error(JSON.stringify({ status: "error", error: String(e) }));
    process.exit(3);
  }
})();