Initial commit: add all skills files

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-10 16:52:49 +08:00
commit 6487becf60
396 changed files with 108871 additions and 0 deletions

1579
minimax-pdf/scripts/cover.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python3
"""
fill_inspect.py — Inspect form fields in an existing PDF.
Usage:
python3 fill_inspect.py --input form.pdf
python3 fill_inspect.py --input form.pdf --out fields.json
Outputs a JSON summary of every fillable field: name, type, current value,
allowed values (for checkboxes / dropdowns), and page number.
Exit codes: 0 success, 1 bad args / file not found, 2 dep missing, 3 read error
"""
import argparse
import json
import sys
import importlib.util
import os
def ensure_deps():
    """Install pypdf on the fly when it is not already importable."""
    if importlib.util.find_spec("pypdf") is not None:
        return
    import subprocess
    pip_cmd = [sys.executable, "-m", "pip", "install", "--break-system-packages", "-q", "pypdf"]
    subprocess.check_call(pip_cmd)
ensure_deps()
from pypdf import PdfReader
from pypdf.generic import ArrayObject, DictionaryObject, NameObject, TextStringObject
# ── Field type resolution ──────────────────────────────────────────────────────
def _field_type(field) -> str:
ft = field.get("/FT")
if ft is None:
return "unknown"
ft = str(ft)
if ft == "/Tx":
return "text"
if ft == "/Btn":
ff = int(field.get("/Ff", 0))
return "radio" if ff & (1 << 15) else "checkbox"
if ft == "/Ch":
ff = int(field.get("/Ff", 0))
return "dropdown" if ff & (1 << 17) else "listbox"
if ft == "/Sig":
return "signature"
return "unknown"
def _field_value(field) -> str | None:
v = field.get("/V")
return str(v) if v is not None else None
def _field_options(field, ftype: str) -> dict:
extra = {}
if ftype in ("checkbox",):
ap = field.get("/AP")
if ap and "/N" in ap:
states = [str(k) for k in ap["/N"]]
extra["states"] = states
checked = next((s for s in states if s != "/Off"), None)
if checked:
extra["checked_value"] = checked
if ftype in ("dropdown", "listbox"):
opt = field.get("/Opt")
if opt:
choices = []
for item in opt:
if isinstance(item, (list, ArrayObject)) and len(item) >= 2:
choices.append({"value": str(item[0]), "label": str(item[1])})
else:
choices.append({"value": str(item), "label": str(item)})
extra["choices"] = choices
if ftype == "radio":
kids = field.get("/Kids")
if kids:
values = []
for kid in kids:
ap = kid.get("/AP")
if ap and "/N" in ap:
for k in ap["/N"]:
if str(k) != "/Off":
values.append(str(k))
extra["radio_values"] = values
return extra
def _walk_fields(fields, page_map: dict, parent_name: str = "") -> list:
    """Recursively collect all leaf fields.

    Args:
        fields: iterable of AcroForm field dictionaries.
        page_map: {page object id: 1-based page number}, built by inspect().
        parent_name: dotted prefix of enclosing field groups ("" at the root).

    Returns:
        List of dicts with name/type/value, type-specific extras, and (when
        the /P reference resolves) a 1-based "page" entry.
    """
    result = []
    for field in fields:
        name = str(field.get("/T", ""))
        # Fully-qualified field name: group names joined with dots.
        full = f"{parent_name}.{name}" if parent_name else name
        kids = field.get("/Kids")
        # Kids that have /T are sub-fields (groups), not widget annotations
        if kids:
            named_kids = [k for k in kids if "/T" in k]
            if named_kids:
                result.extend(_walk_fields(named_kids, page_map, full))
                continue
            # Unnamed kids are widget annotations of this field itself,
            # so fall through and treat the field as a leaf.
        ftype = _field_type(field)
        if ftype == "unknown":
            # Skip fields whose /FT we do not recognize (or that lack /FT).
            continue
        entry = {
            "name": full,
            "type": ftype,
            "value": _field_value(field),
        }
        entry.update(_field_options(field, ftype))
        # Page lookup via /P indirect reference
        p_ref = field.get("/P")
        if p_ref and hasattr(p_ref, "idnum"):
            entry["page"] = page_map.get(p_ref.idnum, "?")
        result.append(entry)
    return result
def inspect(pdf_path: str) -> dict:
    """Read pdf_path and return a JSON-serializable summary of its form fields.

    Returns {"status": "error", ...} if the PDF cannot be opened, otherwise an
    "ok" report with has_fields / field_count / fields.
    NOTE(review): the name shadows the stdlib `inspect` module at module scope;
    harmless here since that module is never imported.
    """
    try:
        reader = PdfReader(pdf_path)
    except Exception as e:
        return {"status": "error", "error": str(e)}
    # Build page-number lookup: {object_id: 1-based page number}
    page_map = {}
    for i, page in enumerate(reader.pages):
        if hasattr(page, "indirect_reference") and page.indirect_reference:
            page_map[page.indirect_reference.idnum] = i + 1
    acroform = reader.trailer.get("/Root", {}).get("/AcroForm")
    if acroform is None or "/Fields" not in acroform:
        # Not an error: a PDF without an AcroForm simply has nothing to fill.
        return {
            "status": "ok",
            "has_fields": False,
            "field_count": 0,
            "fields": [],
            "note": "This PDF has no fillable form fields.",
        }
    fields = _walk_fields(list(acroform["/Fields"]), page_map)
    return {
        "status": "ok",
        "has_fields": bool(fields),
        "field_count": len(fields),
        "fields": fields,
    }
def main():
    """CLI entry point: parse args, inspect the PDF, emit JSON (+ stderr summary).

    Exit codes: 0 success, 1 bad args / file not found (matches module docstring).
    """
    parser = argparse.ArgumentParser(description="Inspect PDF form fields")
    parser.add_argument("--input", required=True, help="PDF file to inspect")
    parser.add_argument("--out", default="", help="Write JSON to file (optional)")
    args = parser.parse_args()
    if not os.path.exists(args.input):
        print(json.dumps({"status": "error", "error": f"File not found: {args.input}"}),
              file=sys.stderr)
        sys.exit(1)
    result = inspect(args.input)
    output = json.dumps(result, indent=2, ensure_ascii=False)
    if args.out:
        with open(args.out, "w") as out_file:
            out_file.write(output)
    print(output)
    # Human-readable summary on stderr (stdout stays machine-readable JSON).
    if result["status"] == "ok" and result["has_fields"]:
        print(f"\n── Fields in {args.input} ──────────────────────────────",
              file=sys.stderr)
        # Loop variable renamed from `f` to avoid shadowing the file handle above.
        for fld in result["fields"]:
            pg = f" p.{fld['page']}" if "page" in fld else ""
            val = f" = {fld['value']}" if fld.get("value") else ""
            extra = ""
            if "choices" in fld:
                # Show at most 4 choice values. Bug fix: the truncation marker
                # was previously '' on both branches; use a real ellipsis when
                # the list is longer than 4.
                head = ", ".join(c["value"] for c in fld["choices"][:4])
                extra = f" [{head}{'…' if len(fld['choices']) > 4 else ''}]"
            elif "states" in fld:
                extra = f" {fld['states']}"
            print(f" {fld['type']:12} {fld['name']}{pg}{val}{extra}", file=sys.stderr)
        print("", file=sys.stderr)
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,242 @@
#!/usr/bin/env python3
"""
fill_write.py — Write values into PDF form fields.
Usage:
# From a JSON data file
python3 fill_write.py --input form.pdf --data values.json --out filled.pdf
# Inline JSON
python3 fill_write.py --input form.pdf --out filled.pdf \
--values '{"FirstName": "Jane", "Agree": "true"}'
values format:
{
"FieldName": "text value", # text field
"CheckBox1": "true", # checkbox (true / false)
"Dropdown1": "OptionValue", # dropdown (must match an existing choice value)
"Radio1": "/Choice2" # radio (must match a radio value)
}
Exit codes: 0 success, 1 bad args, 2 dep missing, 3 read/write error, 4 validation error
"""
import argparse
import json
import os
import sys
import importlib.util
def ensure_deps():
    """Bootstrap the pypdf dependency via pip when it is missing."""
    if importlib.util.find_spec("pypdf") is None:
        import subprocess
        install_cmd = [
            sys.executable, "-m", "pip", "install",
            "--break-system-packages", "-q", "pypdf",
        ]
        subprocess.check_call(install_cmd)
ensure_deps()
from pypdf import PdfReader, PdfWriter
from pypdf.generic import NameObject, TextStringObject, BooleanObject
# ── Field helpers ─────────────────────────────────────────────────────────────
def _field_type(field) -> str:
ft = str(field.get("/FT", ""))
if ft == "/Tx": return "text"
if ft == "/Btn":
ff = int(field.get("/Ff", 0))
return "radio" if ff & (1 << 15) else "checkbox"
if ft == "/Ch":
ff = int(field.get("/Ff", 0))
return "dropdown" if ff & (1 << 17) else "listbox"
return "unknown"
def _get_checkbox_on_value(field) -> str:
"""Return the /AP /N key that means 'checked' (anything except /Off)."""
ap = field.get("/AP")
if ap and "/N" in ap:
for k in ap["/N"]:
if str(k) != "/Off":
return str(k)
return "/Yes"
def _get_dropdown_values(field) -> list[str]:
opt = field.get("/Opt")
if not opt:
return []
values = []
for item in opt:
try:
from pypdf.generic import ArrayObject
if isinstance(item, (list, ArrayObject)) and len(item) >= 1:
values.append(str(item[0]))
else:
values.append(str(item))
except Exception:
values.append(str(item))
return values
# ── Walk + fill ───────────────────────────────────────────────────────────────
def _walk_and_fill(fields, data: dict, filled: list, errors: list, parent: str = ""):
    """Recursively write values from `data` into matching fields, in place.

    Args:
        fields: iterable of AcroForm field dictionaries (mutated in place).
        data: {fully-qualified field name: value} from the caller.
        filled: output — names successfully written are appended here.
        errors: output — {"field", "error"} dicts for rejected values.
        parent: dotted name prefix of the enclosing field group.
    """
    for field in fields:
        name = str(field.get("/T", ""))
        full = f"{parent}.{name}" if parent else name
        # Recurse into named groups
        kids = field.get("/Kids")
        if kids:
            named = [k for k in kids if "/T" in k]
            if named:
                _walk_and_fill(named, data, filled, errors, full)
                continue
        if full not in data:
            continue
        value = data[full]
        ftype = _field_type(field)
        if ftype == "text":
            # /V is the live value; /DV is the default restored on form reset.
            field.update({
                NameObject("/V"): TextStringObject(str(value)),
                NameObject("/DV"): TextStringObject(str(value)),
            })
            filled.append(full)
        elif ftype == "checkbox":
            # Several truthy spellings accepted; anything else unchecks.
            truthy = str(value).lower() in ("true", "1", "yes", "on")
            on_val = _get_checkbox_on_value(field)
            pdf_val = on_val if truthy else "/Off"
            # /AS (appearance state) must mirror /V so viewers render the tick.
            field.update({
                NameObject("/V"): NameObject(pdf_val),
                NameObject("/AS"): NameObject(pdf_val),
            })
            filled.append(full)
        elif ftype in ("dropdown", "listbox"):
            # Reject values that are not in the declared /Opt list (if any).
            allowed = _get_dropdown_values(field)
            if allowed and str(value) not in allowed:
                errors.append({
                    "field": full,
                    "error": f"Value '{value}' not in allowed choices: {allowed}"
                })
                continue
            field.update({NameObject("/V"): TextStringObject(str(value))})
            filled.append(full)
        elif ftype == "radio":
            # Radio value must start with /
            pdf_val = str(value) if str(value).startswith("/") else f"/{value}"
            field.update({
                NameObject("/V"): NameObject(pdf_val),
                NameObject("/AS"): NameObject(pdf_val),
            })
            filled.append(full)
        else:
            errors.append({"field": full, "error": f"Unsupported field type: {ftype}"})
def fill(pdf_path: str, out_path: str, data: dict) -> dict:
    """Fill form fields of pdf_path with `data` and write the result to out_path.

    Returns a JSON-serializable report: "ok" with filled / validation / not-found
    details, or "error" on read failure, write failure, or a field-less PDF.
    """
    try:
        reader = PdfReader(pdf_path)
    except Exception as e:
        return {"status": "error", "error": str(e)}
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)
    # NOTE(review): _root_object is a pypdf-private attribute — confirm it is
    # still available when upgrading pypdf.
    acroform = writer._root_object.get("/AcroForm")  # type: ignore[attr-defined]
    if acroform is None or "/Fields" not in acroform:
        return {
            "status": "error",
            "error": "This PDF has no fillable form fields.",
            "hint": "Run fill_inspect.py first to confirm the PDF has fields.",
        }
    # Enable appearance regeneration so viewers show the new values
    acroform.update({NameObject("/NeedAppearances"): BooleanObject(True)})
    filled: list[str] = []
    errors: list[dict] = []
    _walk_and_fill(list(acroform["/Fields"]), data, filled, errors)
    # Warn about requested fields that were never found
    not_found = [k for k in data if k not in filled and not any(e["field"] == k for e in errors)]
    try:
        os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
        with open(out_path, "wb") as f:
            writer.write(f)
    except Exception as e:
        return {"status": "error", "error": f"Write failed: {e}"}
    result = {
        "status": "ok",
        "out": out_path,
        "filled_count": len(filled),
        "filled_fields": filled,
        "size_kb": os.path.getsize(out_path) // 1024,
    }
    if errors:
        result["validation_errors"] = errors
    if not_found:
        result["not_found"] = not_found
        result["hint"] = "Run fill_inspect.py to see all available field names."
    return result
def main():
    """CLI entry point: parse args, load values, fill the form, report results."""
    parser = argparse.ArgumentParser(description="Fill PDF form fields")
    parser.add_argument("--input", required=True, help="Input PDF with form fields")
    parser.add_argument("--out", required=True, help="Output PDF path")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--data", help="Path to JSON file with field values")
    group.add_argument("--values", help="Inline JSON string with field values")
    args = parser.parse_args()

    def bail(payload: dict) -> None:
        # Early failures: JSON error report on stderr, exit code 1.
        print(json.dumps(payload), file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(args.input):
        bail({"status": "error", "error": f"File not found: {args.input}"})
    # Load data
    try:
        if args.data:
            with open(args.data) as fh:
                data = json.load(fh)
        else:
            data = json.loads(args.values)
    except Exception as exc:
        bail({"status": "error", "error": f"JSON parse error: {exc}"})
    result = fill(args.input, args.out, data)
    print(json.dumps(result, indent=2, ensure_ascii=False))
    if result["status"] != "ok":
        sys.exit(3)
    # Human-readable summary on stderr
    print(f"\n── Fill complete ───────────────────────────────────────",
          file=sys.stderr)
    print(f" Output : {result['out']}", file=sys.stderr)
    print(f" Filled : {result['filled_count']} field(s)", file=sys.stderr)
    if result.get("validation_errors"):
        print(f" Errors :", file=sys.stderr)
        for err in result["validation_errors"]:
            print(f"{err['field']}: {err['error']}", file=sys.stderr)
    if result.get("not_found"):
        print(f" Not found: {result['not_found']}", file=sys.stderr)
    print("", file=sys.stderr)
if __name__ == "__main__":
    main()

491
minimax-pdf/scripts/make.sh Normal file
View File

@@ -0,0 +1,491 @@
#!/usr/bin/env bash
# make.sh — minimax-pdf unified CLI
# Usage: bash make.sh <command> [options]
#
# Commands:
# check Verify all dependencies
# fix Auto-install missing dependencies
# run --title T --type TYPE Full pipeline → output.pdf
# --out FILE Output path (default: output.pdf)
# --author A --date D
# --subtitle S
# --abstract A Optional abstract text for cover
# --cover-image URL Optional cover image URL/path
# --content FILE Path to content.json (optional)
# demo Build a full-featured demo to demo.pdf
#
# Document types:
# report proposal resume portfolio academic general
# minimal stripe diagonal frame editorial
# magazine darkroom terminal poster
#
# Content block types:
# h1 h2 h3 body bullet numbered callout table
# image figure code math chart flowchart bibliography
# divider caption pagebreak spacer
#
# Exit codes: 0 success, 1 usage error, 2 dep missing, 3 runtime error
set -euo pipefail
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PY="python3"
NODE="node"
# ── Colour helpers ─────────────────────────────────────────────────────────────
# NOTE: messages pass through printf's %s, so backslash escapes (e.g. "\n")
# in the argument are printed literally, NOT expanded.
red() { printf '\033[0;31m%s\033[0m\n' "$*"; }      # errors
green() { printf '\033[0;32m%s\033[0m\n' "$*"; }    # success
yellow() { printf '\033[0;33m%s\033[0m\n' "$*"; }   # warnings
bold() { printf '\033[1m%s\033[0m\n' "$*"; }        # section headers
# ── check ──────────────────────────────────────────────────────────────────────
cmd_check() {
    # Verify every dependency, reporting each one; exit 0 if all present, 2 otherwise.
    local ok=true
    bold "Checking dependencies..."
    # Python
    if command -v python3 &>/dev/null; then
        green " ✓ python3 $(python3 --version 2>&1 | awk '{print $2}')"
    else
        red " ✗ python3 not found"
        ok=false
    fi
    # reportlab
    if python3 -c "import reportlab" 2>/dev/null; then
        green " ✓ reportlab"
    else
        yellow " ⚠ reportlab not installed (run: make.sh fix)"
        ok=false
    fi
    # pypdf
    if python3 -c "import pypdf" 2>/dev/null; then
        green " ✓ pypdf"
    else
        yellow " ⚠ pypdf not installed (run: make.sh fix)"
        ok=false
    fi
    # Node.js
    if command -v node &>/dev/null; then
        green " ✓ node $(node --version)"
    else
        red " ✗ node not found — cover rendering unavailable"
        ok=false
    fi
    # Playwright: try a local require first, then the global npm root
    if node -e "require('playwright')" 2>/dev/null || \
       node -e "require(require('child_process').execSync('npm root -g').toString().trim()+'/playwright')" 2>/dev/null; then
        green " ✓ playwright"
    else
        yellow " ⚠ playwright not found (run: make.sh fix)"
        ok=false
    fi
    # matplotlib (optional — required for math/chart/flowchart; degrades
    # gracefully, so a miss is reported but deliberately does NOT flip $ok)
    if python3 -c "import matplotlib" 2>/dev/null; then
        green " ✓ matplotlib (math, chart, flowchart blocks enabled)"
    else
        yellow " ⚠ matplotlib not installed — math/chart/flowchart blocks degrade to text (run: make.sh fix)"
    fi
    if $ok; then
        # Bug fix: green() uses printf %s, which does not expand "\n" — the old
        # green "\n..." printed a literal backslash-n. Emit the blank line separately.
        echo ""
        green "All dependencies satisfied."
        exit 0
    else
        echo ""
        yellow "Some dependencies missing. Run: bash make.sh fix"
        exit 2
    fi
}
# ── fix ────────────────────────────────────────────────────────────────────────
cmd_fix() {
    # Install missing dependencies; exit 0 on success, 2/3 on failure.
    bold "Installing missing dependencies..."
    local rc=0
    # Python packages (try --break-system-packages first for PEP 668 distros)
    if command -v python3 &>/dev/null; then
        if python3 -m pip install --break-system-packages -q reportlab pypdf matplotlib 2>/dev/null \
            || python3 -m pip install -q reportlab pypdf matplotlib 2>/dev/null; then
            # Bug fix: this success message used to print unconditionally,
            # even after both pip attempts had failed.
            green " ✓ Python packages installed (reportlab, pypdf, matplotlib)"
        else
            yellow " pip install failed — try: pip install reportlab pypdf matplotlib"
            rc=3
        fi
    fi
    # Playwright + Chromium
    if command -v npm &>/dev/null; then
        if npm install -g playwright --silent 2>/dev/null && \
           npx playwright install chromium --silent 2>/dev/null; then
            green " ✓ Playwright + Chromium installed"
        else
            yellow " playwright install failed — try manually"
            rc=3
        fi
    else
        yellow " npm not found — cannot install Playwright automatically"
        rc=2
    fi
    if [[ $rc -eq 0 ]]; then
        # Bug fix: blank line emitted separately — printf %s does not expand "\n".
        echo ""
        green "All dependencies installed. Run: bash make.sh check"
    fi
    exit $rc
}
# ── run ────────────────────────────────────────────────────────────────────────
cmd_run() {
    # Full build pipeline: design tokens → cover (HTML→PDF) → body → merge+QA.
    local title="Untitled Document"
    local type="general"
    local author=""
    local date=""
    local subtitle=""
    local abstract=""
    local cover_image=""
    local accent=""
    local cover_bg=""
    local content_file=""
    local out="output.pdf"
    local workdir
    workdir="$(mktemp -d)"
    # Parse options
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --title) title="$2"; shift 2 ;;
            --type) type="$2"; shift 2 ;;
            --author) author="$2"; shift 2 ;;
            --date) date="$2"; shift 2 ;;
            --subtitle) subtitle="$2"; shift 2 ;;
            --abstract) abstract="$2"; shift 2 ;;
            --cover-image) cover_image="$2"; shift 2 ;;
            --accent) accent="$2"; shift 2 ;;
            --cover-bg) cover_bg="$2"; shift 2 ;;
            --content) content_file="$2"; shift 2 ;;
            --out) out="$2"; shift 2 ;;
            *) echo "Unknown option: $1"; exit 1 ;;
        esac
    done
    bold "Building: $title"
    echo " Type : $type"
    echo " Output : $out"
    # Step 1: tokens
    echo ""
    bold "Step 1/4 Generating design tokens..."
    # Optional colour overrides are only passed through when set.
    local accent_args=()
    [[ -n "$accent" ]] && accent_args+=(--accent "$accent")
    [[ -n "$cover_bg" ]] && accent_args+=(--cover-bg "$cover_bg")
    # The ${arr[@]+...} guard makes an empty array expand safely under set -u.
    $PY "$SCRIPTS/palette.py" \
        --title "$title" --type "$type" \
        --author "$author" --date "$date" \
        --out "$workdir/tokens.json" \
        "${accent_args[@]+"${accent_args[@]}"}"
    # Inject optional cover fields into tokens.json
    if [[ -n "$abstract" || -n "$cover_image" ]]; then
        # Values reach the inline Python via environment variables, so no
        # shell quoting of user-supplied text is needed.
        PDF_ABSTRACT="$abstract" PDF_COVER_IMAGE="$cover_image" PDF_TOKENS="$workdir/tokens.json" \
        $PY - <<'PYEOF'
import json, os
with open(os.environ["PDF_TOKENS"]) as f:
    t = json.load(f)
abstract = os.environ.get("PDF_ABSTRACT", "")
cover_image = os.environ.get("PDF_COVER_IMAGE", "")
if abstract:
    t["abstract"] = abstract
if cover_image:
    t["cover_image"] = cover_image
with open(os.environ["PDF_TOKENS"], "w") as f:
    json.dump(t, f, indent=2)
PYEOF
    fi
    # Echo the chosen design back to the user.
    cat "$workdir/tokens.json" | $PY -c "
import json,sys
t=json.load(sys.stdin)
print(f' Mood : {t[\"mood\"]}')
print(f' Pattern : {t[\"cover_pattern\"]}')
print(f' Fonts : {t[\"font_display\"]} / {t[\"font_body\"]}')"
    # Step 2: cover HTML + render
    echo ""
    bold "Step 2/4 Rendering cover..."
    local subtitle_args=()
    [[ -n "$subtitle" ]] && subtitle_args=(--subtitle "$subtitle")
    $PY "$SCRIPTS/cover.py" \
        --tokens "$workdir/tokens.json" \
        --out "$workdir/cover.html" \
        "${subtitle_args[@]+"${subtitle_args[@]}"}"
    $NODE "$SCRIPTS/render_cover.js" \
        --input "$workdir/cover.html" \
        --out "$workdir/cover.pdf"
    green " ✓ Cover rendered"
    # Step 3: body
    echo ""
    bold "Step 3/4 Rendering body pages..."
    if [[ -z "$content_file" ]]; then
        # Generate a minimal placeholder body
        cat > "$workdir/content.json" <<'JSON'
[
{"type":"h1", "text":"Document Body"},
{"type":"body", "text":"Replace this with your content.json file using --content path/to/content.json"},
{"type":"body", "text":"See the content.json schema in the skill README for the full list of supported block types: h1, h2, h3, body, bullet, callout, table, pagebreak, spacer."}
]
JSON
        content_file="$workdir/content.json"
        yellow " No content file provided — using placeholder body."
    fi
    $PY "$SCRIPTS/render_body.py" \
        --tokens "$workdir/tokens.json" \
        --content "$content_file" \
        --out "$workdir/body.pdf"
    green " ✓ Body rendered"
    # Step 4: merge
    echo ""
    bold "Step 4/4 Merging and QA..."
    $PY "$SCRIPTS/merge.py" \
        --cover "$workdir/cover.pdf" \
        --body "$workdir/body.pdf" \
        --out "$out" \
        --title "$title"
    # Cleanup
    rm -rf "$workdir"
}
# ── fill ──────────────────────────────────────────────────────────────────────
cmd_fill() {
    # Inspect (default) or fill a PDF form via fill_inspect.py / fill_write.py.
    local input="" out="" values="" data_file="" inspect_only=false
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --input) input="$2"; shift 2 ;;
            --out) out="$2"; shift 2 ;;
            --values) values="$2"; shift 2 ;;
            --data) data_file="$2"; shift 2 ;;
            --inspect) inspect_only=true; shift ;;
            *) echo "Unknown option: $1"; exit 1 ;;
        esac
    done
    if [[ -z "$input" ]]; then
        echo "Usage: make.sh fill --input form.pdf [--out filled.pdf] [--values '{...}'] [--data values.json] [--inspect]"
        exit 1
    fi
    # No output/values supplied → inspection mode.
    if $inspect_only || [[ -z "$out" && -z "$values" && -z "$data_file" ]]; then
        bold "Inspecting form fields in: $input"
        $PY "$SCRIPTS/fill_inspect.py" --input "$input"
        return
    fi
    bold "Filling form: $input → $out"
    # Bug fix: the arguments were previously built as an unquoted string
    # ($val_args), which word-split inline JSON containing spaces into many
    # argv entries. Use an array so the JSON reaches fill_write.py intact.
    local val_args=()
    if [[ -n "$values" ]]; then val_args=(--values "$values"); fi
    if [[ -n "$data_file" ]]; then val_args=(--data "$data_file"); fi
    $PY "$SCRIPTS/fill_write.py" --input "$input" --out "$out" "${val_args[@]+"${val_args[@]}"}"
}
# ── reformat ───────────────────────────────────────────────────────────────────
cmd_reformat() {
    # Parse an existing document into content.json, then delegate to cmd_run.
    local src="" title="Reformatted Document" type="general"
    local author="" date="" out="output.pdf" subtitle=""
    local stage_dir
    stage_dir="$(mktemp -d)"
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --input) src="$2"; shift 2 ;;
            --title) title="$2"; shift 2 ;;
            --type) type="$2"; shift 2 ;;
            --author) author="$2"; shift 2 ;;
            --date) date="$2"; shift 2 ;;
            --subtitle) subtitle="$2"; shift 2 ;;
            --out) out="$2"; shift 2 ;;
            *) echo "Unknown option: $1"; exit 1 ;;
        esac
    done
    if [[ -z "$src" ]]; then
        echo "Usage: make.sh reformat --input source.md --title T --type TYPE --out output.pdf"
        exit 1
    fi
    bold "Parsing: $src"
    $PY "$SCRIPTS/reformat_parse.py" --input "$src" --out "$stage_dir/content.json"
    green " ✓ Parsed to content.json"
    bold "Applying design and building PDF..."
    local sub_args=()
    if [[ -n "$subtitle" ]]; then
        sub_args=(--subtitle "$subtitle")
    fi
    cmd_run \
        --title "$title" --type "$type" \
        --author "$author" --date "$date" \
        --content "$stage_dir/content.json" \
        --out "$out" \
        "${sub_args[@]+"${sub_args[@]}"}"
    rm -rf "$stage_dir"
}
# ── demo ──────────────────────────────────────────────────────────────────────
cmd_demo() {
    # Build a demo PDF exercising every block type (tables, charts, math,
    # flowcharts, code, bibliography), then delegate to cmd_run.
    local tmpdir
    tmpdir="$(mktemp -d)"
    # Demo content written verbatim (quoted heredoc: no expansion inside).
    cat > "$tmpdir/content.json" <<'JSON'
[
{"type":"h1", "text":"Executive Summary"},
{"type":"body", "text":"This document was generated by minimax-pdf — a skill for creating visually polished PDFs. Every design decision is rooted in the document type and content, not a generic template."},
{"type":"callout", "text":"Key insight: design tokens flow from palette.py through every renderer, keeping cover and body visually consistent."},
{"type":"h1", "text":"How It Works"},
{"type":"h2", "text":"The Token Pipeline"},
{"type":"body", "text":"The palette.py script infers a color palette and typography pair from the document type. These tokens are written to tokens.json and consumed by every downstream script."},
{"type":"numbered","text":"palette.py generates color tokens, font selection, and the cover pattern"},
{"type":"numbered","text":"cover.py renders the cover HTML using the selected pattern"},
{"type":"numbered","text":"render_cover.js uses Playwright to convert the HTML cover to PDF"},
{"type":"numbered","text":"render_body.py builds inner pages from content.json using ReportLab"},
{"type":"numbered","text":"merge.py combines cover + body and runs final QA checks"},
{"type":"h2", "text":"Cover Patterns"},
{"type":"table",
"headers": ["Pattern", "Document type", "Visual character"],
"rows": [
["fullbleed", "report, general", "Deep background · dot-grid texture"],
["split", "proposal", "Left dark panel · right dot-grid"],
["typographic", "resume, academic", "Oversized display type · first-word accent"],
["atmospheric", "portfolio", "Dark bg · radial glow · dot-grid"],
["magazine", "magazine", "Cream bg · centered · hero image"],
["darkroom", "darkroom", "Navy bg · centered · grayscale image"],
["terminal", "terminal", "Near-black · grid lines · monospace"],
["poster", "poster", "White · thick sidebar · oversized title"]
]
},
{"type":"h1", "text":"Data Visualisation"},
{"type":"h2", "text":"Performance Metrics (Chart)"},
{"type":"body", "text":"Charts are rendered natively using matplotlib with a color palette derived from the document accent. No external chart services or image files required."},
{"type":"chart",
"chart_type": "bar",
"title": "Quarterly Performance",
"labels": ["Q1", "Q2", "Q3", "Q4"],
"datasets": [
{"label": "Revenue", "values": [120, 145, 132, 178]},
{"label": "Expenses", "values": [95, 108, 99, 122]}
],
"y_label": "USD (thousands)",
"caption": "Quarterly revenue vs. expenses"
},
{"type":"h2", "text":"Market Share (Pie Chart)"},
{"type":"chart",
"chart_type": "pie",
"labels": ["Product A", "Product B", "Product C", "Other"],
"datasets": [{"values": [42, 28, 18, 12]}],
"caption": "Annual market share by product line"
},
{"type":"pagebreak"},
{"type":"h1", "text":"Mathematics"},
{"type":"body", "text":"Display math is rendered via matplotlib mathtext — no LaTeX binary installation required. Inline references use standard [N] notation in body text."},
{"type":"math", "text":"E = mc^2", "label":"(1)"},
{"type":"math", "text":"\\int_0^\\infty e^{-x^2}\\,dx = \\frac{\\sqrt{\\pi}}{2}", "label":"(2)"},
{"type":"math", "text":"\\sum_{n=1}^{\\infty} \\frac{1}{n^2} = \\frac{\\pi^2}{6}", "caption":"Basel problem (Euler, 1734)"},
{"type":"h1", "text":"Process Flow"},
{"type":"body", "text":"Flowcharts are drawn directly using matplotlib patches — no Graphviz or external tools needed. Supported node shapes: rect, diamond, oval, parallelogram."},
{"type":"flowchart",
"nodes": [
{"id":"start", "label":"Start", "shape":"oval"},
{"id":"input", "label":"Receive Input", "shape":"parallelogram"},
{"id":"valid", "label":"Valid?", "shape":"diamond"},
{"id":"proc", "label":"Process Data", "shape":"rect"},
{"id":"err", "label":"Return Error", "shape":"rect"},
{"id":"out", "label":"Return Result", "shape":"parallelogram"},
{"id":"end", "label":"End", "shape":"oval"}
],
"edges": [
{"from":"start", "to":"input"},
{"from":"input", "to":"valid"},
{"from":"valid", "to":"proc", "label":"Yes"},
{"from":"valid", "to":"err", "label":"No"},
{"from":"proc", "to":"out"},
{"from":"err", "to":"end"},
{"from":"out", "to":"end"}
],
"caption": "Data validation and processing flow"
},
{"type":"h1", "text":"Code Example"},
{"type":"code", "language":"python",
"text":"# Design token pipeline\ntokens = palette.build_tokens(\n title=\"Annual Report\",\n doc_type=\"report\",\n author=\"J. Smith\",\n date=\"March 2026\",\n)\nhtml = cover.render(tokens)\npdf = render_cover(html)"},
{"type":"h1", "text":"Design Principles"},
{"type":"body", "text":"The aesthetic system is documented in design/design.md. The core rule: every design decision must be rooted in the document content and purpose. A color chosen because it fits the content will always outperform a color chosen because it seems safe."},
{"type":"h2", "text":"Restraint over decoration"},
{"type":"body", "text":"The page is done when there is nothing left to remove. Accent color appears on section rules only — not on headings, not on bullets. No card components, no drop shadows."},
{"type":"callout", "text":"A PDF passes the quality bar when a designer would not be embarrassed to hand it to a client."},
{"type":"pagebreak"},
{"type":"bibliography",
"title": "References",
"items": [
{"id":"1","text":"Bringhurst, R. (2004). The Elements of Typographic Style (3rd ed.). Hartley & Marks."},
{"id":"2","text":"Cairo, A. (2016). The Truthful Art: Data, Charts, and Maps for Communication. New Riders."},
{"id":"3","text":"Hochuli, J. & Kinross, R. (1996). Designing Books: Practice and Theory. Hyphen Press."}
]
}
]
JSON
    # Delegate the actual build to the standard pipeline.
    cmd_run \
        --title "minimax-pdf demo" \
        --type "report" \
        --author "minimax-pdf skill" \
        --date "$(date '+%B %Y')" \
        --subtitle "A demonstration of the token-based design pipeline" \
        --content "$tmpdir/content.json" \
        --out "demo.pdf"
    rm -rf "$tmpdir"
}
# ── dispatch ───────────────────────────────────────────────────────────────────
main() {
    # No arguments → print usage and exit successfully.
    if [[ $# -eq 0 ]]; then
        bold "minimax-pdf — make.sh"
        echo ""
        echo "Usage: bash make.sh <command> [options]"
        echo ""
        echo "Commands:"
        echo " check Verify all dependencies"
        echo " fix Auto-install missing deps"
        echo " run --title T --type TYPE CREATE: full pipeline → PDF"
        echo " [--author A] [--date D] [--subtitle S]"
        echo " [--abstract A] [--cover-image URL]"
        echo " [--accent #HEX] [--cover-bg #HEX]"
        echo " [--content content.json] [--out output.pdf]"
        echo " fill --input f.pdf FILL: inspect or fill form fields"
        echo " reformat --input doc.md REFORMAT: parse doc → apply design → PDF"
        echo " demo Build a full-featured demo PDF"
        exit 0
    fi
    # Dispatch: remaining arguments are forwarded to the subcommand.
    local cmd="$1"
    shift
    case "$cmd" in
        check) cmd_check ;;
        fix) cmd_fix ;;
        run) cmd_run "$@" ;;
        fill) cmd_fill "$@" ;;
        reformat) cmd_reformat "$@" ;;
        demo) cmd_demo ;;
        *) echo "Unknown command: $cmd"; exit 1 ;;
    esac
}
main "$@"

View File

@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
merge.py — Merge cover.pdf + body.pdf → final.pdf and print a QA report.
Usage:
python3 merge.py --cover cover.pdf --body body.pdf --out final.pdf
python3 merge.py --cover cover.pdf --body body.pdf --out final.pdf --title "My Report"
Exit codes: 0 success, 1 bad args/missing file, 2 missing dep, 3 merge error
"""
import argparse
import importlib.util
import json
import os
import sys
def ensure_deps():
    """Install pypdf via pip when it cannot be found on sys.path."""
    if importlib.util.find_spec("pypdf") is not None:
        return
    import subprocess
    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        "--break-system-packages", "-q", "pypdf",
    ])
ensure_deps()
from pypdf import PdfWriter, PdfReader
def merge(cover_path: str, body_path: str, out_path: str, title: str = "") -> dict:
    """Concatenate cover + body PDFs into out_path and return a QA report.

    Args:
        cover_path: single-page cover PDF (a warning is added if not 1 page).
        body_path: body pages PDF.
        out_path: destination; parent directories are created as needed.
        title: optional /Title metadata for the merged document.

    Returns:
        {"status": "error", ...} if an input is missing, otherwise an "ok"
        report with page counts, size, and any QA warnings.
    """
    writer = PdfWriter()
    page_counts: dict[str, int] = {}
    for fpath, label in [(cover_path, "cover"), (body_path, "body")]:
        if not os.path.exists(fpath):
            return {"status": "error", "error": f"{label} file not found: {fpath}"}
        reader = PdfReader(fpath)
        # Count pages during this pass — previously both inputs were parsed a
        # second time below solely to recount them.
        page_counts[label] = len(reader.pages)
        for page in reader.pages:
            writer.add_page(page)
    # Set PDF metadata
    if title:
        writer.add_metadata({"/Title": title})
    os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
    with open(out_path, "wb") as f:
        writer.write(f)
    size_kb = os.path.getsize(out_path) // 1024
    total_pages = len(writer.pages)
    # ── QA checks ─────────────────────────────────────────────────────────────
    warnings = []
    cover_pages = page_counts["cover"]
    body_pages = page_counts["body"]
    if cover_pages != 1:
        warnings.append(f"Cover PDF has {cover_pages} pages (expected 1)")
    # File size sanity
    if size_kb < 20:
        warnings.append(f"Output is very small ({size_kb} KB) — may have blank pages")
    if size_kb > 50_000:
        warnings.append(f"Output is very large ({size_kb} KB) — consider compressing images")
    report = {
        "status": "ok",
        "out": out_path,
        "total_pages": total_pages,
        "cover_pages": cover_pages,
        "body_pages": body_pages,
        "size_kb": size_kb,
    }
    if warnings:
        report["warnings"] = warnings
    return report
def main():
    """CLI entry point: merge cover + body, print a JSON report + QA summary.

    Exit code 3 on merge error (per the module docstring); argparse handles
    bad arguments with its own exit code.
    """
    parser = argparse.ArgumentParser(description="Merge cover + body PDFs")
    parser.add_argument("--cover", required=True)
    parser.add_argument("--body", required=True)
    parser.add_argument("--out", required=True)
    parser.add_argument("--title", default="")
    args = parser.parse_args()
    result = merge(args.cover, args.body, args.out, args.title)
    if result["status"] == "error":
        # Machine-readable error goes to stderr; 3 = merge error.
        print(json.dumps(result), file=sys.stderr)
        sys.exit(3)
    print(json.dumps(result))
    # Human-readable QA summary
    print(f"\n── Build complete ──────────────────────────────────────")
    print(f" Output : {result['out']}")
    print(f" Pages : {result['total_pages']} total (1 cover + {result['body_pages']} body)")
    print(f" Size : {result['size_kb']} KB")
    if result.get("warnings"):
        print(f" ⚠ Warnings:")
        for w in result["warnings"]:
            print(f"{w}")
    else:
        print(f" ✓ No issues detected")
    print(f"────────────────────────────────────────────────────────\n")
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,521 @@
#!/usr/bin/env python3
"""
palette.py — Infer design tokens from document metadata.
Usage:
python3 palette.py --title "AI Trends 2025" --type report --out tokens.json
python3 palette.py --title "John Doe Resume" --type resume --out tokens.json
python3 palette.py --meta meta.json --out tokens.json
Outputs tokens.json consumed by all downstream scripts.
Cover fonts are loaded via Google Fonts @import in the cover HTML (no local caching).
Body fonts always use ReportLab system fonts (Times-Bold / Helvetica).
Exit codes: 0 success, 1 bad args, 3 write error
"""
import argparse
import json
import sys
# ── Palette library ────────────────────────────────────────────────────────────
# Each entry: cover colors + cover_pattern + mood
#
# Key schema (all colours are "#RRGGBB" hex strings):
#   cover_bg      — cover page background
#   accent        — primary accent colour
#   accent_lt     — light tint of the accent (hand-picked here; when the CLI
#                   overrides the accent, build_tokens re-derives it via _lighten)
#   text_light    — light text colour for use over dark cover backgrounds
#   page_bg       — body page background
#   dark          — darkest ink colour
#   body_text     — body copy colour
#   muted         — secondary / metadata text colour
#   cover_pattern — cover layout variant name carried into tokens.json
#   mood          — key into FONT_PAIRS selecting the typeface pairing
PALETTES = {
    "report": {
        # Charcoal blue-grey cover; muted steel blue accent — authoritative, not flashy
        "cover_bg": "#1B2A38",
        "accent": "#3B6D8A",
        "accent_lt": "#E6EFF5",
        "text_light": "#EDE9E2",
        "page_bg": "#FAFAF8",
        "dark": "#1A1E24",
        "body_text": "#2C2C30",
        "muted": "#7A7A84",
        "cover_pattern": "fullbleed",
        "mood": "authoritative",
    },
    "proposal": {
        # Dark charcoal cover; slate grey-blue accent — confident, understated
        "cover_bg": "#22272E",
        "accent": "#4E6070",
        "accent_lt": "#EAECEE",
        "text_light": "#EDE9E2",
        "page_bg": "#FAFAF7",
        "dark": "#18191E",
        "body_text": "#28282E",
        "muted": "#7A7870",
        "cover_pattern": "split",
        "mood": "confident",
    },
    "resume": {
        # White; deep navy accent — clean and unambiguous
        "cover_bg": "#FFFFFF",
        "accent": "#1C3557",
        "accent_lt": "#E8EEF5",
        "text_light": "#FFFFFF",
        "page_bg": "#FFFFFF",
        "dark": "#111111",
        "body_text": "#222222",
        "muted": "#888888",
        "cover_pattern": "typographic",
        "mood": "clean",
    },
    "portfolio": {
        # Near-black charcoal; cool slate grey accent — subdued professional
        "cover_bg": "#191C20",
        "accent": "#6A7A88",
        "accent_lt": "#EAECEE",
        "text_light": "#EDE9E4",
        "page_bg": "#F8F8F8",
        "dark": "#18191E",
        "body_text": "#28282E",
        "muted": "#8A8A96",
        "cover_pattern": "atmospheric",
        "mood": "expressive",
    },
    "academic": {
        # Warm white; classic navy accent — scholarly standard
        "cover_bg": "#F5F4F0",
        "accent": "#2A436A",
        "accent_lt": "#E6EBF4",
        "text_light": "#FFFFFF",
        "page_bg": "#F5F4F0",
        "dark": "#1A1A28",
        "body_text": "#1E1E2A",
        "muted": "#686877",
        "cover_pattern": "typographic",
        "mood": "scholarly",
    },
    "general": {
        # Dark slate; muted steel accent — neutral, no-nonsense
        # NOTE: "general" is also the fallback palette for unknown doc types
        # (see build_tokens).
        "cover_bg": "#1F2329",
        "accent": "#4A6070",
        "accent_lt": "#E6EAEC",
        "text_light": "#EEEBE5",
        "page_bg": "#F8F6F2",
        "dark": "#1A1A1A",
        "body_text": "#2C2C2C",
        "muted": "#888888",
        "cover_pattern": "fullbleed",
        "mood": "neutral",
    },
    # ── Extended types — each uses a distinct new cover pattern ─────────────────
    "minimal": {
        # Warm off-white; dark neutral grey — truly restrained, no color signal
        "cover_bg": "#F7F6F4",
        "accent": "#4A4A4A",
        "accent_lt": "#EBEBEA",
        "text_light": "#F7F6F4",
        "page_bg": "#F7F6F4",
        "dark": "#111111",
        "body_text": "#222222",
        "muted": "#999999",
        "cover_pattern": "minimal",
        "mood": "restrained",
    },
    "stripe": {
        # Near-black; charcoal slate accent — structured, no-nonsense
        "cover_bg": "#1E222A",
        "accent": "#4A5568",
        "accent_lt": "#EAECEE",
        "text_light": "#FFFFFF",
        "page_bg": "#F8F8F7",
        "dark": "#0E1117",
        "body_text": "#262630",
        "muted": "#888898",
        "cover_pattern": "stripe",
        "mood": "bold",
    },
    "diagonal": {
        # Deep navy; muted slate-blue accent — dignified, controlled
        "cover_bg": "#1A2535",
        "accent": "#3D5A72",
        "accent_lt": "#E4EBF0",
        "text_light": "#EEF0F5",
        "page_bg": "#F8FAFC",
        "dark": "#0F1A2A",
        "body_text": "#1E2C3A",
        "muted": "#7A8A96",
        "cover_pattern": "diagonal",
        "mood": "dynamic",
    },
    "frame": {
        # Warm parchment; dark muted brown — classical, formal
        "cover_bg": "#F5F2EC",
        "accent": "#5C4A38",
        "accent_lt": "#EAE5DE",
        "text_light": "#F5F2EC",
        "page_bg": "#F5F2EC",
        "dark": "#2A1E14",
        "body_text": "#2C2018",
        "muted": "#9A8A78",
        "cover_pattern": "frame",
        "mood": "classical",
    },
    "editorial": {
        # White; deep burgundy accent — editorial weight without the shout
        "cover_bg": "#FFFFFF",
        "accent": "#7A2B36",
        "accent_lt": "#EEE4E5",
        "text_light": "#FFFFFF",
        "page_bg": "#FFFFFF",
        "dark": "#0A0A0A",
        "body_text": "#1A1A1A",
        "muted": "#777777",
        "cover_pattern": "editorial",
        "mood": "editorial",
    },
    # ── New patterns (v2) ────────────────────────────────────────────────────────
    "magazine": {
        # Warm linen; deep navy accent — formal publication standard
        "cover_bg": "#F0EEE9",
        "accent": "#1C3557",
        "accent_lt": "#E4EBF3",
        "text_light": "#FFFFFF",
        "page_bg": "#F0EEE9",
        "dark": "#0D1A2B",
        "body_text": "#2A2A2A",
        "muted": "#888888",
        "cover_pattern": "magazine",
        "mood": "magazine",
    },
    "darkroom": {
        # Deep navy; muted steel-blue accent — premium, controlled
        "cover_bg": "#151C27",
        "accent": "#3D5A7A",
        "accent_lt": "#E2EBF2",
        "text_light": "#EDE9E2",
        "page_bg": "#F7F7F5",
        "dark": "#0A1018",
        "body_text": "#2C2C2C",
        "muted": "#8A9AB0",
        "cover_pattern": "darkroom",
        "mood": "darkroom",
    },
    "terminal": {
        # Near-black; forest green accent — technical, serious (not neon)
        "cover_bg": "#0D1117",
        "accent": "#3D7A5C",
        "accent_lt": "#E2EEE8",
        "text_light": "#E6EDF3",
        "page_bg": "#F8F8F6",
        "dark": "#010409",
        "body_text": "#2C2C2C",
        "muted": "#5A7A6A",
        "cover_pattern": "terminal",
        "mood": "terminal",
    },
    "poster": {
        # White; near-black accent sidebar — stark, unambiguous
        "cover_bg": "#FFFFFF",
        "accent": "#0A0A0A",
        "accent_lt": "#EBEBEA",
        "text_light": "#FFFFFF",
        "page_bg": "#FFFFFF",
        "dark": "#0A0A0A",
        "body_text": "#1A1A1A",
        "muted": "#888888",
        "cover_pattern": "poster",
        "mood": "poster",
    },
}
# ── Font pairs — CSS names for cover HTML, ReportLab names for body ─────────────
# cover uses Google Fonts via @import (no local disk caching needed)
# body always uses system fonts via ReportLab
#
# Key schema (keyed by the palette's "mood"):
#   display_css / body_css — CSS family names used by the cover HTML
#   gfonts_import          — Google Fonts stylesheet URL importing those families
#   display_rl / body_rl / body_b_rl — ReportLab built-in font names used for
#                                      body pages (display / regular / bold)
FONT_PAIRS = {
    "authoritative": {
        "display_css": "Playfair Display",
        "body_css": "IBM Plex Sans",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Playfair+Display:wght@700;900&family=IBM+Plex+Sans:ital,wght@0,400;0,600;1,400&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "confident": {
        "display_css": "Syne",
        "body_css": "Nunito Sans",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Syne:wght@600;800&family=Nunito+Sans:wght@400;600;700&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "clean": {
        "display_css": "DM Serif Display",
        "body_css": "DM Sans",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=DM+Serif+Display&family=DM+Sans:wght@300;400;500&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "expressive": {
        "display_css": "Fraunces",
        "body_css": "Inter",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Fraunces:ital,wght@0,700;0,900;1,900&family=Inter:wght@300;400;500&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "scholarly": {
        "display_css": "EB Garamond",
        "body_css": "Source Sans 3",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=EB+Garamond:ital,wght@0,400;0,700;1,400&family=Source+Sans+3:wght@400;600&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "neutral": {
        "display_css": "Outfit",
        "body_css": "Outfit",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;700;900&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "restrained": {
        "display_css": "Cormorant Garamond",
        "body_css": "Jost",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Cormorant+Garamond:ital,wght@0,300;0,600;1,300&family=Jost:wght@300;400;500&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "bold": {
        "display_css": "Barlow Condensed",
        "body_css": "Barlow",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Barlow+Condensed:wght@700;900&family=Barlow:wght@400;500;600&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "dynamic": {
        "display_css": "Montserrat",
        "body_css": "Montserrat",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Montserrat:ital,wght@0,300;0,700;0,900;1,400&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "classical": {
        "display_css": "Cormorant",
        "body_css": "Crimson Pro",
        "gfonts_import": "https://fonts.googleapis.com/css2?family=Cormorant:ital,wght@0,400;0,700;1,400&family=Crimson+Pro:wght@400;600&display=swap",
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "editorial": {
        "display_css": "Bebas Neue",
        "body_css": "Libre Franklin",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Bebas+Neue"
            "&family=Libre+Franklin:ital,wght@0,400;0,700;1,400&display=swap"
        ),
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    # ── New moods (v2) ───────────────────────────────────────────────────────────
    "magazine": {
        "display_css": "Playfair Display",
        "body_css": "EB Garamond",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Playfair+Display"
            ":ital,wght@0,700;0,900;1,700"
            "&family=EB+Garamond:ital,wght@0,400;0,600;1,400&display=swap"
        ),
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "darkroom": {
        # Same faces as "magazine" — the palettes differ, the typography does not.
        "display_css": "Playfair Display",
        "body_css": "EB Garamond",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Playfair+Display"
            ":ital,wght@0,700;0,900;1,700"
            "&family=EB+Garamond:ital,wght@0,400;0,600;1,400&display=swap"
        ),
        "display_rl": "Times-Bold",
        "body_rl": "Helvetica",
        "body_b_rl": "Helvetica-Bold",
    },
    "terminal": {
        # Monospace throughout, including the ReportLab body (Courier).
        "display_css": "Space Mono",
        "body_css": "Space Mono",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Space+Mono"
            ":ital,wght@0,400;0,700;1,400&display=swap"
        ),
        "display_rl": "Courier-Bold",
        "body_rl": "Courier",
        "body_b_rl": "Courier-Bold",
    },
    "poster": {
        "display_css": "Barlow Condensed",
        "body_css": "Courier Prime",
        "gfonts_import": (
            "https://fonts.googleapis.com/css2?family=Barlow+Condensed"
            ":wght@700;900"
            "&family=Courier+Prime:ital,wght@0,400;0,700;1,400&display=swap"
        ),
        "display_rl": "Times-Bold",
        "body_rl": "Courier",
        "body_b_rl": "Courier-Bold",
    },
}
# Fallback pairing used when a palette's "mood" has no FONT_PAIRS entry
# (see build_tokens). The empty gfonts_import means the cover HTML loads
# no web fonts and relies on the system-installed Georgia/Arial.
SYSTEM_FALLBACK = {
    "display_css": "Georgia",
    "body_css": "Arial",
    "gfonts_import": "",
    "display_rl": "Times-Bold",
    "body_rl": "Helvetica",
    "body_b_rl": "Helvetica-Bold",
}
# ── Colour helpers ──────────────────────────────────────────────────────────────
def _hex_to_rgb(h: str) -> tuple:
h = h.lstrip("#")
return int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
def _lighten(hex_color: str, factor: float = 0.09) -> str:
    """Blend hex_color toward white (factor = accent weight, 0=white, 1=full color)."""
    white_weight = 1 - factor
    blended = [round(channel * factor + 255 * white_weight)
               for channel in _hex_to_rgb(hex_color)]
    return "#{:02X}{:02X}{:02X}".format(*blended)
# ── Token assembly ─────────────────────────────────────────────────────────────
def build_tokens(
    title: str,
    doc_type: str,
    author: str = "",
    date: str = "",
    accent_override: str = "",
    cover_bg_override: str = "",
) -> dict:
    """Assemble the design-token dict for one document.

    Args:
        title, author, date: identity strings copied verbatim into the tokens.
        doc_type: key into PALETTES; unknown types fall back to "general".
        accent_override: optional "#RRGGBB" accent; when set, accent_lt is
            re-derived by blending the override toward white (_lighten).
        cover_bg_override: optional "#RRGGBB" cover background.

    Returns:
        A flat dict of palette, typography, legacy-compatibility, type-scale
        and layout keys, ready to be serialized as tokens.json for the
        downstream scripts.
    """
    palette = PALETTES.get(doc_type, PALETTES["general"]).copy()
    mood = palette["mood"]
    # A mood with no FONT_PAIRS entry silently degrades to system fonts.
    font_pair = FONT_PAIRS.get(mood, SYSTEM_FALLBACK)
    # Apply caller-supplied overrides before token assembly
    if accent_override:
        palette["accent"] = accent_override
        palette["accent_lt"] = _lighten(accent_override, 0.09)
    if cover_bg_override:
        palette["cover_bg"] = cover_bg_override
    tokens = {
        # Identity
        "title": title,
        "author": author,
        "date": date,
        "doc_type": doc_type,
        # Palette
        "cover_bg": palette["cover_bg"],
        "accent": palette["accent"],
        "accent_lt": palette["accent_lt"],
        "text_light": palette["text_light"],
        "page_bg": palette["page_bg"],
        "dark": palette["dark"],
        "body_text": palette["body_text"],
        "muted": palette["muted"],
        "cover_pattern": palette["cover_pattern"],
        "mood": mood,
        # Typography — CSS names for cover HTML (loaded via Google Fonts @import)
        "font_display": font_pair["display_css"],
        "font_body": font_pair["body_css"],
        "gfonts_import": font_pair["gfonts_import"],
        # Typography — ReportLab system font names for body pages
        "font_display_rl": font_pair["display_rl"],
        "font_body_rl": font_pair["body_rl"],
        "font_body_b_rl": font_pair["body_b_rl"],
        # Legacy keys (kept so render_body.py's register_fonts is a no-op)
        "font_heading": font_pair["display_rl"],
        "font_body_b": font_pair["body_b_rl"],
        "font_paths": {},
        # Type scale (pt)
        "size_display": 54,
        "size_h1": 22,
        "size_h2": 15,
        "size_h3": 11.5,
        "size_body": 10.5,
        "size_caption": 8.5,
        "size_meta": 8,
        # Layout (pt, 1cm ≈ 28.35pt)
        "margin_left": 79,  # 2.8cm
        "margin_right": 79,
        "margin_top": 79,
        "margin_bottom": 71,  # 2.5cm
        "section_gap": 26,
        "para_gap": 8,
        "line_gap": 17,
    }
    return tokens
# ── CLI ───────────────────────────────────────────────────────────────────────
def main():
    """Build tokens from CLI args (optionally merged with a --meta JSON file)
    and write them to --out."""
    ap = argparse.ArgumentParser(description="Generate design tokens from document metadata")
    ap.add_argument("--title", default="Untitled Document")
    ap.add_argument("--type", default="general",
                    choices=list(PALETTES.keys()),
                    help="Document type: " + ", ".join(PALETTES.keys()))
    ap.add_argument("--author", default="")
    ap.add_argument("--date", default="")
    ap.add_argument("--meta", help="JSON file with title/type/author/date keys")
    ap.add_argument("--accent", default="",
                    help="Override accent colour (hex, e.g. #2D6A8F). "
                         "accent_lt is auto-derived by lightening toward white.")
    ap.add_argument("--cover-bg", default="",
                    help="Override cover background colour (hex).")
    ap.add_argument("--out", default="tokens.json")
    args = ap.parse_args()

    # Fields from a --meta file take precedence over individual CLI flags.
    if args.meta:
        try:
            with open(args.meta) as fh:
                meta = json.load(fh)
            args.title = meta.get("title", args.title)
            args.type = meta.get("type", args.type)
            args.author = meta.get("author", args.author)
            args.date = meta.get("date", args.date)
        except Exception as exc:
            print(json.dumps({"status": "error", "error": str(exc)}), file=sys.stderr)
            sys.exit(1)

    tokens = build_tokens(
        args.title, args.type, args.author, args.date,
        accent_override=args.accent,
        cover_bg_override=getattr(args, "cover_bg", ""),
    )

    try:
        with open(args.out, "w") as fh:
            json.dump(tokens, fh, indent=2)
    except Exception as exc:
        print(json.dumps({"status": "error", "error": str(exc)}), file=sys.stderr)
        sys.exit(3)

    summary = {
        "status": "ok",
        "out": args.out,
        "mood": tokens["mood"],
        "pattern": tokens["cover_pattern"],
        "fonts": f'{tokens["font_display"]} / {tokens["font_body"]}',
    }
    print(json.dumps(summary))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,374 @@
#!/usr/bin/env python3
"""
reformat_parse.py — Convert an existing document into content.json,
then hand off to the CREATE pipeline (render_body.py).
Supported input formats:
.md / .txt — Markdown / plain text
.pdf — Extract text from existing PDF (layout preserved as best-effort)
.json — Pass-through if already content.json format
Usage:
python3 reformat_parse.py --input doc.md --out content.json
python3 reformat_parse.py --input old.pdf --out content.json
python3 reformat_parse.py --input data.json --out content.json
Then pipe into the CREATE pipeline:
python3 render_body.py --tokens tokens.json --content content.json --out body.pdf
Or use make.sh reformat which does both steps:
bash make.sh reformat --input doc.md --type report --title "My Report" --out output.pdf
Exit codes: 0 success, 1 bad args / unsupported format, 2 dep missing, 3 parse error
"""
import argparse
import json
import os
import re
import sys
import importlib.util
from pathlib import Path
def ensure_deps():
    """Install any missing third-party dependencies (pypdf) via pip."""
    needed = [pkg for pkg in ("pypdf",) if importlib.util.find_spec(pkg) is None]
    if not needed:
        return
    import subprocess
    # --break-system-packages allows installation on PEP 668 "managed" systems.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--break-system-packages", "-q"] + needed
    )


ensure_deps()
# ── Markdown / plain text parser ───────────────────────────────────────────────
def parse_markdown(text: str) -> list:
    """
    Convert Markdown to content.json blocks.
    Supports: # headings, **bold**, bullet lists, > blockquotes (→ callout),
    | tables |, plain paragraphs.

    Single line-oriented pass: `i` is the cursor into `lines`; every branch
    consumes the lines it handles (advancing `i` itself) and then `continue`s.
    Consecutive plain-text lines accumulate in `para_buf` and are flushed as
    one body block at the next blank line, structural element, or EOF.
    """
    blocks = []
    lines = text.splitlines()
    i = 0
    def flush_para(buf: list):
        # Join buffered plain-text lines into a single body block.
        # NOTE: does not clear `buf` — callers reassign para_buf afterwards.
        t = " ".join(buf).strip()
        if t:
            blocks.append({"type": "body", "text": _md_inline(t)})
    para_buf = []
    while i < len(lines):
        line = lines[i]
        stripped = line.strip()
        # Blank line — flush paragraph buffer
        if not stripped:
            flush_para(para_buf)
            para_buf = []
            i += 1
            continue
        # ATX Headings: # ## ###  (#### and deeper fall through to paragraphs)
        m = re.match(r'^(#{1,3})\s+(.*)', stripped)
        if m:
            flush_para(para_buf)
            para_buf = []
            level = len(m.group(1))
            htype = {1: "h1", 2: "h2", 3: "h3"}.get(level, "h3")
            blocks.append({"type": htype, "text": _md_inline(m.group(2))})
            i += 1
            continue
        # Display math block: $$expr$$ on one line, or opening $$ ... closing $$
        if stripped.startswith("$$"):
            flush_para(para_buf)
            para_buf = []
            inline_expr = stripped[2:].rstrip("$").strip()
            if inline_expr:
                # Single-line: $$E = mc^2$$
                blocks.append({"type": "math", "text": inline_expr})
                i += 1
            else:
                # Multi-line: opening $$ alone, then expression lines, then closing $$
                math_lines = []
                i += 1
                while i < len(lines) and lines[i].strip() != "$$":
                    math_lines.append(lines[i])
                    i += 1
                if i < len(lines):
                    i += 1  # skip closing $$
                blocks.append({"type": "math", "text": "\n".join(math_lines).strip()})
            continue
        # Fenced code block: ``` or ~~~ (code lines kept verbatim, no inline markup)
        if stripped.startswith("```") or stripped.startswith("~~~"):
            flush_para(para_buf)
            para_buf = []
            fence = stripped[:3]
            code_lines = []
            i += 1
            while i < len(lines) and not lines[i].strip().startswith(fence):
                code_lines.append(lines[i])
                i += 1
            if i < len(lines):
                i += 1  # skip closing fence
            blocks.append({"type": "code", "text": "\n".join(code_lines)})
            continue
        # Blockquote → callout
        if stripped.startswith(">"):
            flush_para(para_buf)
            para_buf = []
            qt = re.sub(r'^>\s*', '', stripped)
            blocks.append({"type": "callout", "text": _md_inline(qt)})
            i += 1
            continue
        # Unordered bullet: -, *, +
        if re.match(r'^[-*+]\s+', stripped):
            flush_para(para_buf)
            para_buf = []
            text_part = re.sub(r'^[-*+]\s+', '', stripped)
            blocks.append({"type": "bullet", "text": _md_inline(text_part)})
            i += 1
            continue
        # Ordered list: 1. 2. etc. → numbered (preserves counter in render_body)
        if re.match(r'^\d+\.\s+', stripped):
            flush_para(para_buf)
            para_buf = []
            text_part = re.sub(r'^\d+\.\s+', '', stripped)
            blocks.append({"type": "numbered", "text": _md_inline(text_part)})
            i += 1
            continue
        # Table: | col | col | — consume the whole contiguous run of | rows
        if stripped.startswith("|"):
            flush_para(para_buf)
            para_buf = []
            table_lines = []
            while i < len(lines) and lines[i].strip().startswith("|"):
                table_lines.append(lines[i].strip())
                i += 1
            # Remove separator rows (|---|---|)
            data_rows = [r for r in table_lines if not re.match(r'^\|[-:| ]+\|$', r)]
            parsed = []
            for row in data_rows:
                cells = [c.strip() for c in row.strip("|").split("|")]
                parsed.append(cells)
            if len(parsed) >= 2:
                # First data row becomes the header row.
                blocks.append({
                    "type": "table",
                    "headers": parsed[0],
                    "rows": parsed[1:],
                })
            elif len(parsed) == 1:
                # Single row — treat as paragraph
                blocks.append({"type": "body", "text": " | ".join(parsed[0])})
            continue
        # Horizontal rule → spacer
        if re.match(r'^[-*_]{3,}$', stripped):
            flush_para(para_buf)
            para_buf = []
            blocks.append({"type": "spacer", "pt": 16})
            i += 1
            continue
        # Plain text → accumulate into paragraph
        para_buf.append(stripped)
        i += 1
    flush_para(para_buf)
    return blocks
def _md_inline(text: str) -> str:
"""Convert inline Markdown to ReportLab XML markup."""
# Bold: **text** or __text__
text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
text = re.sub(r'__(.+?)__', r'<b>\1</b>', text)
# Italic: *text* or _text_
text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)
text = re.sub(r'_(.+?)_', r'<i>\1</i>', text)
# Inline code: `code`
text = re.sub(r'`(.+?)`', r'<font name="Courier">\1</font>', text)
# Strip markdown links, keep text
text = re.sub(r'\[(.+?)\]\(.+?\)', r'\1', text)
return text
# ── PDF text extractor ─────────────────────────────────────────────────────────
def parse_pdf(pdf_path: str) -> list:
    """
    Extract text from an existing PDF and convert to content.json blocks.

    Best-effort only: most formatting is lost during extraction, so the
    concatenated page text is run through the plain-text heuristics of
    parse_plain().
    """
    from pypdf import PdfReader

    page_texts = []
    for page in PdfReader(pdf_path).pages:
        extracted = page.extract_text()
        if extracted:
            page_texts.append(extracted.strip())
    return parse_plain("\n\n".join(page_texts))
def parse_plain(text: str) -> list:
    """
    Heuristic plain-text parser.

    Short ALL-CAPS or title-case single lines → h1 headings; paragraphs whose
    first line starts with a bullet marker ("- ", "• ", "* ") → bullet blocks;
    everything else → body paragraphs.
    """
    blocks = []
    paragraphs = re.split(r'\n{2,}', text.strip())
    for para in paragraphs:
        para = para.strip()
        if not para:
            continue
        lines = para.splitlines()
        # Single short line that looks like a heading
        if len(lines) == 1 and len(para) < 80:
            if para.isupper() or re.match(r'^[A-Z][^.!?]*$', para):
                blocks.append({"type": "h1", "text": para.title()})
                continue
        # Bullet lists. NOTE: the marker tuple must never contain "" —
        # str.startswith("") is always True, which would turn EVERY paragraph
        # into a bullet list (this was the previous bug; "• " restored).
        if lines[0].startswith(("- ", "• ", "* ")):
            for line in lines:
                text_part = re.sub(r'^[-•*]\s+', '', line.strip())
                if text_part:
                    blocks.append({"type": "bullet", "text": text_part})
            continue
        # Regular paragraph
        blocks.append({"type": "body", "text": " ".join(lines)})
    return blocks
# ── Pass-through validator ─────────────────────────────────────────────────────
VALID_TYPES = {"h1","h2","h3","body","bullet","numbered","callout","table",
               "image","code","math","divider","caption","pagebreak","spacer"}
def validate_content_json(data: list) -> tuple[list, list]:
    """Return (valid_blocks, warnings).

    Non-dict entries are dropped with a warning; dicts with an unrecognised
    "type" are kept as-is but flagged.
    """
    valid = []
    warnings = []
    for idx, block in enumerate(data):
        if not isinstance(block, dict):
            warnings.append(f"Block {idx}: not a dict, skipped")
            continue
        if block.get("type") not in VALID_TYPES:
            warnings.append(f"Block {idx}: unknown type '{block.get('type')}', kept as-is")
        valid.append(block)
    return valid, warnings
# ── Dispatcher ─────────────────────────────────────────────────────────────────
def parse_file(input_path: str) -> tuple[list, list]:
    """Dispatch on file extension and return (blocks, warnings)."""
    ext = Path(input_path).suffix.lower()
    if ext in (".md", ".txt", ".markdown"):
        with open(input_path, encoding="utf-8", errors="replace") as fh:
            return parse_markdown(fh.read()), []
    if ext == ".pdf":
        note = "PDF text extraction is best-effort — review content.json before rendering"
        return parse_pdf(input_path), [note]
    if ext == ".json":
        with open(input_path) as fh:
            data = json.load(fh)
        if isinstance(data, list):
            return validate_content_json(data)
        if isinstance(data, dict) and "content" in data:
            # Meta-wrapper form: {"content": [...]}
            return validate_content_json(data["content"])
        return [], ["JSON file does not contain a list of content blocks"]
    return [], [f"Unsupported file type: {ext}. Supported: .md .txt .pdf .json"]
# ── CLI ────────────────────────────────────────────────────────────────────────
def main():
    """Parse the input document into content.json and print a summary.

    stdout carries the machine-readable JSON result; the human-readable
    breakdown goes to stderr so pipelines can consume stdout directly.
    """
    ap = argparse.ArgumentParser(description="Parse a document into content.json")
    ap.add_argument("--input", required=True, help="Input file (.md, .txt, .pdf, .json)")
    ap.add_argument("--out", default="content.json", help="Output content.json path")
    args = ap.parse_args()

    if not os.path.exists(args.input):
        print(json.dumps({"status": "error", "error": f"File not found: {args.input}"}),
              file=sys.stderr)
        sys.exit(1)

    try:
        blocks, warnings = parse_file(args.input)
    except Exception as exc:
        import traceback
        print(json.dumps({"status": "error", "error": str(exc),
                          "trace": traceback.format_exc()}), file=sys.stderr)
        sys.exit(3)

    if not blocks:
        print(json.dumps({
            "status": "error",
            "error": "No content blocks extracted",
            "warnings": warnings,
        }), file=sys.stderr)
        sys.exit(3)

    with open(args.out, "w", encoding="utf-8") as fh:
        json.dump(blocks, fh, indent=2, ensure_ascii=False)

    print(json.dumps({
        "status": "ok",
        "out": args.out,
        "block_count": len(blocks),
        "warnings": warnings,
    }, indent=2))

    # Human-readable breakdown on stderr
    print(f"\n── Parsed {args.input} ─────────────────────────────────────",
          file=sys.stderr)
    print(f"  Blocks  : {len(blocks)}", file=sys.stderr)
    tally = {}
    for blk in blocks:
        kind = blk.get("type", "?")
        tally[kind] = tally.get(kind, 0) + 1
    for kind, count in sorted(tally.items()):
        print(f"    {kind:12} × {count}", file=sys.stderr)
    if warnings:
        print("  Warnings:", file=sys.stderr)
        for w in warnings:
            print(f"{w}", file=sys.stderr)
    print(f"\n  Next: bash make.sh run --content {args.out} --title '...' --type ...",
          file=sys.stderr)
    print("", file=sys.stderr)


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,111 @@
#!/usr/bin/env node
/**
* render_cover.js — Render cover.html → cover.pdf via Playwright.
*
* Usage:
* node render_cover.js --input cover.html --out cover.pdf
* node render_cover.js --input cover.html --out cover.pdf --wait 1200
*
* Exit codes: 0 success, 1 bad args, 2 dependency missing, 3 render error
*/
const path = require("path");
const fs = require("fs");
// Print CLI usage to stderr and terminate with exit code 1 (bad args).
function usage() {
  console.error("Usage: node render_cover.js --input <file.html> --out <file.pdf> [--wait <ms>]");
  process.exit(1);
}
// ── Arg parsing ────────────────────────────────────────────────────────────────
// Minimal hand-rolled flag parser: --input <file> --out <file> [--wait <ms>].
const args = process.argv.slice(2);
let inputFile = null, outFile = null, waitMs = 800;
for (let i = 0; i < args.length; i++) {
  if (args[i] === "--input" && args[i + 1]) { inputFile = args[++i]; }
  else if (args[i] === "--out" && args[i + 1]) { outFile = args[++i]; }
  else if (args[i] === "--wait" && args[i + 1]) {
    // Keep the 800 ms default when the value is not numeric — parseInt's NaN
    // would otherwise propagate into page.waitForTimeout(NaN) below.
    const parsedWait = parseInt(args[++i], 10);
    if (!Number.isNaN(parsedWait)) { waitMs = parsedWait; }
  }
}
if (!inputFile || !outFile) usage();
if (!fs.existsSync(inputFile)) {
  console.error(JSON.stringify({ status: "error", error: `File not found: ${inputFile}` }));
  process.exit(1);
}
// ── Playwright loader (tolerates global npm installs) ─────────────────────────
// Resolution order: local node_modules → global npm root. If both fail, emit a
// machine-readable error with an install hint and exit(2) (= dependency missing).
function loadPlaywright() {
  const { execSync } = require("child_process");
  try { return require("playwright"); } catch (_) {}
  try {
    // `npm root -g` prints the global node_modules directory; require from there.
    const root = execSync("npm root -g", { stdio: ["ignore","pipe","ignore"] }).toString().trim();
    return require(path.join(root, "playwright"));
  } catch (_) {}
  console.error(JSON.stringify({
    status: "error",
    error: "playwright not found",
    hint: "Run: npm install -g playwright && npx playwright install chromium"
  }));
  process.exit(2);
}
// ── Main ───────────────────────────────────────────────────────────────────────
// Loads the cover HTML in headless Chromium, renders it to a fixed
// 794×1123 px PDF page (presumably A4 at 96 dpi — confirm against cover.html),
// and sanity-checks the output size before reporting success.
(async () => {
  const { chromium } = loadPlaywright();
  let browser;
  try {
    browser = await chromium.launch();
  } catch (e) {
    // Chromium binary missing — try installing
    const { spawnSync } = require("child_process");
    const r = spawnSync("npx", ["playwright", "install", "chromium"], { stdio: "inherit", shell: true });
    if (r.status !== 0) {
      console.error(JSON.stringify({
        status: "error",
        error: "Chromium not installed and auto-install failed",
        hint: "Run: npx playwright install chromium"
      }));
      process.exit(2);
    }
    // Retry once now that the browser binary should be present.
    browser = await chromium.launch();
  }
  try {
    const page = await browser.newPage();
    const fileUrl = "file://" + path.resolve(inputFile);
    await page.goto(fileUrl);
    await page.waitForTimeout(waitMs); // let CSS + any JS settle
    await page.pdf({
      path: outFile,
      width: "794px",
      height: "1123px",
      printBackground: true,
    });
    await browser.close();
    // Basic sanity: output file must exist and be > 5 KB
    const stat = fs.statSync(outFile);
    if (stat.size < 5000) {
      console.error(JSON.stringify({
        status: "error",
        error: "Output PDF is suspiciously small — cover may be blank",
        hint: "Check cover.html for render errors"
      }));
      process.exit(3);
    }
    console.log(JSON.stringify({
      status: "ok",
      out: outFile,
      size_kb: Math.round(stat.size / 1024),
    }));
  } catch (e) {
    // Tear the browser down (ignoring close errors) before reporting failure.
    if (browser) await browser.close().catch(() => {});
    console.error(JSON.stringify({ status: "error", error: String(e) }));
    process.exit(3);
  }
})();