Files
skills/minimax-pdf/scripts/fill_write.py
shihao 6487becf60 Initial commit: add all skills files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 16:52:49 +08:00

243 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""
fill_write.py — Write values into PDF form fields.

Usage:
    # From a JSON data file
    python3 fill_write.py --input form.pdf --data values.json --out filled.pdf

    # Inline JSON
    python3 fill_write.py --input form.pdf --out filled.pdf \
        --values '{"FirstName": "Jane", "Agree": "true"}'

values format:
    {
      "FieldName": "text value",     # text field
      "CheckBox1": "true",           # checkbox (true / false)
      "Dropdown1": "OptionValue",    # dropdown (must match an existing choice value)
      "Radio1": "/Choice2"           # radio (must match a radio value)
    }

Exit codes: 0 success, 1 bad args, 2 dep missing, 3 read/write error, 4 validation error
"""
import argparse
import json
import os
import sys
import importlib.util
def ensure_deps():
    """Make sure ``pypdf`` is importable, installing it with pip if needed.

    Returns immediately when ``importlib.util.find_spec`` already locates the
    package.  Otherwise ``pip install`` is run through the current
    interpreter: first with ``--break-system-packages`` and, if that attempt
    fails, once more without the flag, because pip releases that predate the
    option reject it as unknown.

    When no attempt succeeds, a JSON error object is printed to stderr and
    the process exits with status 2 (the "dep missing" exit code).
    """
    if importlib.util.find_spec("pypdf") is not None:
        return

    import subprocess

    base_cmd = [sys.executable, "-m", "pip", "install", "-q"]
    attempts = (
        base_cmd + ["--break-system-packages", "pypdf"],
        base_cmd + ["pypdf"],
    )

    failure = None
    for cmd in attempts:
        try:
            subprocess.check_call(cmd)
        except (subprocess.CalledProcessError, OSError) as exc:
            failure = exc
            continue
        # The package appeared after interpreter start-up; drop stale finder
        # caches so the import that follows can see it.
        importlib.invalidate_caches()
        return

    print(
        json.dumps({
            "status": "error",
            "error": f"Could not install required package 'pypdf': {failure}",
        }),
        file=sys.stderr,
    )
    sys.exit(2)
# Must run before the pypdf imports below: ensure_deps() installs the package
# when it is not importable yet.
ensure_deps()
from pypdf import PdfReader, PdfWriter
from pypdf.generic import NameObject, TextStringObject, BooleanObject
# ── Field helpers ─────────────────────────────────────────────────────────────
def _field_type(field) -> str:
ft = str(field.get("/FT", ""))
if ft == "/Tx": return "text"
if ft == "/Btn":
ff = int(field.get("/Ff", 0))
return "radio" if ff & (1 << 15) else "checkbox"
if ft == "/Ch":
ff = int(field.get("/Ff", 0))
return "dropdown" if ff & (1 << 17) else "listbox"
return "unknown"
def _get_checkbox_on_value(field) -> str:
"""Return the /AP /N key that means 'checked' (anything except /Off)."""
ap = field.get("/AP")
if ap and "/N" in ap:
for k in ap["/N"]:
if str(k) != "/Off":
return str(k)
return "/Yes"
def _get_dropdown_values(field) -> list[str]:
opt = field.get("/Opt")
if not opt:
return []
values = []
for item in opt:
try:
from pypdf.generic import ArrayObject
if isinstance(item, (list, ArrayObject)) and len(item) >= 1:
values.append(str(item[0]))
else:
values.append(str(item))
except Exception:
values.append(str(item))
return values
# ── Walk + fill ───────────────────────────────────────────────────────────────
def _field_widgets(field) -> list:
    """Return the widgets that display *field*: its unnamed kids, else itself."""
    kids = field.get("/Kids")
    unnamed = [kid for kid in kids if "/T" not in kid] if kids else []
    return unnamed or [field]


def _widget_on_states(widget) -> list:
    """Return the non-``/Off`` keys of the widget's ``/AP`` -> ``/N`` dictionary."""
    appearance = widget.get("/AP")
    if appearance and "/N" in appearance:
        return [str(key) for key in appearance["/N"] if str(key) != "/Off"]
    return []


def _set_button_state(field, widgets, pdf_val: str) -> None:
    """Store *pdf_val* as the field's ``/V`` and update every widget's ``/AS``."""
    field.update({NameObject("/V"): NameObject(pdf_val)})
    for widget in widgets:
        states = _widget_on_states(widget)
        # A widget that lists its own appearance states can only show a state
        # it owns; every other widget of the group is switched off.
        shown = pdf_val if not states or pdf_val in states else "/Off"
        widget.update({NameObject("/AS"): NameObject(shown)})


def _walk_and_fill(fields, data: dict, filled: list, errors: list, parent: str = ""):
    """Recursively write the requested values into a tree of form fields.

    A field's qualified name is its ``/T`` joined to the enclosing group's
    name with ``"."``.  A field whose ``/Kids`` include entries carrying
    ``/T`` is treated as a group: only those named kids are visited and the
    group itself is never filled.  Fields whose qualified name is not a key
    of *data* are left untouched.

    Args:
        fields: Field dictionaries to visit.
        data: Mapping of qualified field name to the value to write.
        filled: Output list; receives the name of every field written.
        errors: Output list; receives ``{"field": ..., "error": ...}`` dicts
            for rejected values and unsupported field types.
        parent: Qualified name of the enclosing group ("" at the top level).
    """
    for field in fields:
        name = str(field.get("/T", ""))
        full = f"{parent}.{name}" if parent else name

        # Recurse into named groups
        kids = field.get("/Kids")
        if kids:
            named = [k for k in kids if "/T" in k]
            if named:
                _walk_and_fill(named, data, filled, errors, full)
                continue

        if full not in data:
            continue
        value = data[full]
        ftype = _field_type(field)

        if ftype == "text":
            # A JSON null clears the field instead of writing the text "None".
            text = "" if value is None else str(value)
            field.update({
                NameObject("/V"): TextStringObject(text),
                NameObject("/DV"): TextStringObject(text),
            })
            filled.append(full)

        elif ftype == "checkbox":
            truthy = str(value).lower() in ("true", "1", "yes", "on")
            widgets = _field_widgets(field)
            # The "checked" state name lives on the widget, which may be a
            # kid of the field rather than the field dictionary itself.
            states = [s for w in widgets for s in _widget_on_states(w)]
            on_val = states[0] if states else _get_checkbox_on_value(field)
            _set_button_state(field, widgets, on_val if truthy else "/Off")
            filled.append(full)

        elif ftype in ("dropdown", "listbox"):
            allowed = _get_dropdown_values(field)
            if allowed and str(value) not in allowed:
                errors.append({
                    "field": full,
                    "error": f"Value '{value}' not in allowed choices: {allowed}"
                })
                continue
            field.update({NameObject("/V"): TextStringObject(str(value))})
            filled.append(full)

        elif ftype == "radio":
            # Radio value must start with /
            pdf_val = str(value) if str(value).startswith("/") else f"/{value}"
            widgets = _field_widgets(field)
            states = [s for w in widgets for s in _widget_on_states(w)]
            # Reject a value no button of the group can display; "/Off"
            # (nothing selected) is always acceptable.
            if states and pdf_val != "/Off" and pdf_val not in states:
                errors.append({
                    "field": full,
                    "error": f"Value '{pdf_val}' not in allowed choices: {states}"
                })
                continue
            _set_button_state(field, widgets, pdf_val)
            filled.append(full)

        else:
            errors.append({"field": full, "error": f"Unsupported field type: {ftype}"})
def fill(pdf_path: str, out_path: str, data: dict) -> dict:
    """Fill the form fields of *pdf_path* from *data* and save to *out_path*.

    Args:
        pdf_path: Path of the source PDF.
        out_path: Path of the PDF to write; its directory is created if needed.
        data: Mapping of qualified field name to the value to write.

    Returns:
        A report dict.  On failure: ``{"status": "error", "error": ...}``
        (plus ``"hint"`` when the PDF has no form fields).  On success:
        ``status``, ``out``, ``filled_count``, ``filled_fields`` and
        ``size_kb``, plus ``validation_errors`` when values were rejected and
        ``not_found`` / ``hint`` when requested names matched no field.
    """
    try:
        reader = PdfReader(pdf_path)
    except Exception as exc:
        return {"status": "error", "error": str(exc)}

    writer = PdfWriter()
    writer.clone_document_from_reader(reader)

    form = writer._root_object.get("/AcroForm")  # type: ignore[attr-defined]
    has_fields = form is not None and "/Fields" in form
    if not has_fields:
        return {
            "status": "error",
            "error": "This PDF has no fillable form fields.",
            "hint": "Run fill_inspect.py first to confirm the PDF has fields.",
        }

    # Ask viewers to regenerate field appearances so the new values show up.
    form.update({NameObject("/NeedAppearances"): BooleanObject(True)})

    filled_names: list[str] = []
    problems: list[dict] = []
    _walk_and_fill(list(form["/Fields"]), data, filled_names, problems)

    # Requested names that were neither written nor rejected matched no field.
    missing = []
    for key in data:
        if key in filled_names:
            continue
        if any(problem["field"] == key for problem in problems):
            continue
        missing.append(key)

    try:
        target_dir = os.path.dirname(os.path.abspath(out_path))
        os.makedirs(target_dir, exist_ok=True)
        with open(out_path, "wb") as handle:
            writer.write(handle)
    except Exception as exc:
        return {"status": "error", "error": f"Write failed: {exc}"}

    report = {
        "status": "ok",
        "out": out_path,
        "filled_count": len(filled_names),
        "filled_fields": filled_names,
        "size_kb": os.path.getsize(out_path) // 1024,
    }
    if problems:
        report["validation_errors"] = problems
    if missing:
        report["not_found"] = missing
        report["hint"] = "Run fill_inspect.py to see all available field names."
    return report
def main():
    """Command-line entry point: parse arguments, fill the form, report.

    The JSON report returned by ``fill`` is printed to stdout; a short
    human-readable summary and all error objects go to stderr.

    Exit status:
        0  the output was written and no value was rejected
        1  bad arguments: missing input file, unreadable or invalid JSON,
           or JSON that is not an object
        3  ``fill`` reported an error (read or write failure, no form fields)
        4  the output was written but at least one value failed validation
    """
    parser = argparse.ArgumentParser(description="Fill PDF form fields")
    parser.add_argument("--input", required=True, help="Input PDF with form fields")
    parser.add_argument("--out", required=True, help="Output PDF path")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--data", help="Path to JSON file with field values")
    group.add_argument("--values", help="Inline JSON string with field values")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(json.dumps({"status": "error", "error": f"File not found: {args.input}"}),
              file=sys.stderr)
        sys.exit(1)

    # Load data
    try:
        if args.data:
            # JSON text is UTF-8; do not depend on the locale's default encoding.
            with open(args.data, encoding="utf-8") as f:
                data = json.load(f)
        else:
            data = json.loads(args.values)
    except OSError as e:
        print(json.dumps({"status": "error", "error": f"Cannot read data file: {e}"}),
              file=sys.stderr)
        sys.exit(1)
    except (ValueError, RecursionError) as e:
        print(json.dumps({"status": "error", "error": f"JSON parse error: {e}"}),
              file=sys.stderr)
        sys.exit(1)

    # fill() looks values up by field name, so anything but an object is unusable.
    if not isinstance(data, dict):
        print(json.dumps({
            "status": "error",
            "error": "Field values must be a JSON object mapping field names "
                     f"to values, got {type(data).__name__}",
        }), file=sys.stderr)
        sys.exit(1)

    result = fill(args.input, args.out, data)
    print(json.dumps(result, indent=2, ensure_ascii=False))

    if result["status"] != "ok":
        sys.exit(3)

    print("\n── Fill complete ───────────────────────────────────────",
          file=sys.stderr)
    print(f" Output : {result['out']}", file=sys.stderr)
    print(f" Filled : {result['filled_count']} field(s)", file=sys.stderr)
    if result.get("validation_errors"):
        print(" Errors :", file=sys.stderr)
        for e in result["validation_errors"]:
            print(f"{e['field']}: {e['error']}", file=sys.stderr)
    if result.get("not_found"):
        print(f" Not found: {result['not_found']}", file=sys.stderr)
    print("", file=sys.stderr)

    # The file was written, but rejected values must not look like full success.
    if result.get("validation_errors"):
        sys.exit(4)
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()