Initial commit: add all skills files

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 16:52:49 +08:00
commit 6487becf60
396 changed files with 108871 additions and 0 deletions
--- a/minimax-xlsx/scripts/formula_check.py
+++ b/minimax-xlsx/scripts/formula_check.py
@@ -0,0 +1,422 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+formula_check.py — Static formula validator for xlsx files.
+
+Usage:
+    python3 formula_check.py <input.xlsx>
+    python3 formula_check.py <input.xlsx> --json          # machine-readable output
+    python3 formula_check.py <input.xlsx> --report        # standardized validation report (JSON)
+    python3 formula_check.py <input.xlsx> --report -o out # report to file
+    python3 formula_check.py <input.xlsx> --sheet Sales   # limit to one sheet
+    python3 formula_check.py <input.xlsx> --summary       # error counts only, no details
+
+What it checks:
+1. Error-value cells: <c t="e"><v>#REF!</v></c> — all 7 Excel error types
+2. Broken cross-sheet references: formula references a sheet not in workbook.xml
+3. Broken named-range references: formula references a name not in workbook.xml <definedNames>
+4. Shared formula integrity: shared formula primary cell exists and has formula text
+5. Missing <v> on t="e" cells (malformed XML)
+
+Checks NOT performed (require dynamic recalculation):
+- Runtime errors that only appear after formulas execute (#DIV/0! on empty denominator, etc.)
+  -> Use libreoffice_recalc.py + re-run formula_check.py for dynamic validation
+
+Exit code:
+    0 — no errors found
+    1 — errors detected (or file cannot be opened)
+"""
+
+import sys
+import zipfile
+import xml.etree.ElementTree as ET
+import re
+import json
+
+# OOXML SpreadsheetML namespace
+NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
+NSP = f"{{{NS}}}"
+
+# All 7 standard Excel formula error types
+EXCEL_ERRORS = {"#REF!", "#DIV/0!", "#VALUE!", "#NAME?", "#NULL!", "#NUM!", "#N/A"}
+
+# Excel built-in function names (subset of common ones) — used for #NAME? heuristic
+# Full list: https://support.microsoft.com/en-us/office/excel-functions-alphabetical
+_BUILTIN_FUNCTIONS = {
+    "ABS", "AND", "AVERAGE", "AVERAGEIF", "AVERAGEIFS", "CEILING", "CHOOSE",
+    "COUNTA", "COUNTIF", "COUNTIFS", "COUNT", "DATE", "EDATE", "EOMONTH",
+    "FALSE", "FILTER", "FIND", "FLOOR", "IF", "IFERROR", "IFNA", "IFS",
+    "INDEX", "INDIRECT", "INT", "IRR", "ISBLANK", "ISERROR", "ISNA", "ISNUMBER",
+    "LARGE", "LEFT", "LEN", "LOOKUP", "LOWER", "MATCH", "MAX", "MID", "MIN",
+    "MOD", "MONTH", "NETWORKDAYS", "NOT", "NOW", "NPV", "OFFSET", "OR",
+    "PMT", "PV", "RAND", "RANK", "RIGHT", "ROUND", "ROUNDDOWN", "ROUNDUP",
+    "ROW", "ROWS", "SEARCH", "SMALL", "SORT", "SQRT", "SUBSTITUTE", "SUM",
+    "SUMIF", "SUMIFS", "SUMPRODUCT", "TEXT", "TODAY", "TRANSPOSE", "TRIM",
+    "TRUE", "UNIQUE", "UPPER", "VALUE", "VLOOKUP", "HLOOKUP", "XLOOKUP",
+    "XMATCH", "XNPV", "XIRR", "YEAR", "YEARFRAC",
+}
+
+
+def get_sheet_names(z: zipfile.ZipFile) -> dict[str, str]:
+    """Return dict of {r:id -> sheet_name} from workbook.xml."""
+    wb_xml = z.read("xl/workbook.xml")
+    wb = ET.fromstring(wb_xml)
+    rel_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
+    sheets = {}
+    for sheet in wb.findall(f".//{NSP}sheet"):
+        name = sheet.get("name", "")
+        rid = sheet.get(f"{{{rel_ns}}}id", "")
+        sheets[rid] = name
+    return sheets
+
+
+def get_defined_names(z: zipfile.ZipFile) -> set[str]:
+    """Return set of named ranges defined in workbook.xml <definedNames>."""
+    wb_xml = z.read("xl/workbook.xml")
+    wb = ET.fromstring(wb_xml)
+    names = set()
+    for dn in wb.findall(f".//{NSP}definedName"):
+        n = dn.get("name", "")
+        if n:
+            names.add(n)
+    return names
+
+
+def get_sheet_files(z: zipfile.ZipFile) -> dict[str, str]:
+    """Return dict of {r:id -> xl/worksheets/sheetN.xml} from workbook.xml.rels."""
+    rels_xml = z.read("xl/_rels/workbook.xml.rels")
+    rels = ET.fromstring(rels_xml)
+    mapping = {}
+    for rel in rels:
+        rid = rel.get("Id", "")
+        target = rel.get("Target", "")
+        if "worksheets" in target:
+            # Target may be relative: "worksheets/sheet1.xml" -> "xl/worksheets/sheet1.xml"
+            if not target.startswith("xl/"):
+                target = "xl/" + target
+            mapping[rid] = target
+    return mapping
+
+
+def extract_sheet_refs(formula: str) -> list[str]:
+    """
+    Extract all sheet names referenced in a formula string.
+
+    Handles:
+      - 'Sheet Name'!A1  (quoted, may contain spaces)
+      - SheetName!A1     (unquoted, no spaces)
+
+    Returns a list of sheet name strings (may contain duplicates if the same
+    sheet is referenced multiple times in one formula).
+    """
+    refs = []
+    # Quoted sheet names: 'Sheet Name'!
+    for m in re.finditer(r"'([^']+)'!", formula):
+        refs.append(m.group(1))
+    # Unquoted sheet names: SheetName! (not preceded by a single quote)
+    for m in re.finditer(r"(?<!')([A-Za-z_\u4e00-\u9fff][A-Za-z0-9_.·\u4e00-\u9fff]*)!", formula):
+        refs.append(m.group(1))
+    return refs
+
+
+def extract_name_refs(formula: str) -> list[str]:
+    """
+    Extract identifiers in a formula that could be named range references.
+
+    Heuristic: identifiers that:
+    - Are not preceded by a sheet reference (no "!" before them)
+    - Are not followed by "(" (which would make them function calls)
+    - Match the pattern of a name (letters/underscore start, alphanumeric/underscore body)
+    - Are not single-letter column references or row references
+
+    This is approximate. False positives are possible; false negatives are rare.
+    """
+    names = []
+    # Remove quoted sheet references first to avoid false matches
+    formula_clean = re.sub(r"'[^']*'![A-Z$0-9:]+", "", formula)
+    formula_clean = re.sub(r"[A-Za-z_][A-Za-z0-9_.]*![A-Z$0-9:]+", "", formula_clean)
+    # Find identifiers not followed by "(" (not function calls)
+    for m in re.finditer(r"\b([A-Za-z_][A-Za-z0-9_]{2,})\b(?!\s*\()", formula_clean):
+        candidate = m.group(1)
+        # Exclude Excel cell references like A1, B10, AA100
+        if re.fullmatch(r"[A-Z]{1,3}[0-9]+", candidate):
+            continue
+        # Exclude built-in function names (they appear without parens sometimes in array formulas)
+        if candidate.upper() in _BUILTIN_FUNCTIONS:
+            continue
+        names.append(candidate)
+    return names
+
+
+def check(xlsx_path: str, sheet_filter: str | None = None) -> dict:
+    """
+    Run all static checks on the given xlsx file.
+
+    Args:
+        xlsx_path: path to the .xlsx file
+        sheet_filter: if provided, only check the sheet with this name
+
+    Returns:
+        A dict with keys:
+          file, sheets_checked, formula_count, shared_formula_ranges,
+          error_count, errors
+    """
+    results = {
+        "file": xlsx_path,
+        "sheets_checked": [],
+        "formula_count": 0,
+        "shared_formula_ranges": 0,  # number of shared formula definitions
+        "error_count": 0,
+        "errors": [],
+    }
+
+    try:
+        z = zipfile.ZipFile(xlsx_path, "r")
+    except (zipfile.BadZipFile, FileNotFoundError) as e:
+        results["errors"].append({"type": "file_error", "message": str(e)})
+        results["error_count"] = 1
+        return results
+
+    with z:
+        sheet_names = get_sheet_names(z)
+        sheet_files = get_sheet_files(z)
+        valid_sheet_names = set(sheet_names.values())
+        defined_names = get_defined_names(z)
+
+        for rid, sheet_name in sheet_names.items():
+            # Apply sheet filter if requested
+            if sheet_filter and sheet_name != sheet_filter:
+                continue
+
+            ws_file = sheet_files.get(rid)
+            if not ws_file or ws_file not in z.namelist():
+                continue
+
+            results["sheets_checked"].append(sheet_name)
+            ws_xml = z.read(ws_file)
+            ws = ET.fromstring(ws_xml)
+
+            # Track shared formula IDs seen on this sheet (si -> primary cell ref)
+            shared_primary: dict[str, str] = {}
+
+            for cell in ws.findall(f".//{NSP}c"):
+                cell_ref = cell.get("r", "?")
+                cell_type = cell.get("t", "n")
+
+                # ── Check 1: error-value cell ──────────────────────────────
+                if cell_type == "e":
+                    v_elem = cell.find(f"{NSP}v")
+                    if v_elem is None:
+                        # Malformed: t="e" but no <v> — record as structural issue
+                        results["errors"].append(
+                            {
+                                "type": "malformed_error_cell",
+                                "sheet": sheet_name,
+                                "cell": cell_ref,
+                                "detail": "Cell has t='e' but no <v> child element",
+                            }
+                        )
+                        results["error_count"] += 1
+                    else:
+                        error_val = v_elem.text or "#UNKNOWN"
+                        f_elem = cell.find(f"{NSP}f")
+                        results["errors"].append(
+                            {
+                                "type": "error_value",
+                                "error": error_val,
+                                "sheet": sheet_name,
+                                "cell": cell_ref,
+                                # Include formula text if present
+                                "formula": f_elem.text if (f_elem is not None and f_elem.text) else None,
+                            }
+                        )
+                        results["error_count"] += 1
+
+                # ── Check 2 & 3: formulas ──────────────────────────────────
+                f_elem = cell.find(f"{NSP}f")
+                if f_elem is None:
+                    continue
+
+                f_type = f_elem.get("t", "")  # "shared", "array", or "" for normal
+                f_si = f_elem.get("si")       # shared formula group ID
+
+                # Count formulas:
+                # - Normal formulas: always count
+                # - Shared formula PRIMARY (has text + ref attribute): count once
+                # - Shared formula CONSUMER (si only, no text): do NOT count separately
+                #   (they are covered by the primary's ref range)
+                if f_type == "shared" and f_elem.text is None:
+                    # Consumer cell: skip formula counting and cross-ref checks
+                    # (the primary cell already covers this formula)
+                    continue
+
+                formula = f_elem.text or ""
+
+                if f_type == "shared" and f_elem.get("ref"):
+                    results["shared_formula_ranges"] += 1
+                    if f_si is not None:
+                        shared_primary[f_si] = cell_ref
+
+                if formula:
+                    results["formula_count"] += 1
+
+                    # Check 2: cross-sheet references
+                    for ref_sheet in extract_sheet_refs(formula):
+                        if ref_sheet not in valid_sheet_names:
+                            results["errors"].append(
+                                {
+                                    "type": "broken_sheet_ref",
+                                    "sheet": sheet_name,
+                                    "cell": cell_ref,
+                                    "formula": formula,
+                                    "missing_sheet": ref_sheet,
+                                    "valid_sheets": sorted(valid_sheet_names),
+                                }
+                            )
+                            results["error_count"] += 1
+
+                    # Check 3: named range references
+                    # Only flag if the name is not a built-in and not a sheet-prefixed ref
+                    for name_ref in extract_name_refs(formula):
+                        if name_ref not in defined_names:
+                            results["errors"].append(
+                                {
+                                    "type": "unknown_name_ref",
+                                    "sheet": sheet_name,
+                                    "cell": cell_ref,
+                                    "formula": formula,
+                                    "unknown_name": name_ref,
+                                    "defined_names": sorted(defined_names),
+                                    "note": "Heuristic check — verify manually if this is a false positive",
+                                }
+                            )
+                            results["error_count"] += 1
+
+    return results
+
+
+def build_report(results: dict) -> dict:
+    """
+    Transform raw check() output into a standardized validation report.
+
+    Usage:
+        python3 formula_check.py <input.xlsx> --report          # JSON report to stdout
+        python3 formula_check.py <input.xlsx> --report -o out   # JSON report to file
+    """
+    from collections import Counter
+
+    errors = results.get("errors", [])
+    error_types = [e.get("error", e.get("type", "unknown")) for e in errors]
+
+    return {
+        "status": "success" if results["error_count"] == 0 else "errors_found",
+        "file": results["file"],
+        "sheets_checked": results["sheets_checked"],
+        "total_formulas": results["formula_count"],
+        "total_errors": results["error_count"],
+        "shared_formula_ranges": results.get("shared_formula_ranges", 0),
+        "errors_by_type": dict(Counter(error_types)) if errors else {},
+        "errors": errors,
+    }
+
+
+def main() -> None:
+    use_json = "--json" in sys.argv
+    use_report = "--report" in sys.argv
+    summary_only = "--summary" in sys.argv
+    output_file = None
+    sheet_filter = None
+    args_clean = []
+
+    i = 1
+    while i < len(sys.argv):
+        arg = sys.argv[i]
+        if arg == "--sheet" and i + 1 < len(sys.argv):
+            sheet_filter = sys.argv[i + 1]
+            i += 2
+        elif arg == "-o" and i + 1 < len(sys.argv):
+            output_file = sys.argv[i + 1]
+            i += 2
+        elif arg.startswith("--"):
+            i += 1  # skip flags already handled
+        else:
+            args_clean.append(arg)
+            i += 1
+
+    if not args_clean:
+        print("Usage: formula_check.py <input.xlsx> [--json] [--report [-o FILE]] [--sheet NAME] [--summary]")
+        sys.exit(1)
+
+    results = check(args_clean[0], sheet_filter=sheet_filter)
+
+    if use_report:
+        report = build_report(results)
+        output = json.dumps(report, indent=2, ensure_ascii=False)
+        if output_file:
+            with open(output_file, "w", encoding="utf-8") as f:
+                f.write(output + "\n")
+        else:
+            print(output)
+        sys.exit(1 if results["error_count"] > 0 else 0)
+
+    if use_json:
+        print(json.dumps(results, indent=2, ensure_ascii=False))
+        sys.exit(1 if results["error_count"] > 0 else 0)
+
+    # Human-readable output
+    sheets = ", ".join(results["sheets_checked"]) or "(none)"
+    if sheet_filter:
+        sheets = f"{sheet_filter} (filtered)"
+
+    print(f"File   : {results['file']}")
+    print(f"Sheets : {sheets}")
+    print(f"Formulas checked      : {results['formula_count']} distinct formula cells")
+    print(f"Shared formula ranges : {results['shared_formula_ranges']} ranges")
+    print(f"Errors found          : {results['error_count']}")
+
+    if not summary_only and results["errors"]:
+        print("\n── Error Details ──")
+        for e in results["errors"]:
+            if e["type"] == "error_value":
+                formula_hint = f" (formula: {e['formula']})" if e.get("formula") else ""
+                print(f"  [FAIL] [{e['sheet']}!{e['cell']}] contains {e['error']}{formula_hint}")
+            elif e["type"] == "broken_sheet_ref":
+                print(
+                    f"  [FAIL] [{e['sheet']}!{e['cell']}] references missing sheet "
+                    f"'{e['missing_sheet']}'"
+                )
+                print(f"         Formula: {e['formula']}")
+                print(f"         Valid sheets: {e.get('valid_sheets', [])}")
+            elif e["type"] == "unknown_name_ref":
+                print(
+                    f"  [WARN] [{e['sheet']}!{e['cell']}] uses unknown name "
+                    f"'{e['unknown_name']}' (heuristic — verify manually)"
+                )
+                print(f"         Formula: {e['formula']}")
+                print(f"         Defined names: {e.get('defined_names', [])}")
+            elif e["type"] == "malformed_error_cell":
+                print(f"  [FAIL] [{e['sheet']}!{e['cell']}] malformed error cell: {e['detail']}")
+            elif e["type"] == "file_error":
+                print(f"  [FAIL] File error: {e['message']}")
+        print()
+
+    if results["error_count"] == 0:
+        print("PASS — No formula errors detected")
+    else:
+        # Separate definitive failures from heuristic warnings
+        hard_errors = [e for e in results["errors"] if e["type"] != "unknown_name_ref"]
+        warnings = [e for e in results["errors"] if e["type"] == "unknown_name_ref"]
+        if hard_errors:
+            print(f"FAIL — {len(hard_errors)} error(s) must be fixed before delivery")
+            if warnings:
+                print(f"WARN — {len(warnings)} heuristic warning(s) require manual review")
+            sys.exit(1)
+        else:
+            # Only heuristic warnings — do not block delivery but alert
+            print(f"PASS with WARN — {len(warnings)} heuristic warning(s) require manual review")
+            # Exit 0: heuristic warnings alone do not block delivery
+            sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
--- a/minimax-xlsx/scripts/libreoffice_recalc.py
+++ b/minimax-xlsx/scripts/libreoffice_recalc.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+libreoffice_recalc.py — Tier 2 dynamic formula recalculation via LibreOffice headless.
+
+Opens the xlsx file with the LibreOffice Calc engine, executes all formulas, writes
+the computed values into the <v> cache elements, and saves the result. This is the
+closest server-side equivalent of "open in Excel and save."
+
+After recalculation, run formula_check.py on the output file to detect runtime errors
+(#DIV/0!, #N/A, etc.) that only surface after actual computation.
+
+Usage:
+    python3 libreoffice_recalc.py input.xlsx output.xlsx
+    python3 libreoffice_recalc.py input.xlsx output.xlsx --timeout 90
+    python3 libreoffice_recalc.py --check          # check LibreOffice availability only
+
+Exit codes:
+    0 — recalculation succeeded, output file written
+    2 — LibreOffice not found (Tier 2 unavailable — not a hard failure, note in report)
+    1 — LibreOffice found but recalculation failed (timeout, crash, bad file)
+"""
+
+import subprocess
+import sys
+import shutil
+import os
+import tempfile
+import argparse
+
+
+# ── LibreOffice discovery ───────────────────────────────────────────────────
+
+def find_soffice() -> str | None:
+    """
+    Locate the soffice (LibreOffice) binary.
+
+    Search order:
+      1. macOS application bundle (default install location)
+      2. PATH lookup for 'soffice'
+      3. PATH lookup for 'libreoffice' (common on Linux)
+    """
+    candidates = [
+        "/Applications/LibreOffice.app/Contents/MacOS/soffice",  # macOS
+        "soffice",     # Linux / macOS if on PATH
+        "libreoffice", # alternative Linux name
+    ]
+    for c in candidates:
+        # shutil.which handles PATH lookup; also check absolute paths directly
+        found = shutil.which(c)
+        if found:
+            return found
+        if os.path.isfile(c) and os.access(c, os.X_OK):
+            return c
+    return None
+
+
+def get_libreoffice_version(soffice: str) -> str:
+    """Return LibreOffice version string, or 'unknown' on failure."""
+    try:
+        result = subprocess.run(
+            [soffice, "--version"],
+            capture_output=True,
+            timeout=10,
+        )
+        return result.stdout.decode(errors="replace").strip()
+    except Exception:
+        return "unknown"
+
+
+# ── Recalculation ───────────────────────────────────────────────────────────
+
+def recalculate(
+    input_path: str,
+    output_path: str,
+    timeout: int = 60,
+) -> tuple[bool, str]:
+    """
+    Run LibreOffice headless recalculation on input_path, write result to output_path.
+
+    Returns:
+        (success: bool, message: str)
+
+    The message explains what happened (success or failure reason).
+    """
+    soffice = find_soffice()
+    if not soffice:
+        return False, (
+            "LibreOffice not found. Tier 2 validation is unavailable in this environment. "
+            "Install LibreOffice to enable dynamic formula recalculation.\n"
+            "  macOS:  brew install --cask libreoffice\n"
+            "  Linux:  sudo apt-get install -y libreoffice"
+        )
+
+    version = get_libreoffice_version(soffice)
+
+    # Work on a copy in a temp directory to avoid side effects on the source file.
+    # LibreOffice writes the output using the same filename stem in --outdir.
+    with tempfile.TemporaryDirectory(prefix="xlsx_recalc_") as tmpdir:
+        tmp_input = os.path.join(tmpdir, os.path.basename(input_path))
+        shutil.copy(input_path, tmp_input)
+
+        cmd = [
+            soffice,
+            "--headless",
+            "--norestore",           # do not attempt to restore crashed sessions
+            "--infilter=Calc MS Excel 2007 XML",
+            "--convert-to", "xlsx",
+            "--outdir", tmpdir,
+            tmp_input,
+        ]
+
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                timeout=timeout,
+            )
+        except subprocess.TimeoutExpired:
+            return False, (
+                f"LibreOffice timed out after {timeout}s. "
+                "The file may be too large or contain constructs that cause LibreOffice to hang. "
+                "Try increasing --timeout or simplify the file."
+            )
+        except FileNotFoundError:
+            return False, f"LibreOffice binary not executable: {soffice}"
+
+        if result.returncode != 0:
+            stderr = result.stderr.decode(errors="replace").strip()
+            stdout = result.stdout.decode(errors="replace").strip()
+            return False, (
+                f"LibreOffice exited with code {result.returncode}.\n"
+                f"stderr: {stderr}\n"
+                f"stdout: {stdout}"
+            )
+
+        # LibreOffice writes: <tmpdir>/<stem>.xlsx
+        stem = os.path.splitext(os.path.basename(tmp_input))[0]
+        tmp_output = os.path.join(tmpdir, stem + ".xlsx")
+
+        if not os.path.isfile(tmp_output):
+            # Try to find any .xlsx file in tmpdir (LibreOffice may behave differently)
+            xlsx_files = [f for f in os.listdir(tmpdir) if f.endswith(".xlsx") and f != os.path.basename(tmp_input)]
+            if xlsx_files:
+                tmp_output = os.path.join(tmpdir, xlsx_files[0])
+            else:
+                stdout = result.stdout.decode(errors="replace").strip()
+                return False, (
+                    f"LibreOffice succeeded (exit 0) but output file not found in {tmpdir}.\n"
+                    f"stdout: {stdout}\n"
+                    f"Files in tmpdir: {os.listdir(tmpdir)}"
+                )
+
+        # Copy recalculated file to final destination
+        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
+        shutil.copy(tmp_output, output_path)
+
+    return True, f"Recalculation complete. LibreOffice {version}. Output: {output_path}"
+
+
+# ── CLI ─────────────────────────────────────────────────────────────────────
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="LibreOffice headless formula recalculation for xlsx files.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Basic recalculation
+  python3 libreoffice_recalc.py report.xlsx report_recalc.xlsx
+
+  # With extended timeout for large files
+  python3 libreoffice_recalc.py big_model.xlsx big_model_recalc.xlsx --timeout 120
+
+  # Check if LibreOffice is available (useful in CI)
+  python3 libreoffice_recalc.py --check
+
+  # Full validation pipeline
+  python3 libreoffice_recalc.py input.xlsx /tmp/recalc.xlsx && \\
+    python3 formula_check.py /tmp/recalc.xlsx
+""",
+    )
+    parser.add_argument("input", nargs="?", help="Input xlsx file path")
+    parser.add_argument("output", nargs="?", help="Output xlsx file path (recalculated)")
+    parser.add_argument(
+        "--timeout",
+        type=int,
+        default=60,
+        metavar="SECONDS",
+        help="Maximum time to wait for LibreOffice (default: 60)",
+    )
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="Only check if LibreOffice is available, then exit",
+    )
+
+    args = parser.parse_args()
+
+    # ── --check mode ─────────────────────────────────────────────────────────
+    if args.check:
+        soffice = find_soffice()
+        if soffice:
+            version = get_libreoffice_version(soffice)
+            print(f"LibreOffice available: {soffice}")
+            print(f"Version: {version}")
+            sys.exit(0)
+        else:
+            print("LibreOffice NOT available.")
+            print("Tier 2 dynamic validation requires LibreOffice.")
+            print("  macOS:  brew install --cask libreoffice")
+            print("  Linux:  sudo apt-get install -y libreoffice")
+            sys.exit(2)
+
+    # ── Recalculation mode ────────────────────────────────────────────────────
+    if not args.input or not args.output:
+        parser.print_help()
+        sys.exit(1)
+
+    if not os.path.isfile(args.input):
+        print(f"ERROR: Input file not found: {args.input}")
+        sys.exit(1)
+
+    print(f"Input  : {args.input}")
+    print(f"Output : {args.output}")
+    print(f"Timeout: {args.timeout}s")
+    print()
+
+    success, message = recalculate(args.input, args.output, timeout=args.timeout)
+
+    if success:
+        print(f"OK: {message}")
+        print()
+        print("Next step: run formula_check.py on the recalculated file to detect runtime errors:")
+        print(f"  python3 formula_check.py {args.output}")
+        sys.exit(0)
+    else:
+        # Distinguish "not installed" (exit 2) from "failed" (exit 1)
+        if "not found" in message.lower() or "not available" in message.lower():
+            print(f"SKIP (Tier 2 unavailable): {message}")
+            sys.exit(2)
+        else:
+            print(f"ERROR: {message}")
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/minimax-xlsx/scripts/shared_strings_builder.py
+++ b/minimax-xlsx/scripts/shared_strings_builder.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+shared_strings_builder.py — Generate a valid sharedStrings.xml from a list of strings.
+
+Usage (strings as command-line arguments):
+    python3 shared_strings_builder.py "Revenue" "Cost" "Gross Profit" > sharedStrings.xml
+
+Usage (strings from a file, one per line):
+    python3 shared_strings_builder.py --file strings.txt > sharedStrings.xml
+
+Usage (print index table instead of XML, for reference):
+    python3 shared_strings_builder.py --index "Revenue" "Cost" "Gross Profit"
+    python3 shared_strings_builder.py --index --file strings.txt
+
+Output format:
+    Valid xl/sharedStrings.xml written to stdout.
+    Redirect to the correct path:
+        python3 shared_strings_builder.py "A" "B" > /tmp/xlsx_work/xl/sharedStrings.xml
+
+Notes:
+    - Strings are de-duplicated: identical strings appear only once in the table.
+    - The 'count' attribute equals the number of unique strings (appropriate for new files
+      where each string is used in exactly one cell). If a string appears in multiple cells,
+      manually increment 'count' by the number of extra references.
+    - Special characters (&, <, >) are automatically XML-escaped.
+    - Leading/trailing spaces are preserved with xml:space="preserve".
+"""
+
+import sys
+import html
+import argparse
+
+
+HEADER = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+SST_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
+
+
+def escape_text(s: str) -> tuple[str, bool]:
+    """
+    Return (escaped_text, needs_preserve).
+    needs_preserve is True if the string has leading or trailing whitespace.
+    """
+    escaped = html.escape(s, quote=False)
+    needs_preserve = s != s.strip()
+    return escaped, needs_preserve
+
+
+def build_xml(strings: list[str]) -> str:
+    """Build sharedStrings.xml content from a list of unique strings."""
+    n = len(strings)
+    lines = [
+        HEADER,
+        f'<sst xmlns="{SST_NS}" count="{n}" uniqueCount="{n}">',
+    ]
+    for i, s in enumerate(strings):
+        escaped, preserve = escape_text(s)
+        if preserve:
+            lines.append(f'  <si><t xml:space="preserve">{escaped}</t></si>'
+                         f'  <!-- index {i} -->')
+        else:
+            lines.append(f'  <si><t>{escaped}</t></si>  <!-- index {i} -->')
+    lines.append("</sst>")
+    return "\n".join(lines) + "\n"
+
+
+def build_index_table(strings: list[str]) -> str:
+    """Return a human-readable index table (for agent reference, not written to file)."""
+    lines = [
+        f"{'Index':<6}  String",
+        "-" * 50,
+    ]
+    for i, s in enumerate(strings):
+        lines.append(f"{i:<6}  {s!r}")
+    lines.append("")
+    lines.append(
+        f"Total: {len(strings)} unique strings. "
+        "Use these indices in <c t=\"s\"><v>N</v></c> cells."
+    )
+    return "\n".join(lines) + "\n"
+
+
+def deduplicate(strings: list[str]) -> list[str]:
+    """Remove duplicates while preserving first-occurrence order."""
+    seen: set[str] = set()
+    result: list[str] = []
+    for s in strings:
+        if s not in seen:
+            seen.add(s)
+            result.append(s)
+    return result
+
+
+def load_from_file(path: str) -> list[str]:
+    """Read one string per non-empty line from a file."""
+    with open(path, encoding="utf-8") as f:
+        return [line.rstrip("\n") for line in f if line.strip()]
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Generate xl/sharedStrings.xml from a list of strings.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    parser.add_argument(
+        "strings",
+        nargs="*",
+        metavar="STRING",
+        help="String values to include in the shared string table.",
+    )
+    parser.add_argument(
+        "--file",
+        "-f",
+        metavar="PATH",
+        help="Read strings from a file (one string per line) instead of arguments.",
+    )
+    parser.add_argument(
+        "--index",
+        action="store_true",
+        help="Print a human-readable index table instead of XML output.",
+    )
+    args = parser.parse_args()
+
+    if args.file:
+        try:
+            raw = load_from_file(args.file)
+        except FileNotFoundError:
+            print(f"ERROR: File not found: {args.file}", file=sys.stderr)
+            sys.exit(1)
+        except OSError as e:
+            print(f"ERROR: Cannot read file: {e}", file=sys.stderr)
+            sys.exit(1)
+    else:
+        raw = list(args.strings)
+
+    if not raw:
+        print(
+            "ERROR: No strings provided.\n"
+            "Usage: shared_strings_builder.py \"String1\" \"String2\" ...\n"
+            "   or: shared_strings_builder.py --file strings.txt",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    strings = deduplicate(raw)
+
+    if len(strings) < len(raw):
+        removed = len(raw) - len(strings)
+        print(
+            f"Note: {removed} duplicate(s) removed. "
+            f"{len(strings)} unique strings in table.",
+            file=sys.stderr,
+        )
+
+    if args.index:
+        print(build_index_table(strings))
+    else:
+        print(build_xml(strings), end="")
+
+
+if __name__ == "__main__":
+    main()
--- a/minimax-xlsx/scripts/style_audit.py
+++ b/minimax-xlsx/scripts/style_audit.py
@@ -0,0 +1,575 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+style_audit.py — Financial formatting compliance checker for xlsx files.
+
+Audits an xlsx file (or an unpacked xlsx directory) and reports:
+1. Style system integrity: count attributes match actual element counts
+2. Color-role violations: formula cells with blue font, input cells with black font
+3. Year-format violations: cells containing 4-digit years using comma-format
+4. Percentage value violations: percentage-formatted cells with values > 1 (likely meant 0.08 not 8)
+5. Style index out-of-range: s attribute exceeds cellXfs count
+6. fills[0]/fills[1] presence check (OOXML spec requirement)
+
+Usage:
+    python3 style_audit.py input.xlsx                  # audit a packed xlsx
+    python3 style_audit.py /tmp/xlsx_work/             # audit an unpacked directory
+    python3 style_audit.py input.xlsx --json           # machine-readable output
+    python3 style_audit.py input.xlsx --summary        # counts only, no detail
+
+Exit code:
+    0 — no violations found
+    1 — violations detected (or file cannot be opened)
+"""
+
+import sys
+import os
+import zipfile
+import xml.etree.ElementTree as ET
+import json
+import re
+import tempfile
+import shutil
+
+NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
+NSP = f"{{{NS}}}"
+
+# Predefined style index semantics from minimal_xlsx template.
+# Maps cellXfs index -> (role, font_color_expectation, numFmt_type)
+# role: "input" = blue expected, "formula" = black/green expected, "header" = any, "any" = skip
+TEMPLATE_SLOT_ROLES = {
+    0:  ("any",     None,    None),
+    1:  ("input",   "blue",  "general"),
+    2:  ("formula", "black", "general"),
+    3:  ("formula", "green", "general"),
+    4:  ("any",     None,    "general"),   # header
+    5:  ("input",   "blue",  "currency"),
+    6:  ("formula", "black", "currency"),
+    7:  ("input",   "blue",  "percent"),
+    8:  ("formula", "black", "percent"),
+    9:  ("input",   "blue",  "integer"),
+    10: ("formula", "black", "integer"),
+    11: ("input",   "blue",  "year"),
+    12: ("input",   "blue",  "general"),   # highlight
+}
+
+# AARRGGBB values for each role color
+BLUE_RGB  = "000000ff"
+BLACK_RGB = "00000000"
+GREEN_RGB = "00008000"
+RED_RGB   = "00ff0000"
+
+# numFmtIds that represent percentage formats (built-in + common custom)
+PERCENT_FMT_IDS = {9, 10, 165, 170}
+
+# numFmtIds that use comma separator (would corrupt year display)
+COMMA_FMT_IDS = {3, 4, 167, 168}  # #,##0 style — 4-digit years would show as 2,024
+
+
+def _parse_styles(styles_xml: bytes) -> dict:
+    """Parse styles.xml and return structured data."""
+    root = ET.fromstring(styles_xml)
+
+    def find(tag):
+        return root.find(f"{NSP}{tag}")
+
+    # numFmts
+    num_fmts = {}  # id -> formatCode
+    nf_elem = find("numFmts")
+    if nf_elem is not None:
+        declared_count = int(nf_elem.get("count", "0"))
+        actual_count = len(nf_elem)
+        for nf in nf_elem:
+            fid = int(nf.get("numFmtId", "0"))
+            num_fmts[fid] = nf.get("formatCode", "")
+    else:
+        declared_count = 0
+        actual_count = 0
+
+    # fonts — extract color and bold flag
+    fonts = []
+    fonts_elem = find("fonts")
+    fonts_declared = 0
+    if fonts_elem is not None:
+        fonts_declared = int(fonts_elem.get("count", "0"))
+        for font in fonts_elem:
+            color_elem = font.find(f"{NSP}color")
+            bold_elem = font.find(f"{NSP}b")
+            if color_elem is not None:
+                rgb = color_elem.get("rgb", "").lower()
+                theme = color_elem.get("theme")
+            else:
+                rgb = ""
+                theme = None
+            fonts.append({
+                "rgb": rgb,
+                "theme": theme,
+                "bold": bold_elem is not None,
+            })
+
+    # fills
+    fills = []
+    fills_elem = find("fills")
+    fills_declared = 0
+    if fills_elem is not None:
+        fills_declared = int(fills_elem.get("count", "0"))
+        for fill in fills_elem:
+            pf = fill.find(f"{NSP}patternFill")
+            pattern_type = pf.get("patternType", "") if pf is not None else ""
+            fills.append({"patternType": pattern_type})
+
+    # cellXfs
+    xfs = []
+    xfs_elem = find("cellXfs")
+    xfs_declared = 0
+    if xfs_elem is not None:
+        xfs_declared = int(xfs_elem.get("count", "0"))
+        for xf in xfs_elem:
+            xfs.append({
+                "numFmtId": int(xf.get("numFmtId", "0")),
+                "fontId":   int(xf.get("fontId", "0")),
+                "fillId":   int(xf.get("fillId", "0")),
+                "borderId": int(xf.get("borderId", "0")),
+            })
+
+    return {
+        "num_fmts": num_fmts,
+        "num_fmts_declared": declared_count,
+        "num_fmts_actual": actual_count,
+        "fonts": fonts,
+        "fonts_declared": fonts_declared,
+        "fonts_actual": len(fonts),
+        "fills": fills,
+        "fills_declared": fills_declared,
+        "fills_actual": len(fills),
+        "xfs": xfs,
+        "xfs_declared": xfs_declared,
+        "xfs_actual": len(xfs),
+    }
+
+
+def _is_blue_font(font: dict) -> bool:
+    return font["rgb"] == BLUE_RGB
+
+
+def _is_black_font(font: dict) -> bool:
+    return font["rgb"] == BLACK_RGB or (font["rgb"] == "" and font["theme"] is not None)
+
+
+def _is_green_font(font: dict) -> bool:
+    return font["rgb"] == GREEN_RGB
+
+
+def _fmt_is_percent(num_fmt_id: int, num_fmts: dict) -> bool:
+    if num_fmt_id in PERCENT_FMT_IDS:
+        return True
+    fmt_code = num_fmts.get(num_fmt_id, "")
+    return "%" in fmt_code
+
+
+def _fmt_is_comma(num_fmt_id: int, num_fmts: dict) -> bool:
+    if num_fmt_id in COMMA_FMT_IDS:
+        return True
+    fmt_code = num_fmts.get(num_fmt_id, "")
+    # formatCode has comma separator if it contains #,##0 but not a trailing , (scale)
+    return "#,##" in fmt_code and not fmt_code.endswith(",") and not fmt_code.endswith(",\"M\"") and not fmt_code.endswith(",\"K\"")
+
+
+def _looks_like_year(value_text: str) -> bool:
+    """True if value is a 4-digit year between 1900 and 2100."""
+    try:
+        v = int(float(value_text))
+        return 1900 <= v <= 2100
+    except (ValueError, TypeError):
+        return False
+
+
+def _audit(styles_xml: bytes, sheet_xmls: list[tuple[str, bytes]]) -> dict:
+    """
+    Run all formatting compliance checks.
+
+    Args:
+        styles_xml: content of xl/styles.xml
+        sheet_xmls: list of (sheet_name, xml_bytes) for each worksheet
+
+    Returns:
+        dict with violations and summary
+    """
+    results = {
+        "violations": [],
+        "warnings": [],
+        "summary": {},
+    }
+    v = results["violations"]
+    w = results["warnings"]
+
+    styles = _parse_styles(styles_xml)
+    fonts  = styles["fonts"]
+    xfs    = styles["xfs"]
+    num_fmts = styles["num_fmts"]
+
+    # ── Check A: count attribute integrity ──────────────────────────────────
+    if styles["fonts_declared"] != styles["fonts_actual"]:
+        v.append({
+            "type": "count_mismatch",
+            "element": "fonts",
+            "declared": styles["fonts_declared"],
+            "actual": styles["fonts_actual"],
+            "fix": f"Update <fonts count=\"{styles['fonts_actual']}\">",
+        })
+    if styles["fills_declared"] != styles["fills_actual"]:
+        v.append({
+            "type": "count_mismatch",
+            "element": "fills",
+            "declared": styles["fills_declared"],
+            "actual": styles["fills_actual"],
+            "fix": f"Update <fills count=\"{styles['fills_actual']}\">",
+        })
+    if styles["xfs_declared"] != styles["xfs_actual"]:
+        v.append({
+            "type": "count_mismatch",
+            "element": "cellXfs",
+            "declared": styles["xfs_declared"],
+            "actual": styles["xfs_actual"],
+            "fix": f"Update <cellXfs count=\"{styles['xfs_actual']}\">",
+        })
+
+    # ── Check B: fills[0] and fills[1] presence ──────────────────────────────
+    fills = styles["fills"]
+    if len(fills) < 2:
+        v.append({
+            "type": "missing_required_fills",
+            "detail": "fills[0] (none) and fills[1] (gray125) are required by OOXML spec",
+            "fix": "Prepend <fill><patternFill patternType='none'/></fill> and <fill><patternFill patternType='gray125'/></fill>",
+        })
+    else:
+        if fills[0].get("patternType") != "none":
+            v.append({
+                "type": "fills_0_corrupted",
+                "detail": f"fills[0] patternType='{fills[0].get('patternType')}', must be 'none'",
+                "fix": "Set fills[0] patternFill patternType to 'none'",
+            })
+        if fills[1].get("patternType") != "gray125":
+            v.append({
+                "type": "fills_1_corrupted",
+                "detail": f"fills[1] patternType='{fills[1].get('patternType')}', must be 'gray125'",
+                "fix": "Set fills[1] patternFill patternType to 'gray125'",
+            })
+
+    # ── Check C: per-cell style violations ───────────────────────────────────
+    total_cells = 0
+    formula_cells = 0
+    input_cells = 0
+
+    for sheet_name, sheet_xml in sheet_xmls:
+        ws = ET.fromstring(sheet_xml)
+
+        for cell in ws.findall(f".//{NSP}c"):
+            cell_ref = cell.get("r", "?")
+            s_attr = cell.get("s")
+            has_formula = cell.find(f"{NSP}f") is not None
+            v_elem = cell.find(f"{NSP}v")
+            value_text = v_elem.text if v_elem is not None else None
+            total_cells += 1
+
+            # Skip cells with no style
+            if s_attr is None:
+                continue
+
+            try:
+                s_idx = int(s_attr)
+            except ValueError:
+                continue
+
+            # Check C1: s index out of range
+            if s_idx >= len(xfs):
+                v.append({
+                    "type": "style_index_out_of_range",
+                    "sheet": sheet_name,
+                    "cell": cell_ref,
+                    "s": s_idx,
+                    "cellXfs_count": len(xfs),
+                    "fix": f"s={s_idx} exceeds cellXfs count={len(xfs)}; add missing <xf> entries or lower s value",
+                })
+                continue
+
+            xf = xfs[s_idx]
+            font_id = xf["fontId"]
+            num_fmt_id = xf["numFmtId"]
+
+            if font_id >= len(fonts):
+                v.append({
+                    "type": "font_index_out_of_range",
+                    "sheet": sheet_name,
+                    "cell": cell_ref,
+                    "fontId": font_id,
+                    "fonts_count": len(fonts),
+                    "fix": f"fontId={font_id} exceeds fonts count={len(fonts)}; add missing <font> entries",
+                })
+                continue
+
+            font = fonts[font_id]
+
+            # Check C2: color-role violation — formula cell with blue font
+            if has_formula and _is_blue_font(font):
+                formula_cells += 1
+                f_elem = cell.find(f"{NSP}f")
+                formula_text = f_elem.text if f_elem is not None else ""
+                v.append({
+                    "type": "formula_cell_blue_font",
+                    "sheet": sheet_name,
+                    "cell": cell_ref,
+                    "s": s_idx,
+                    "formula": formula_text,
+                    "fix": "Formula cells must use black font (formula) or green font (cross-sheet ref). "
+                           "Use style index 2/6/8/10 (black) or 3/13 (green) instead.",
+                })
+
+            # Check C3: color-role violation — non-formula cell with explicit black
+            # (only flag if it looks like it should be an input — has a numeric value)
+            if (not has_formula and _is_black_font(font)
+                    and value_text is not None
+                    and not font.get("bold")
+                    and num_fmt_id not in (0,)   # skip general-format black (could be label)
+            ):
+                try:
+                    float(value_text)
+                    # It's a numeric value with black font — possible missing blue input marker
+                    w.append({
+                        "type": "numeric_input_may_lack_blue",
+                        "sheet": sheet_name,
+                        "cell": cell_ref,
+                        "s": s_idx,
+                        "value": value_text,
+                        "note": "Hardcoded numeric value has black font — if this is a user-editable "
+                                "assumption, change to blue-font input style (e.g. s=1/5/7/9/11/12).",
+                    })
+                except (ValueError, TypeError):
+                    pass
+
+            # Check C4: year value with comma-formatted numFmt
+            if value_text and _looks_like_year(value_text) and _fmt_is_comma(num_fmt_id, num_fmts):
+                v.append({
+                    "type": "year_with_comma_format",
+                    "sheet": sheet_name,
+                    "cell": cell_ref,
+                    "s": s_idx,
+                    "value": value_text,
+                    "numFmtId": num_fmt_id,
+                    "fix": "Year values must use numFmtId=1 (format '0') to display as 2024 not 2,024. "
+                           "Use style index 11 or a custom xf with numFmtId=1.",
+                })
+
+            # Check C5: percentage format with value > 1 (likely 8 instead of 0.08)
+            if value_text and _fmt_is_percent(num_fmt_id, num_fmts):
+                try:
+                    pct_val = float(value_text)
+                    if pct_val > 1.0:
+                        w.append({
+                            "type": "percent_value_gt_1",
+                            "sheet": sheet_name,
+                            "cell": cell_ref,
+                            "s": s_idx,
+                            "value": value_text,
+                            "displayed_as": f"{pct_val * 100:.0f}%",
+                            "note": f"Value {value_text} with percentage format displays as {pct_val*100:.0f}%. "
+                                    "If intended rate is ~{:.0f}%, store as {:.4f} instead.".format(
+                                        pct_val, pct_val / 100
+                                    ),
+                        })
+                except (ValueError, TypeError):
+                    pass
+
+            if has_formula:
+                formula_cells += 1
+            elif value_text is not None:
+                input_cells += 1
+
+    results["summary"] = {
+        "total_cells_inspected": total_cells,
+        "formula_cells": formula_cells,
+        "input_cells": input_cells,
+        "violations": len(v),
+        "warnings": len(w),
+    }
+
+    return results
+
+
+def _load_from_xlsx(xlsx_path: str) -> tuple[bytes, list[tuple[str, bytes]]]:
+    """Load styles.xml and all sheet XMLs from a packed xlsx file."""
+    with zipfile.ZipFile(xlsx_path, "r") as z:
+        styles_xml = z.read("xl/styles.xml")
+
+        # Get sheet name mapping
+        wb_xml = z.read("xl/workbook.xml")
+        wb = ET.fromstring(wb_xml)
+        rel_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
+        rels_xml = z.read("xl/_rels/workbook.xml.rels")
+        rels = ET.fromstring(rels_xml)
+
+        rid_to_name = {}
+        for sheet in wb.findall(f".//{{{NS}}}sheet"):
+            rid = sheet.get(f"{{{rel_ns}}}id", "")
+            name = sheet.get("name", "")
+            rid_to_name[rid] = name
+
+        rid_to_path = {}
+        for rel in rels:
+            rid = rel.get("Id", "")
+            target = rel.get("Target", "")
+            if "worksheets" in target:
+                if not target.startswith("xl/"):
+                    target = "xl/" + target
+                rid_to_path[rid] = target
+
+        sheet_xmls = []
+        for rid, name in rid_to_name.items():
+            path = rid_to_path.get(rid)
+            if path and path in z.namelist():
+                sheet_xmls.append((name, z.read(path)))
+
+    return styles_xml, sheet_xmls
+
+
+def _load_from_dir(unpacked_dir: str) -> tuple[bytes, list[tuple[str, bytes]]]:
+    """Load styles.xml and all sheet XMLs from an unpacked directory."""
+    styles_path = os.path.join(unpacked_dir, "xl", "styles.xml")
+    with open(styles_path, "rb") as f:
+        styles_xml = f.read()
+
+    # Get sheet names from workbook.xml
+    wb_path = os.path.join(unpacked_dir, "xl", "workbook.xml")
+    wb = ET.fromstring(open(wb_path, "rb").read())
+    rels_path = os.path.join(unpacked_dir, "xl", "_rels", "workbook.xml.rels")
+    rels = ET.fromstring(open(rels_path, "rb").read())
+
+    rel_ns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
+    rid_to_name = {}
+    for sheet in wb.findall(f".//{{{NS}}}sheet"):
+        rid = sheet.get(f"{{{rel_ns}}}id", "")
+        name = sheet.get("name", "")
+        rid_to_name[rid] = name
+
+    rid_to_path = {}
+    for rel in rels:
+        rid = rel.get("Id", "")
+        target = rel.get("Target", "")
+        if "worksheets" in target:
+            rid_to_path[rid] = target
+
+    sheet_xmls = []
+    ws_dir = os.path.join(unpacked_dir, "xl", "worksheets")
+    for rid, name in rid_to_name.items():
+        rel_path = rid_to_path.get(rid, "")
+        # rel_path may be "worksheets/sheet1.xml" or absolute path
+        if rel_path.startswith("worksheets/"):
+            full = os.path.join(unpacked_dir, "xl", rel_path)
+        else:
+            full = os.path.join(unpacked_dir, "xl", "worksheets", os.path.basename(rel_path))
+        if os.path.exists(full):
+            with open(full, "rb") as f:
+                sheet_xmls.append((name, f.read()))
+
+    return styles_xml, sheet_xmls
+
+
+def main() -> None:
+    use_json = "--json" in sys.argv
+    summary_only = "--summary" in sys.argv
+
+    args_clean = [a for a in sys.argv[1:] if not a.startswith("--")]
+    if not args_clean:
+        print("Usage: style_audit.py <input.xlsx | unpacked_dir/> [--json] [--summary]")
+        sys.exit(1)
+
+    target = args_clean[0]
+
+    try:
+        if os.path.isdir(target):
+            styles_xml, sheet_xmls = _load_from_dir(target)
+        elif target.endswith(".xlsx") or target.endswith(".xlsm"):
+            styles_xml, sheet_xmls = _load_from_xlsx(target)
+        else:
+            print(f"ERROR: unrecognized target '{target}' — must be .xlsx file or unpacked directory")
+            sys.exit(1)
+    except Exception as e:
+        print(f"ERROR loading file: {e}")
+        sys.exit(1)
+
+    results = _audit(styles_xml, sheet_xmls)
+
+    if use_json:
+        print(json.dumps(results, indent=2, ensure_ascii=False))
+        sys.exit(1 if results["summary"]["violations"] > 0 else 0)
+
+    # Human-readable output
+    s = results["summary"]
+    print(f"Target  : {target}")
+    print(f"Cells   : {s['total_cells_inspected']} inspected  "
+          f"({s['formula_cells']} formula, {s['input_cells']} input)")
+    print(f"Violations : {s['violations']}")
+    print(f"Warnings   : {s['warnings']}")
+
+    if not summary_only:
+        if results["violations"]:
+            print("\n── Violations (must fix) ──")
+            for item in results["violations"]:
+                t = item["type"]
+                if t == "count_mismatch":
+                    print(f"  [FAIL] {item['element']} count mismatch: declared={item['declared']}, "
+                          f"actual={item['actual']}")
+                    print(f"         Fix: {item['fix']}")
+                elif t == "missing_required_fills":
+                    print(f"  [FAIL] {item['detail']}")
+                    print(f"         Fix: {item['fix']}")
+                elif t in ("fills_0_corrupted", "fills_1_corrupted"):
+                    print(f"  [FAIL] {item['detail']}")
+                    print(f"         Fix: {item['fix']}")
+                elif t == "formula_cell_blue_font":
+                    print(f"  [FAIL] [{item['sheet']}!{item['cell']}] formula cell has blue font "
+                          f"(role=input, but cell contains formula: {item.get('formula', '')})")
+                    print(f"         Fix: {item['fix']}")
+                elif t == "style_index_out_of_range":
+                    print(f"  [FAIL] [{item['sheet']}!{item['cell']}] s={item['s']} but "
+                          f"cellXfs count={item['cellXfs_count']}")
+                    print(f"         Fix: {item['fix']}")
+                elif t == "font_index_out_of_range":
+                    print(f"  [FAIL] [{item['sheet']}!{item['cell']}] fontId={item['fontId']} but "
+                          f"fonts count={item['fonts_count']}")
+                    print(f"         Fix: {item['fix']}")
+                elif t == "year_with_comma_format":
+                    print(f"  [FAIL] [{item['sheet']}!{item['cell']}] year value {item['value']} "
+                          f"uses comma-format (numFmtId={item['numFmtId']}) — will display as "
+                          f"{int(float(item['value'])):,}")
+                    print(f"         Fix: {item['fix']}")
+                else:
+                    print(f"  [FAIL] {item}")
+
+        if results["warnings"] and not summary_only:
+            print("\n── Warnings (review recommended) ──")
+            for item in results["warnings"]:
+                t = item["type"]
+                if t == "numeric_input_may_lack_blue":
+                    print(f"  [WARN] [{item['sheet']}!{item['cell']}] numeric value={item['value']} "
+                          f"has black font — if user-editable assumption, use blue-font input style")
+                elif t == "percent_value_gt_1":
+                    print(f"  [WARN] [{item['sheet']}!{item['cell']}] percent-format cell has "
+                          f"value={item['value']} (displays as {item['displayed_as']}) — "
+                          f"likely should be stored as decimal (e.g. 0.08 for 8%)")
+                else:
+                    print(f"  [WARN] {item}")
+
+    print()
+    if s["violations"] == 0:
+        if s["warnings"] == 0:
+            print("PASS — Financial formatting is compliant")
+        else:
+            print(f"PASS with WARN — {s['warnings']} warning(s) need review")
+    else:
+        print(f"FAIL — {s['violations']} violation(s) must be fixed before delivery")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/minimax-xlsx/scripts/xlsx_add_column.py
+++ b/minimax-xlsx/scripts/xlsx_add_column.py
@@ -0,0 +1,395 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+xlsx_add_column.py — Add a new column to a worksheet in an unpacked xlsx.
+
+Usage examples:
+    # Add a percentage column with formulas and number format
+    python3 xlsx_add_column.py /tmp/work/ --col G \\
+        --sheet "Budget FY2025" \\
+        --header "% of Total" \\
+        --formula '=F{row}/$F$10' --formula-rows 2:9 \\
+        --total-row 10 --total-formula '=SUM(G2:G9)' \\
+        --numfmt '0.0%'
+
+What it does:
+  1. Adds header cell (copies style from previous column's header)
+  2. Adds formula cells for the specified row range
+  3. Adds a total formula cell if specified
+  4. Creates a new cell style with the given numfmt if needed
+  5. Updates sharedStrings.xml for header text
+  6. Updates dimension ref and column definitions
+
+IMPORTANT: Run on an UNPACKED directory (from xlsx_unpack.py).
+After running, repack with xlsx_pack.py.
+"""
+
+import argparse
+import copy
+import os
+import re
+import sys
+import xml.dom.minidom
+import xml.etree.ElementTree as ET
+
+NS_SS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
+NS_REL = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
+
+ET.register_namespace('', NS_SS)
+ET.register_namespace('r', NS_REL)
+ET.register_namespace('xdr', 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing')
+ET.register_namespace('x14', 'http://schemas.microsoft.com/office/spreadsheetml/2009/9/main')
+ET.register_namespace('xr2', 'http://schemas.microsoft.com/office/spreadsheetml/2015/revision2')
+ET.register_namespace('mc', 'http://schemas.openxmlformats.org/markup-compatibility/2006')
+
+
+def _tag(local: str) -> str:
+    return f"{{{NS_SS}}}{local}"
+
+
+def _write_tree(tree: ET.ElementTree, path: str) -> None:
+    tree.write(path, encoding="unicode", xml_declaration=False)
+    with open(path, "r", encoding="utf-8") as fh:
+        raw = fh.read()
+    try:
+        dom = xml.dom.minidom.parseString(raw.encode("utf-8"))
+        pretty = dom.toprettyxml(indent="  ", encoding="utf-8").decode("utf-8")
+        lines = [line for line in pretty.splitlines() if line.strip()]
+        with open(path, "w", encoding="utf-8") as fh:
+            fh.write("\n".join(lines) + "\n")
+    except Exception:
+        pass
+
+
+def col_number(s: str) -> int:
+    n = 0
+    for c in s.upper():
+        n = n * 26 + (ord(c) - 64)
+    return n
+
+
+def col_letter(n: int) -> str:
+    r = ""
+    while n > 0:
+        n, rem = divmod(n - 1, 26)
+        r = chr(65 + rem) + r
+    return r
+
+
+def find_ws_path(work_dir: str, sheet_name: str | None) -> str:
+    wb_tree = ET.parse(os.path.join(work_dir, "xl", "workbook.xml"))
+    rid = None
+    for sheet in wb_tree.getroot().iter(_tag("sheet")):
+        if sheet_name is None or sheet.get("name") == sheet_name:
+            rid = sheet.get(f"{{{NS_REL}}}id")
+            break
+
+    if rid is None:
+        print(f"ERROR: Sheet not found: {sheet_name}")
+        sys.exit(1)
+
+    rels_tree = ET.parse(os.path.join(work_dir, "xl", "_rels", "workbook.xml.rels"))
+    for rel in rels_tree.getroot():
+        if rel.get("Id") == rid:
+            return os.path.join(work_dir, "xl", rel.get("Target"))
+
+    print(f"ERROR: Relationship not found: {rid}")
+    sys.exit(1)
+
+
+def add_shared_string(work_dir: str, text: str) -> int:
+    ss_path = os.path.join(work_dir, "xl", "sharedStrings.xml")
+    tree = ET.parse(ss_path)
+    root = tree.getroot()
+
+    idx = 0
+    for si in root.findall(_tag("si")):
+        t_el = si.find(_tag("t"))
+        if t_el is not None and t_el.text == text:
+            return idx
+        idx += 1
+
+    si = ET.SubElement(root, _tag("si"))
+    t = ET.SubElement(si, _tag("t"))
+    t.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")
+    t.text = text
+
+    root.set("count", str(int(root.get("count", "0")) + 1))
+    root.set("uniqueCount", str(int(root.get("uniqueCount", "0")) + 1))
+
+    _write_tree(tree, ss_path)
+    return idx
+
+
+def get_cell_style(ws_tree: ET.ElementTree, col: str, row: int) -> int:
+    ref = f"{col}{row}"
+    for row_el in ws_tree.getroot().iter(_tag("row")):
+        if row_el.get("r") == str(row):
+            for c in row_el:
+                if c.get("r") == ref:
+                    return int(c.get("s", "0"))
+    return 0
+
+
+def ensure_numfmt_style(work_dir: str, ref_style_idx: int, numfmt_code: str) -> int:
+    """Clone a cellXfs entry with the given numfmt. Returns new style index."""
+    styles_path = os.path.join(work_dir, "xl", "styles.xml")
+    tree = ET.parse(styles_path)
+    root = tree.getroot()
+
+    # Find or add numFmt
+    numfmts = root.find(_tag("numFmts"))
+    numfmt_id = None
+    if numfmts is not None:
+        for nf in numfmts:
+            if nf.get("formatCode") == numfmt_code:
+                numfmt_id = int(nf.get("numFmtId"))
+                break
+
+    if numfmt_id is None:
+        max_id = 163
+        if numfmts is not None:
+            for nf in numfmts:
+                max_id = max(max_id, int(nf.get("numFmtId", "0")))
+        else:
+            numfmts = ET.SubElement(root, _tag("numFmts"))
+            numfmts.set("count", "0")
+            root.remove(numfmts)
+            root.insert(0, numfmts)
+
+        numfmt_id = max_id + 1
+        nf = ET.SubElement(numfmts, _tag("numFmt"))
+        nf.set("numFmtId", str(numfmt_id))
+        nf.set("formatCode", numfmt_code)
+        numfmts.set("count", str(len(list(numfmts))))
+
+    # Find or create cellXfs entry
+    cellxfs = root.find(_tag("cellXfs"))
+    xf_list = list(cellxfs)
+    ref_xf = xf_list[min(ref_style_idx, len(xf_list) - 1)]
+
+    for i, xf in enumerate(xf_list):
+        if (xf.get("numFmtId") == str(numfmt_id) and
+                xf.get("fontId") == ref_xf.get("fontId") and
+                xf.get("fillId") == ref_xf.get("fillId") and
+                xf.get("borderId") == ref_xf.get("borderId")):
+            return i
+
+    new_xf = copy.deepcopy(ref_xf)
+    new_xf.set("numFmtId", str(numfmt_id))
+    new_xf.set("applyNumberFormat", "true")
+    cellxfs.append(new_xf)
+    cellxfs.set("count", str(len(list(cellxfs))))
+
+    _write_tree(tree, styles_path)
+    return len(list(cellxfs)) - 1
+
+
+def _apply_border_to_row(work_dir: str, ws_path: str, ws_tree: ET.ElementTree,
+                         ws_root: ET.Element, row_map: dict, border_row: int,
+                         border_style: str, new_col: str) -> None:
+    """Apply a top border to ALL cells in the specified row (A through new_col)."""
+    styles_path = os.path.join(work_dir, "xl", "styles.xml")
+    st_tree = ET.parse(styles_path)
+    st_root = st_tree.getroot()
+
+    # 1. Create a new border entry with the specified top style
+    borders = st_root.find(_tag("borders"))
+    new_border = ET.SubElement(borders, _tag("border"))
+    for side in ("left", "right"):
+        ET.SubElement(new_border, _tag(side))
+    top_el = ET.SubElement(new_border, _tag("top"))
+    top_el.set("style", border_style)
+    ET.SubElement(new_border, _tag("bottom"))
+    ET.SubElement(new_border, _tag("diagonal"))
+    borders.set("count", str(len(list(borders))))
+    new_border_id = len(list(borders)) - 1
+
+    # 2. For each existing style used in the row, create a clone with the new borderId
+    cellxfs = st_root.find(_tag("cellXfs"))
+    style_remap = {}  # old_style_idx -> new_style_idx
+
+    if border_row not in row_map:
+        return
+
+    row_el = row_map[border_row]
+    # Collect all cells in this row and their styles
+    for c in row_el:
+        old_s = int(c.get("s", "0"))
+        if old_s not in style_remap:
+            xf_list = list(cellxfs)
+            ref_xf = xf_list[min(old_s, len(xf_list) - 1)]
+            new_xf = copy.deepcopy(ref_xf)
+            new_xf.set("borderId", str(new_border_id))
+            new_xf.set("applyBorder", "true")
+            cellxfs.append(new_xf)
+            cellxfs.set("count", str(len(list(cellxfs))))
+            style_remap[old_s] = len(list(cellxfs)) - 1
+
+    # 3. Apply remapped styles to all cells in the row
+    for c in row_el:
+        old_s = int(c.get("s", "0"))
+        if old_s in style_remap:
+            c.set("s", str(style_remap[old_s]))
+
+    _write_tree(st_tree, styles_path)
+    last_col_num = col_number(new_col)
+    print(f"  Applied {border_style} top border to all cells in row {border_row} "
+          f"(A-{new_col}, {len(style_remap)} style(s) cloned)")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Add a column to a worksheet in an unpacked xlsx")
+    parser.add_argument("work_dir", help="Unpacked xlsx working directory")
+    parser.add_argument("--col", required=True, help="Column letter (e.g., G)")
+    parser.add_argument("--sheet", default=None, help="Sheet name (default: first)")
+    parser.add_argument("--header", default=None, help="Header text for row 1")
+    parser.add_argument("--formula", default=None,
+                        help="Formula template with {row} placeholder")
+    parser.add_argument("--formula-rows", default=None,
+                        help="Row range for formulas (e.g., 2:9)")
+    parser.add_argument("--total-row", type=int, default=None,
+                        help="Row number for total formula")
+    parser.add_argument("--total-formula", default=None,
+                        help="Formula for total row")
+    parser.add_argument("--numfmt", default=None,
+                        help="Number format for data/total cells (e.g., 0.0%%)")
+    parser.add_argument("--border-row", type=int, default=None,
+                        help="Row to apply a top border to ALL cells (e.g., 10)")
+    parser.add_argument("--border-style", default="medium",
+                        help="Border style: thin, medium, thick (default: medium)")
+    args = parser.parse_args()
+
+    col = args.col.upper()
+    prev_col = col_letter(col_number(col) - 1) if col_number(col) > 1 else "A"
+
+    ws_path = find_ws_path(args.work_dir, args.sheet)
+    ws_tree = ET.parse(ws_path)
+    changes = 0
+
+    print(f"Adding column {col} to {os.path.basename(ws_path)}")
+
+    # Resolve styles from previous column
+    header_style = get_cell_style(ws_tree, prev_col, 1) if args.header else 0
+
+    data_style = None
+    if args.formula_rows:
+        start_row = int(args.formula_rows.split(":")[0])
+        ref = get_cell_style(ws_tree, prev_col, start_row)
+        data_style = (ensure_numfmt_style(args.work_dir, ref, args.numfmt)
+                      if args.numfmt else ref)
+
+    total_style = None
+    if args.total_row:
+        ref = get_cell_style(ws_tree, prev_col, args.total_row)
+        total_style = (ensure_numfmt_style(args.work_dir, ref, args.numfmt)
+                       if args.numfmt else ref)
+
+    # Add header to sharedStrings
+    header_idx = add_shared_string(args.work_dir, args.header) if args.header else None
+
+    # Re-parse worksheet (sharedStrings write may have changed state)
+    ws_tree = ET.parse(ws_path)
+    root = ws_tree.getroot()
+    sheet_data = root.find(_tag("sheetData"))
+
+    row_map = {}
+    for row_el in sheet_data:
+        r = row_el.get("r")
+        if r:
+            row_map[int(r)] = row_el
+
+    # Add header cell
+    if args.header and 1 in row_map:
+        cell = ET.SubElement(row_map[1], _tag("c"))
+        cell.set("r", f"{col}1")
+        cell.set("s", str(header_style))
+        cell.set("t", "s")
+        v = ET.SubElement(cell, _tag("v"))
+        v.text = str(header_idx)
+        changes += 1
+        print(f"  {col}1 = \"{args.header}\" (header, style={header_style})")
+
+    # Add formula cells
+    if args.formula and args.formula_rows:
+        start, end = map(int, args.formula_rows.split(":"))
+        for row_num in range(start, end + 1):
+            if row_num not in row_map:
+                row_el = ET.SubElement(sheet_data, _tag("row"))
+                row_el.set("r", str(row_num))
+                row_map[row_num] = row_el
+
+            formula_text = args.formula.replace("{row}", str(row_num))
+            formula_text = formula_text.lstrip("=")
+            cell = ET.SubElement(row_map[row_num], _tag("c"))
+            cell.set("r", f"{col}{row_num}")
+            if data_style is not None:
+                cell.set("s", str(data_style))
+            f_el = ET.SubElement(cell, _tag("f"))
+            f_el.text = formula_text
+            changes += 1
+
+        print(f"  {col}{start}:{col}{end} = formulas (style={data_style})")
+
+    # Add total formula
+    if args.total_row and args.total_formula:
+        if args.total_row not in row_map:
+            row_el = ET.SubElement(sheet_data, _tag("row"))
+            row_el.set("r", str(args.total_row))
+            row_map[args.total_row] = row_el
+
+        total_f = args.total_formula.lstrip("=")
+        cell = ET.SubElement(row_map[args.total_row], _tag("c"))
+        cell.set("r", f"{col}{args.total_row}")
+        if total_style is not None:
+            cell.set("s", str(total_style))
+        f_el = ET.SubElement(cell, _tag("f"))
+        f_el.text = total_f
+        changes += 1
+        print(f"  {col}{args.total_row} = ={total_f} (style={total_style})")
+
+    # Update dimension
+    for dim in root.iter(_tag("dimension")):
+        old_ref = dim.get("ref", "")
+        if ":" in old_ref:
+            start_ref, end_ref = old_ref.split(":")
+            end_col_str = re.match(r"([A-Z]+)", end_ref).group(1)
+            end_row_str = re.search(r"(\d+)", end_ref).group(1)
+            if col_number(col) > col_number(end_col_str):
+                new_ref = f"{start_ref}:{col}{end_row_str}"
+                dim.set("ref", new_ref)
+                print(f"  Dimension: {old_ref} → {new_ref}")
+
+    # Extend <cols> to cover new column
+    cols_el = root.find(_tag("cols"))
+    if cols_el is not None:
+        new_col_num = col_number(col)
+        covered = any(
+            int(c.get("min", "0")) <= new_col_num <= int(c.get("max", "0"))
+            for c in cols_el
+        )
+        if not covered:
+            prev_num = col_number(prev_col)
+            for c in cols_el:
+                if int(c.get("min", "0")) <= prev_num <= int(c.get("max", "0")):
+                    new_col_def = copy.deepcopy(c)
+                    new_col_def.set("min", str(new_col_num))
+                    new_col_def.set("max", str(new_col_num))
+                    cols_el.append(new_col_def)
+                    print(f"  Added <col> definition for column {col}")
+                    break
+
+    # Apply border to entire row if requested
+    if args.border_row:
+        _apply_border_to_row(args.work_dir, ws_path, ws_tree, root,
+                             row_map, args.border_row, args.border_style,
+                             col)
+
+    _write_tree(ws_tree, ws_path)
+    print(f"\nDone. {changes} cells added.")
+    print(f"\nNext: python3 xlsx_pack.py {args.work_dir} output.xlsx")
+
+
+if __name__ == "__main__":
+    main()
--- a/minimax-xlsx/scripts/xlsx_insert_row.py
+++ b/minimax-xlsx/scripts/xlsx_insert_row.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+xlsx_insert_row.py — Insert a new data row into a worksheet in an unpacked xlsx.
+
+Usage examples:
+    # Insert "Utilities" row at position 6, copying styles from row 5
+    python3 xlsx_insert_row.py /tmp/work/ --at 6 \\
+        --sheet "Budget FY2025" \\
+        --text A=Utilities \\
+        --values B=3000 C=3000 D=3500 E=3500 \\
+        --formula 'F=SUM(B{row}:E{row})' \\
+        --copy-style-from 5
+
+What it does:
+  1. Shifts all rows >= at down by 1 (calls xlsx_shift_rows.py)
+  2. Adds text values to sharedStrings.xml
+  3. Inserts new row with specified cells (text, numbers, formulas)
+  4. Copies cell styles from a reference row
+  5. Updates dimension ref
+
+The shift operation automatically expands SUM formulas that span the
+insertion point, so total-row formulas are updated without extra work.
+
+IMPORTANT: Run on an UNPACKED directory (from xlsx_unpack.py).
+After running, repack with xlsx_pack.py.
+"""
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import xml.dom.minidom
+import xml.etree.ElementTree as ET
+
+NS_SS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
+NS_REL = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
+
+ET.register_namespace('', NS_SS)
+ET.register_namespace('r', NS_REL)
+ET.register_namespace('xdr', 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing')
+ET.register_namespace('x14', 'http://schemas.microsoft.com/office/spreadsheetml/2009/9/main')
+ET.register_namespace('xr2', 'http://schemas.microsoft.com/office/spreadsheetml/2015/revision2')
+ET.register_namespace('mc', 'http://schemas.openxmlformats.org/markup-compatibility/2006')
+
+
+def _tag(local: str) -> str:
+    return f"{{{NS_SS}}}{local}"
+
+
+def _write_tree(tree: ET.ElementTree, path: str) -> None:
+    tree.write(path, encoding="unicode", xml_declaration=False)
+    with open(path, "r", encoding="utf-8") as fh:
+        raw = fh.read()
+    try:
+        dom = xml.dom.minidom.parseString(raw.encode("utf-8"))
+        pretty = dom.toprettyxml(indent="  ", encoding="utf-8").decode("utf-8")
+        lines = [line for line in pretty.splitlines() if line.strip()]
+        with open(path, "w", encoding="utf-8") as fh:
+            fh.write("\n".join(lines) + "\n")
+    except Exception:
+        pass
+
+
+def col_number(s: str) -> int:
+    n = 0
+    for c in s.upper():
+        n = n * 26 + (ord(c) - 64)
+    return n
+
+
+def find_ws_path(work_dir: str, sheet_name: str | None) -> str:
+    wb_tree = ET.parse(os.path.join(work_dir, "xl", "workbook.xml"))
+    rid = None
+    for sheet in wb_tree.getroot().iter(_tag("sheet")):
+        if sheet_name is None or sheet.get("name") == sheet_name:
+            rid = sheet.get(f"{{{NS_REL}}}id")
+            break
+
+    if rid is None:
+        print(f"ERROR: Sheet not found: {sheet_name}")
+        sys.exit(1)
+
+    rels_tree = ET.parse(os.path.join(work_dir, "xl", "_rels", "workbook.xml.rels"))
+    for rel in rels_tree.getroot():
+        if rel.get("Id") == rid:
+            return os.path.join(work_dir, "xl", rel.get("Target"))
+
+    print(f"ERROR: Relationship not found: {rid}")
+    sys.exit(1)
+
+
+def add_shared_string(work_dir: str, text: str) -> int:
+    ss_path = os.path.join(work_dir, "xl", "sharedStrings.xml")
+    tree = ET.parse(ss_path)
+    root = tree.getroot()
+
+    idx = 0
+    for si in root.findall(_tag("si")):
+        t_el = si.find(_tag("t"))
+        if t_el is not None and t_el.text == text:
+            return idx
+        idx += 1
+
+    si = ET.SubElement(root, _tag("si"))
+    t = ET.SubElement(si, _tag("t"))
+    t.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")
+    t.text = text
+
+    root.set("count", str(int(root.get("count", "0")) + 1))
+    root.set("uniqueCount", str(int(root.get("uniqueCount", "0")) + 1))
+
+    _write_tree(tree, ss_path)
+    return idx
+
+
+def get_row_styles(ws_tree: ET.ElementTree, row_num: int) -> dict[str, int]:
+    """Get {col_letter: style_index} for all cells in a row."""
+    styles = {}
+    for row_el in ws_tree.getroot().iter(_tag("row")):
+        if row_el.get("r") == str(row_num):
+            for c in row_el:
+                ref = c.get("r", "")
+                col_str = re.match(r"([A-Z]+)", ref)
+                if col_str:
+                    styles[col_str.group(1)] = int(c.get("s", "0"))
+            break
+    return styles
+
+
+def parse_kv(specs: list[str] | None) -> dict[str, str]:
+    if not specs:
+        return {}
+    result = {}
+    for spec in specs:
+        col, _, val = spec.partition("=")
+        result[col.upper()] = val
+    return result
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Insert a new row into a worksheet in an unpacked xlsx")
+    parser.add_argument("work_dir", help="Unpacked xlsx working directory")
+    parser.add_argument("--at", type=int, required=True,
+                        help="Row number to insert at (existing rows shift down)")
+    parser.add_argument("--sheet", default=None, help="Sheet name (default: first)")
+    parser.add_argument("--text", nargs="+", default=None,
+                        help="Text cells: COL=VALUE (e.g., A=Utilities)")
+    parser.add_argument("--values", nargs="+", default=None,
+                        help="Numeric cells: COL=VALUE (e.g., B=3000 C=3000)")
+    parser.add_argument("--formula", nargs="+", default=None,
+                        help="Formula cells: COL=FORMULA with {row} (e.g., F=SUM(B{row}:E{row}))")
+    parser.add_argument("--copy-style-from", type=int, default=None,
+                        help="Copy cell styles from this row number")
+    args = parser.parse_args()
+
+    at = args.at
+    text_cells = parse_kv(args.text)
+    num_cells = parse_kv(args.values)
+    formula_cells = parse_kv(args.formula)
+
+    # Step 1: Shift rows down using xlsx_shift_rows.py
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    shift_script = os.path.join(script_dir, "xlsx_shift_rows.py")
+
+    print(f"Step 1: Shifting rows >= {at} down by 1...")
+    result = subprocess.run(
+        [sys.executable, shift_script, args.work_dir, "insert", str(at), "1"],
+        capture_output=True, text=True,
+    )
+    if result.returncode != 0:
+        print(f"ERROR: shift_rows failed:\n{result.stderr}")
+        sys.exit(1)
+    print(result.stdout)
+
+    # Step 2: Resolve worksheet path and get reference styles
+    ws_path = find_ws_path(args.work_dir, args.sheet)
+    ws_tree = ET.parse(ws_path)
+
+    ref_styles = {}
+    if args.copy_style_from is not None:
+        ref_styles = get_row_styles(ws_tree, args.copy_style_from)
+        print(f"Step 2: Copied styles from row {args.copy_style_from}: {ref_styles}")
+
+    # Step 3: Add text values to sharedStrings
+    text_indices = {}
+    for col, text in text_cells.items():
+        text_indices[col] = add_shared_string(args.work_dir, text)
+        print(f"  Added shared string: \"{text}\" → index {text_indices[col]}")
+
+    # Step 4: Re-parse worksheet and build new row
+    ws_tree = ET.parse(ws_path)
+    root = ws_tree.getroot()
+    sheet_data = root.find(_tag("sheetData"))
+
+    new_row = ET.Element(_tag("row"))
+    new_row.set("r", str(at))
+
+    all_cols = sorted(
+        set(list(text_cells) + list(num_cells) + list(formula_cells)),
+        key=col_number,
+    )
+
+    for col in all_cols:
+        cell = ET.SubElement(new_row, _tag("c"))
+        cell.set("r", f"{col}{at}")
+
+        if col in ref_styles:
+            cell.set("s", str(ref_styles[col]))
+
+        if col in text_cells:
+            cell.set("t", "s")
+            v = ET.SubElement(cell, _tag("v"))
+            v.text = str(text_indices[col])
+        elif col in num_cells:
+            # Omit t attribute for numbers — "n" is the default per OOXML spec
+            v = ET.SubElement(cell, _tag("v"))
+            v.text = str(num_cells[col])
+        elif col in formula_cells:
+            formula_text = formula_cells[col].replace("{row}", str(at)).lstrip("=")
+            f_el = ET.SubElement(cell, _tag("f"))
+            f_el.text = formula_text
+            # Use formula style from reference if available; it may differ
+            # from the data style (e.g., black font vs blue font).
+            # Look for the formula column's style specifically.
+            if col in ref_styles:
+                cell.set("s", str(ref_styles[col]))
+
+    # Insert new row at the correct position in sheetData (sorted by row number)
+    insert_idx = 0
+    for i, row_el in enumerate(list(sheet_data)):
+        r = row_el.get("r")
+        if r and int(r) > at:
+            insert_idx = i
+            break
+        insert_idx = i + 1
+
+    sheet_data.insert(insert_idx, new_row)
+
+    print(f"\nStep 3: Inserted row {at} with {len(all_cols)} cells:")
+    for col in all_cols:
+        if col in text_cells:
+            print(f"  {col}{at} = \"{text_cells[col]}\" (text)")
+        elif col in num_cells:
+            print(f"  {col}{at} = {num_cells[col]} (number)")
+        elif col in formula_cells:
+            ftext = formula_cells[col].replace("{row}", str(at))
+            print(f"  {col}{at} = {ftext} (formula)")
+
+    # Step 5: Update dimension
+    for dim in root.iter(_tag("dimension")):
+        old_ref = dim.get("ref", "")
+        if ":" in old_ref:
+            start_ref, end_ref = old_ref.split(":")
+            end_row = int(re.search(r"(\d+)", end_ref).group(1))
+            end_col = re.match(r"([A-Z]+)", end_ref).group(1)
+            # Dimension was already shifted by shift_rows, just verify
+            max_col = max(col_number(end_col), max(col_number(c) for c in all_cols))
+            max_col_letter = end_col if col_number(end_col) >= max_col else col
+            new_ref = f"{start_ref}:{max_col_letter}{end_row}"
+            if new_ref != old_ref:
+                dim.set("ref", new_ref)
+                print(f"\n  Dimension: {old_ref} → {new_ref}")
+
+    _write_tree(ws_tree, ws_path)
+
+    print(f"\nDone. Row {at} inserted successfully.")
+    print(f"\nNext: python3 xlsx_pack.py {args.work_dir} output.xlsx")
+
+
+if __name__ == "__main__":
+    main()
--- a/minimax-xlsx/scripts/xlsx_pack.py
+++ b/minimax-xlsx/scripts/xlsx_pack.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+xlsx_pack.py — Pack a working directory back into a valid xlsx file.
+
+Usage:
+    python3 xlsx_pack.py <source_dir> <output.xlsx>
+
+Requirements:
+    - source_dir must contain [Content_Types].xml at its root
+    - All XML files are re-validated for well-formedness before packing
+
+The resulting xlsx is a valid ZIP archive with correct OOXML structure.
+"""
+
+import sys
+import os
+import zipfile
+import xml.etree.ElementTree as ET
+
+
+def validate_xml_files(source_dir: str) -> list[str]:
+    """Return list of XML files that fail to parse."""
+    bad = []
+    for dirpath, _, filenames in os.walk(source_dir):
+        for fname in filenames:
+            if fname.endswith(".xml") or fname.endswith(".rels"):
+                fpath = os.path.join(dirpath, fname)
+                try:
+                    ET.parse(fpath)
+                except ET.ParseError as e:
+                    rel = os.path.relpath(fpath, source_dir)
+                    bad.append(f"{rel}: {e}")
+    return bad
+
+
+def pack(source_dir: str, xlsx_path: str) -> None:
+    if not os.path.isdir(source_dir):
+        print(f"ERROR: Directory not found: {source_dir}", file=sys.stderr)
+        sys.exit(1)
+
+    content_types = os.path.join(source_dir, "[Content_Types].xml")
+    if not os.path.isfile(content_types):
+        print(
+            f"ERROR: Missing [Content_Types].xml in {source_dir}\n"
+            "  This file is required at the root of every valid xlsx package.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    # Validate XML well-formedness before packing
+    print("Validating XML files...")
+    bad_files = validate_xml_files(source_dir)
+    if bad_files:
+        print("ERROR: The following files have XML parse errors:", file=sys.stderr)
+        for b in bad_files:
+            print(f"  {b}", file=sys.stderr)
+        print(
+            "\nFix all XML errors before packing. "
+            "A malformed xlsx cannot be opened by Excel or LibreOffice.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    print("✓ All XML files are well-formed")
+
+    # Count files to pack
+    file_count = sum(len(files) for _, _, files in os.walk(source_dir))
+
+    with zipfile.ZipFile(xlsx_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
+        for dirpath, _, filenames in os.walk(source_dir):
+            for fname in filenames:
+                fpath = os.path.join(dirpath, fname)
+                arcname = os.path.relpath(fpath, source_dir)
+                z.write(fpath, arcname)
+
+    size = os.path.getsize(xlsx_path)
+    print(f"Packed {file_count} files → '{xlsx_path}' ({size:,} bytes)")
+    print("\nNext step: run formula_check.py to validate formulas:")
+    print(f"  python3 formula_check.py {xlsx_path}")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: xlsx_pack.py <source_dir> <output.xlsx>")
+        sys.exit(1)
+    pack(sys.argv[1], sys.argv[2])
--- a/minimax-xlsx/scripts/xlsx_reader.py
+++ b/minimax-xlsx/scripts/xlsx_reader.py
@@ -0,0 +1,362 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+xlsx_reader.py — Structure discovery and data analysis tool for Excel/CSV files.
+
+Usage:
+    python3 xlsx_reader.py <file>                   # full structure report
+    python3 xlsx_reader.py <file> --sheet Sales     # analyze one sheet
+    python3 xlsx_reader.py <file> --json            # machine-readable output
+    python3 xlsx_reader.py <file> --quality         # data quality audit only
+
+Supports: .xlsx, .xlsm, .csv, .tsv
+Does NOT modify the source file in any way.
+
+Exit codes:
+    0 — success
+    1 — file not found / unsupported format / encoding failure
+"""
+
+import sys
+import json
+import argparse
+from pathlib import Path
+
+
+# ---------------------------------------------------------------------------
+# Format detection and loading
+# ---------------------------------------------------------------------------
+
+def detect_and_load(file_path: str, sheet_name_filter: str | None = None) -> dict:
+    """
+    Load file into {sheet_name: DataFrame} dict.
+    CSV/TSV files are mapped to a single-key dict using the file stem as key.
+
+    Raises ValueError for unsupported formats or encoding failures.
+    """
+    try:
+        import pandas as pd
+    except ImportError:
+        raise RuntimeError(
+            "pandas is not installed. Run: pip install pandas openpyxl"
+        )
+
+    path = Path(file_path)
+    if not path.exists():
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    suffix = path.suffix.lower()
+
+    if suffix in (".xlsx", ".xlsm"):
+        target = sheet_name_filter if sheet_name_filter else None
+        result = pd.read_excel(file_path, sheet_name=target)
+        # pd.read_excel with sheet_name=None returns dict; with a name, returns DataFrame
+        if isinstance(result, dict):
+            return result
+        else:
+            return {sheet_name_filter: result}
+
+    elif suffix in (".csv", ".tsv"):
+        sep = "\t" if suffix == ".tsv" else ","
+        encodings = ["utf-8-sig", "gbk", "utf-8", "latin-1"]
+        last_error = None
+        for enc in encodings:
+            try:
+                import pandas as pd
+                df = pd.read_csv(file_path, sep=sep, encoding=enc)
+                df._reader_encoding = enc  # attach metadata (non-standard, for reporting)
+                return {path.stem: df}
+            except (UnicodeDecodeError, Exception) as e:
+                last_error = e
+                continue
+        raise ValueError(
+            f"Cannot decode {file_path}. Tried encodings: {encodings}. "
+            f"Last error: {last_error}"
+        )
+
+    elif suffix == ".xls":
+        raise ValueError(
+            ".xls is a legacy binary format not supported by this tool. "
+            "Please open the file in Excel and save as .xlsx, then retry."
+        )
+
+    else:
+        raise ValueError(
+            f"Unsupported file format: {suffix}. "
+            "Supported formats: .xlsx, .xlsm, .csv, .tsv"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Structure discovery
+# ---------------------------------------------------------------------------
+
+def explore_structure(sheets: dict) -> dict:
+    """
+    Return a structured dict describing each sheet.
+    Keys: sheet_name -> {shape, columns, dtypes, null_counts, preview}
+    """
+    result = {}
+    for sheet_name, df in sheets.items():
+        null_counts = df.isnull().sum()
+        null_info = {
+            col: {"count": int(cnt), "pct": round(cnt / max(len(df), 1) * 100, 1)}
+            for col, cnt in null_counts.items()
+            if cnt > 0
+        }
+        result[sheet_name] = {
+            "shape": {"rows": df.shape[0], "cols": df.shape[1]},
+            "columns": list(df.columns),
+            "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()},
+            "null_columns": null_info,
+            "preview": df.head(5).to_dict(orient="records"),
+        }
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Data quality audit
+# ---------------------------------------------------------------------------
+
+def audit_quality(sheets: dict) -> dict:
+    """
+    Return data quality findings per sheet.
+    Checks: nulls, duplicates, mixed-type columns, potential year formatting issues.
+    """
+    import pandas as pd
+
+    findings = {}
+    for sheet_name, df in sheets.items():
+        sheet_findings = []
+
+        # Null values
+        null_counts = df.isnull().sum()
+        for col, cnt in null_counts.items():
+            if cnt > 0:
+                pct = round(cnt / max(len(df), 1) * 100, 1)
+                sheet_findings.append({
+                    "type": "null_values",
+                    "column": col,
+                    "count": int(cnt),
+                    "pct": pct,
+                    "note": f"Column '{col}' has {cnt} null values ({pct}%). "
+                            "If this column contains Excel formulas, null values may "
+                            "indicate that the formula cache has not been populated "
+                            "(file was never opened in Excel after the formulas were written)."
+                })
+
+        # Duplicate rows
+        dup_count = int(df.duplicated().sum())
+        if dup_count > 0:
+            sheet_findings.append({
+                "type": "duplicate_rows",
+                "count": dup_count,
+                "note": f"{dup_count} fully duplicate rows found."
+            })
+
+        # Mixed-type object columns (numeric data stored as text)
+        for col in df.select_dtypes(include="object").columns:
+            numeric_converted = pd.to_numeric(df[col], errors="coerce")
+            convertible = int(numeric_converted.notna().sum())
+            non_null_total = int(df[col].notna().sum())
+            if 0 < convertible < non_null_total:
+                sheet_findings.append({
+                    "type": "mixed_type",
+                    "column": col,
+                    "convertible_to_numeric": convertible,
+                    "non_convertible": non_null_total - convertible,
+                    "note": f"Column '{col}' appears to contain mixed types: "
+                            f"{convertible} values can be parsed as numbers, "
+                            f"{non_null_total - convertible} cannot. "
+                            "Use pd.to_numeric(df[col], errors='coerce') to unify."
+                })
+
+        # Year column formatting (e.g., 2024.0 stored as float)
+        for col in df.select_dtypes(include="number").columns:
+            col_lower = str(col).lower()
+            # "年" is the Chinese character for "year" — detect year columns in CJK spreadsheets
+            if "year" in col_lower or "yr" in col_lower or "年" in col_lower:
+                if df[col].dropna().between(1900, 2200).all():
+                    if df[col].dtype == float:
+                        sheet_findings.append({
+                            "type": "year_as_float",
+                            "column": col,
+                            "note": f"Column '{col}' appears to be a year column stored as float "
+                                    "(e.g., 2024.0). Convert with df[col].astype(int).astype(str) "
+                                    "to get clean year strings like '2024'."
+                        })
+
+        # Outliers via IQR on numeric columns
+        for col in df.select_dtypes(include="number").columns:
+            series = df[col].dropna()
+            if len(series) < 4:
+                continue
+            Q1, Q3 = series.quantile(0.25), series.quantile(0.75)
+            IQR = Q3 - Q1
+            if IQR == 0:
+                continue
+            outlier_mask = (df[col] < Q1 - 1.5 * IQR) | (df[col] > Q3 + 1.5 * IQR)
+            outlier_count = int(outlier_mask.sum())
+            if outlier_count > 0:
+                sheet_findings.append({
+                    "type": "outliers_iqr",
+                    "column": col,
+                    "count": outlier_count,
+                    "note": f"Column '{col}' has {outlier_count} potential outlier(s) "
+                            f"(outside 1.5×IQR bounds: [{Q1 - 1.5*IQR:.2f}, {Q3 + 1.5*IQR:.2f}])."
+                })
+
+        findings[sheet_name] = sheet_findings
+
+    return findings
+
+
+# ---------------------------------------------------------------------------
+# Summary statistics
+# ---------------------------------------------------------------------------
+
+def compute_stats(sheets: dict) -> dict:
+    """Compute descriptive statistics for numeric columns per sheet."""
+    stats = {}
+    for sheet_name, df in sheets.items():
+        numeric_df = df.select_dtypes(include="number")
+        if numeric_df.empty:
+            stats[sheet_name] = {}
+            continue
+        desc = numeric_df.describe().round(4)
+        stats[sheet_name] = desc.to_dict()
+    return stats
+
+
+# ---------------------------------------------------------------------------
+# Human-readable report rendering
+# ---------------------------------------------------------------------------
+
+def render_report(
+    file_path: str,
+    structure: dict,
+    quality: dict,
+    stats: dict,
+) -> str:
+    lines = []
+    p = lines.append
+
+    p("=" * 60)
+    p(f"ANALYSIS REPORT: {Path(file_path).name}")
+    p("=" * 60)
+
+    # File overview
+    sheet_list = list(structure.keys())
+    total_rows = sum(s["shape"]["rows"] for s in structure.values())
+    p(f"\nSheets ({len(sheet_list)}): {', '.join(sheet_list)}")
+    p(f"Total rows across all sheets: {total_rows:,}")
+
+    for sheet_name, info in structure.items():
+        p(f"\n{'─' * 50}")
+        p(f"Sheet: {sheet_name}")
+        p(f"{'─' * 50}")
+        p(f"  Size: {info['shape']['rows']:,} rows × {info['shape']['cols']} cols")
+        p(f"  Columns: {info['columns']}")
+
+        # Data types
+        p("\n  Column types:")
+        for col, dtype in info["dtypes"].items():
+            p(f"    {col}: {dtype}")
+
+        # Nulls
+        if info["null_columns"]:
+            p("\n  Null values (columns with nulls only):")
+            for col, null_info in info["null_columns"].items():
+                p(f"    {col}: {null_info['count']} nulls ({null_info['pct']}%)")
+        else:
+            p("\n  Null values: none")
+
+        # Stats
+        sheet_stats = stats.get(sheet_name, {})
+        if sheet_stats:
+            p("\n  Numeric column statistics:")
+            numeric_cols = list(sheet_stats.keys())
+            # Show only first 6 to keep report readable
+            for col in numeric_cols[:6]:
+                col_stats = sheet_stats[col]
+                p(f"    {col}:")
+                p(f"      count={col_stats.get('count', 'N/A')}  "
+                  f"mean={col_stats.get('mean', 'N/A')}  "
+                  f"min={col_stats.get('min', 'N/A')}  "
+                  f"max={col_stats.get('max', 'N/A')}")
+            if len(numeric_cols) > 6:
+                p(f"    ... and {len(numeric_cols) - 6} more numeric columns")
+
+        # Quality findings for this sheet
+        sheet_quality = quality.get(sheet_name, [])
+        if sheet_quality:
+            p(f"\n  Data quality issues ({len(sheet_quality)} found):")
+            for finding in sheet_quality:
+                p(f"    [{finding['type'].upper()}] {finding['note']}")
+        else:
+            p("\n  Data quality: no issues found")
+
+        # Preview
+        if info["preview"]:
+            p("\n  Preview (first 3 rows):")
+            import pandas as pd
+            preview_df = pd.DataFrame(info["preview"][:3])
+            for line in preview_df.to_string(index=False).splitlines():
+                p(f"    {line}")
+
+    p("\n" + "=" * 60)
+    quality_issue_count = sum(len(v) for v in quality.values())
+    if quality_issue_count == 0:
+        p("RESULT: No data quality issues detected.")
+    else:
+        p(f"RESULT: {quality_issue_count} data quality issue(s) found. See details above.")
+    p("=" * 60)
+
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# CLI entry point
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Read and analyze Excel/CSV files without modifying them."
+    )
+    parser.add_argument("file", help="Path to .xlsx, .xlsm, .csv, or .tsv file")
+    parser.add_argument("--sheet", help="Analyze a specific sheet only", default=None)
+    parser.add_argument(
+        "--json", action="store_true", help="Output machine-readable JSON"
+    )
+    parser.add_argument(
+        "--quality", action="store_true",
+        help="Run data quality audit only (skip stats)"
+    )
+    args = parser.parse_args()
+
+    try:
+        sheets = detect_and_load(args.file, sheet_name_filter=args.sheet)
+    except (FileNotFoundError, ValueError, RuntimeError) as e:
+        print(f"ERROR: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    structure = explore_structure(sheets)
+    quality = audit_quality(sheets)
+    stats = {} if args.quality else compute_stats(sheets)
+
+    if args.json:
+        output = {
+            "file": args.file,
+            "structure": structure,
+            "quality": quality,
+            "stats": stats,
+        }
+        # Convert preview records to serializable form (handle non-JSON types)
+        print(json.dumps(output, indent=2, ensure_ascii=False, default=str))
+    else:
+        report = render_report(args.file, structure, quality, stats)
+        print(report)
+
+
+if __name__ == "__main__":
+    main()
--- a/minimax-xlsx/scripts/xlsx_shift_rows.py
+++ b/minimax-xlsx/scripts/xlsx_shift_rows.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+xlsx_shift_rows.py — Shift all row references in an unpacked xlsx working directory
+after inserting or deleting rows.
+
+Usage:
+    # Insert 2 rows at row 5 (rows 5+ shift down by 2)
+    python3 xlsx_shift_rows.py <work_dir> insert 5 2
+
+    # Delete 1 row at row 8 (rows 9+ shift up by 1)
+    python3 xlsx_shift_rows.py <work_dir> delete 8 1
+
+What it updates in every XML file under <work_dir>:
+  - <row r="N"> attributes in worksheet sheetData
+  - <c r="XN"> cell address attributes in worksheet sheetData
+  - <f> formula text: absolute row references (e.g. B7, $B$7, $B7) in all sheets
+  - <mergeCell ref="A5:C7"> ranges
+  - <conditionalFormatting sqref="..."> ranges
+  - <dataValidations sqref="..."> ranges
+  - <dimension ref="A1:D20"> extent marker
+  - Table <table ref="A1:D20"> in xl/tables/*.xml
+  - Chart series <numRef><f> and <strRef><f> range references in xl/charts/*.xml
+  - PivotCache source <worksheetSource ref="..."> in xl/pivotCaches/*.xml
+
+IMPORTANT: Run this script on the UNPACKED directory before repacking.
+After running, repack with xlsx_pack.py and re-validate with formula_check.py.
+
+Limitations:
+  - Named ranges in workbook.xml <definedNames> are NOT updated automatically.
+    Review them manually after running this script.
+  - Structured table references (Table[@Column]) are NOT updated.
+  - External workbook links in xl/externalLinks/ are NOT updated.
+"""
+
+import sys
+import os
+import re
+import xml.etree.ElementTree as ET
+import xml.dom.minidom
+
+
+def col_letter(n: int) -> str:
+    """Convert 1-based column number to Excel column letter(s)."""
+    r = ""
+    while n > 0:
+        n, rem = divmod(n - 1, 26)
+        r = chr(65 + rem) + r
+    return r
+
+
+def col_number(s: str) -> int:
+    """Convert Excel column letter(s) to 1-based column number."""
+    n = 0
+    for c in s.upper():
+        n = n * 26 + (ord(c) - 64)
+    return n
+
+
+# ---------------------------------------------------------------------------
+# Core shifting logic for formula strings
+# ---------------------------------------------------------------------------
+
+def _shift_refs(text: str, at: int, delta: int) -> str:
+    """Shift cell references in a non-quoted formula fragment."""
+    def replacer(m: re.Match) -> str:
+        dollar_col = m.group(1)   # "$" or ""
+        col_part = m.group(2)     # e.g. "B" or "AB"
+        dollar_row = m.group(3)   # "$" or ""
+        row_str = m.group(4)      # e.g. "7"
+        row = int(row_str)
+        if row >= at:
+            row = max(1, row + delta)
+        return f"{dollar_col}{col_part}{dollar_row}{row}"
+
+    pattern = r'(\$?)([A-Z]+)(\$?)(\d+)'
+    return re.sub(pattern, replacer, text)
+
+
+def shift_formula(formula: str, at: int, delta: int) -> str:
+    """
+    Shift absolute and mixed row references >= `at` by `delta` in a formula string.
+
+    Handles:
+      B7       (relative col, absolute row — shifts if row >= at)
+      $B$7     (absolute col, absolute row — shifts)
+      $B7      (absolute col, relative row — shifts)
+      B$7      (relative col, absolute — shifts)
+      BUT NOT:  B:B  (whole-column reference — left as-is)
+
+    Skips content inside single-quoted sheet name prefixes to avoid
+    corrupting names like 'Budget FY2025' (where FY2025 is NOT a cell ref).
+
+    Does NOT handle:
+      - Named ranges
+      - Structured references (Table[@Col])
+      - R1C1 notation
+    """
+    # Split on quoted sheet names: 'Sheet Name' portions are odd-indexed
+    segments = re.split(r"('[^']*(?:''[^']*)*')", formula)
+    result = []
+    for i, seg in enumerate(segments):
+        if i % 2 == 1:
+            result.append(seg)
+        else:
+            result.append(_shift_refs(seg, at, delta))
+    return "".join(result)
+
+
+def shift_sqref(sqref: str, at: int, delta: int) -> str:
+    """
+    Shift row references in a sqref string (space-separated cell/range addresses).
+    E.g. "A5:D20 B30" → shift rows >= 5 by delta.
+    """
+    parts = sqref.split()
+    result = []
+    for part in parts:
+        if ':' in part:
+            left, right = part.split(':', 1)
+            left = shift_formula(left, at, delta)
+            right = shift_formula(right, at, delta)
+            result.append(f"{left}:{right}")
+        else:
+            result.append(shift_formula(part, at, delta))
+    return " ".join(result)
+
+
+def shift_chart_range(text: str, at: int, delta: int) -> str:
+    """
+    Shift row references inside a chart range formula like:
+      Sheet1!$B$5:$B$20
+      'Q1 Data'!$A$3:$A$15
+    """
+    # Split on the "!" to preserve sheet name
+    if '!' not in text:
+        return text
+    bang = text.index('!')
+    sheet_part = text[:bang + 1]
+    range_part = text[bang + 1:]
+    return sheet_part + shift_formula(range_part, at, delta)
+
+
+# ---------------------------------------------------------------------------
+# XML file processors
+# ---------------------------------------------------------------------------
+
+NS_MAIN = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
+NS_DRAWING = "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing"
+
+# Namespace map used by ElementTree for tag lookup
+NSMAP = {"ss": NS_MAIN}
+
+
+def _tag(local: str) -> str:
+    return f"{{{NS_MAIN}}}{local}"
+
+
+def process_worksheet(path: str, at: int, delta: int) -> int:
+    """Update row/cell references in a worksheet XML. Returns change count."""
+    tree = ET.parse(path)
+    root = tree.getroot()
+    changes = 0
+
+    # 1. <dimension ref="A1:D20">
+    for dim in root.iter(_tag("dimension")):
+        old = dim.get("ref", "")
+        new = shift_sqref(old, at, delta)
+        if new != old:
+            dim.set("ref", new)
+            changes += 1
+
+    # 2. <row r="N"> and <c r="XN"> inside sheetData
+    sheet_data = root.find(_tag("sheetData"))
+    if sheet_data is not None:
+        rows_to_reorder = []
+        for row_el in list(sheet_data):
+            r_str = row_el.get("r")
+            if r_str is None:
+                continue
+            r = int(r_str)
+            if r >= at:
+                new_r = max(1, r + delta)
+                row_el.set("r", str(new_r))
+                changes += 1
+                # Update each cell's r attribute
+                for cell_el in row_el:
+                    cell_ref = cell_el.get("r", "")
+                    if cell_ref:
+                        new_ref = shift_formula(cell_ref, at, delta)
+                        if new_ref != cell_ref:
+                            cell_el.set("r", new_ref)
+                            changes += 1
+
+            # Also update formulas in every row (formulas can reference any row)
+            for cell_el in row_el:
+                f_el = cell_el.find(_tag("f"))
+                if f_el is not None and f_el.text:
+                    new_f = shift_formula(f_el.text, at, delta)
+                    if new_f != f_el.text:
+                        f_el.text = new_f
+                        changes += 1
+
+    # 3. <mergeCell ref="A5:C7">
+    for mc in root.iter(_tag("mergeCell")):
+        old = mc.get("ref", "")
+        new = shift_sqref(old, at, delta)
+        if new != old:
+            mc.set("ref", new)
+            changes += 1
+
+    # 4. <conditionalFormatting sqref="...">
+    for cf in root.iter(_tag("conditionalFormatting")):
+        old = cf.get("sqref", "")
+        new = shift_sqref(old, at, delta)
+        if new != old:
+            cf.set("sqref", new)
+            changes += 1
+
+    # 5. <dataValidation sqref="...">
+    for dv in root.iter(_tag("dataValidation")):
+        old = dv.get("sqref", "")
+        new = shift_sqref(old, at, delta)
+        if new != old:
+            dv.set("sqref", new)
+            changes += 1
+
+    if changes > 0:
+        _write_tree(tree, path)
+    return changes
+
+
+def process_chart(path: str, at: int, delta: int) -> int:
+    """Update data range references in a chart XML."""
+    # Charts use DrawingML namespace; we look for <f> elements with range strings
+    with open(path, "r", encoding="utf-8") as fh:
+        content = fh.read()
+
+    # Pattern matches content of <f>Sheet1!$A$1:$A$10</f> style elements
+    def replace_f(m: re.Match) -> str:
+        tag_open = m.group(1)
+        inner = m.group(2)
+        tag_close = m.group(3)
+        new_inner = shift_chart_range(inner, at, delta)
+        return f"{tag_open}{new_inner}{tag_close}"
+
+    new_content = re.sub(r'(<(?:[^:>]+:)?f>)([^<]+)(</(?:[^:>]+:)?f>)',
+                          replace_f, content)
+    changes = content != new_content
+    if changes:
+        with open(path, "w", encoding="utf-8") as fh:
+            fh.write(new_content)
+    return 1 if changes else 0
+
+
+def process_table(path: str, at: int, delta: int) -> int:
+    """Update the ref attribute on the <table> root element."""
+    tree = ET.parse(path)
+    root = tree.getroot()
+    # The root element IS the table
+    old = root.get("ref", "")
+    if not old:
+        return 0
+    new = shift_sqref(old, at, delta)
+    if new == old:
+        return 0
+    root.set("ref", new)
+    _write_tree(tree, path)
+    return 1
+
+
+def process_pivot_cache(path: str, at: int, delta: int) -> int:
+    """Update worksheetSource ref in a pivot cache definition."""
+    tree = ET.parse(path)
+    root = tree.getroot()
+    changes = 0
+    # Look for <worksheetSource ref="A1:D100" ...>
+    for ws in root.iter():
+        if ws.tag.endswith("}worksheetSource") or ws.tag == "worksheetSource":
+            old = ws.get("ref", "")
+            if old:
+                new = shift_sqref(old, at, delta)
+                if new != old:
+                    ws.set("ref", new)
+                    changes += 1
+    if changes:
+        _write_tree(tree, path)
+    return changes
+
+
+def _write_tree(tree: ET.ElementTree, path: str) -> None:
+    """Write ElementTree back to file with pretty-printing."""
+    tree.write(path, encoding="unicode", xml_declaration=False)
+    # Re-pretty-print for readability
+    with open(path, "r", encoding="utf-8") as fh:
+        raw = fh.read()
+    try:
+        dom = xml.dom.minidom.parseString(raw.encode("utf-8"))
+        pretty = dom.toprettyxml(indent="  ", encoding="utf-8").decode("utf-8")
+        lines = [line for line in pretty.splitlines() if line.strip()]
+        with open(path, "w", encoding="utf-8") as fh:
+            fh.write("\n".join(lines) + "\n")
+    except Exception:
+        pass  # If pretty-print fails, leave the file as-is
+
+
+# ---------------------------------------------------------------------------
+# Main driver
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    if len(sys.argv) < 5:
+        print(__doc__)
+        sys.exit(1)
+
+    work_dir = sys.argv[1]
+    operation = sys.argv[2].lower()
+    at = int(sys.argv[3])
+    count = int(sys.argv[4])
+
+    if operation not in ("insert", "delete"):
+        print(f"ERROR: operation must be 'insert' or 'delete', got '{operation}'")
+        sys.exit(1)
+
+    if operation == "insert":
+        delta = count
+    else:
+        delta = -count
+
+    if not os.path.isdir(work_dir):
+        print(f"ERROR: Directory not found: {work_dir}")
+        sys.exit(1)
+
+    print(f"Operation : {operation} {count} row(s) at row {at} (delta={delta:+d})")
+    print(f"Work dir  : {work_dir}")
+    print()
+
+    total_changes = 0
+
+    # Process all worksheets
+    ws_dir = os.path.join(work_dir, "xl", "worksheets")
+    if os.path.isdir(ws_dir):
+        for fname in sorted(os.listdir(ws_dir)):
+            if fname.endswith(".xml"):
+                fpath = os.path.join(ws_dir, fname)
+                n = process_worksheet(fpath, at, delta)
+                if n:
+                    print(f"  Updated {n:3d} references in xl/worksheets/{fname}")
+                    total_changes += n
+
+    # Process all charts
+    charts_dir = os.path.join(work_dir, "xl", "charts")
+    if os.path.isdir(charts_dir):
+        for fname in sorted(os.listdir(charts_dir)):
+            if fname.endswith(".xml"):
+                fpath = os.path.join(charts_dir, fname)
+                n = process_chart(fpath, at, delta)
+                if n:
+                    print(f"  Updated chart ranges in xl/charts/{fname}")
+                    total_changes += n
+
+    # Process all tables
+    tables_dir = os.path.join(work_dir, "xl", "tables")
+    if os.path.isdir(tables_dir):
+        for fname in sorted(os.listdir(tables_dir)):
+            if fname.endswith(".xml"):
+                fpath = os.path.join(tables_dir, fname)
+                n = process_table(fpath, at, delta)
+                if n:
+                    print(f"  Updated table ref in xl/tables/{fname}")
+                    total_changes += n
+
+    # Process pivot cache definitions
+    cache_dir = os.path.join(work_dir, "xl", "pivotCaches")
+    if os.path.isdir(cache_dir):
+        for fname in sorted(os.listdir(cache_dir)):
+            if "Definition" in fname and fname.endswith(".xml"):
+                fpath = os.path.join(cache_dir, fname)
+                n = process_pivot_cache(fpath, at, delta)
+                if n:
+                    print(f"  Updated pivot source range in xl/pivotCaches/{fname}")
+                    total_changes += n
+
+    print()
+    print(f"Total changes: {total_changes}")
+    print()
+    print("IMPORTANT: Review named ranges in xl/workbook.xml <definedNames> manually.")
+    print("           Structured table references (Table[@Col]) are NOT updated.")
+    print()
+    print("Next steps:")
+    print("  1. Review the changes above")
+    print(f"  2. python3 xlsx_pack.py {work_dir} output.xlsx")
+    print("  3. python3 formula_check.py output.xlsx")
+
+
+if __name__ == "__main__":
+    main()
--- a/minimax-xlsx/scripts/xlsx_unpack.py
+++ b/minimax-xlsx/scripts/xlsx_unpack.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""
+xlsx_unpack.py — Unpack an xlsx file into a working directory for XML editing.
+
+Usage:
+    python3 xlsx_unpack.py <input.xlsx> <output_dir>
+
+What it does:
+1. Unzips the xlsx (which is a ZIP archive)
+2. Pretty-prints all XML and .rels files for readability
+3. Prints a summary of key files to edit
+"""
+
+import sys
+import zipfile
+import os
+import shutil
+import xml.dom.minidom
+
+
+def pretty_print_xml(content: bytes) -> str:
+    """Pretty-print XML bytes. Returns original content on parse failure."""
+    try:
+        dom = xml.dom.minidom.parseString(content)
+        pretty = dom.toprettyxml(indent="  ", encoding="utf-8").decode("utf-8")
+        # Remove the extra blank lines toprettyxml adds
+        lines = [line for line in pretty.splitlines() if line.strip()]
+        return "\n".join(lines) + "\n"
+    except Exception:
+        return content.decode("utf-8", errors="replace")
+
+
+def unpack(xlsx_path: str, output_dir: str) -> None:
+    if not os.path.isfile(xlsx_path):
+        print(f"ERROR: File not found: {xlsx_path}", file=sys.stderr)
+        sys.exit(1)
+
+    if not xlsx_path.lower().endswith((".xlsx", ".xlsm")):
+        print(f"WARNING: '{xlsx_path}' does not have an .xlsx/.xlsm extension", file=sys.stderr)
+
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir)
+
+    try:
+        with zipfile.ZipFile(xlsx_path, "r") as z:
+            # Validate member paths to prevent zip-slip (path traversal) attacks
+            for member in z.namelist():
+                member_path = os.path.realpath(os.path.join(output_dir, member))
+                if not member_path.startswith(os.path.realpath(output_dir) + os.sep) and member_path != os.path.realpath(output_dir):
+                    print(f"ERROR: Zip entry '{member}' would escape target directory (path traversal blocked)", file=sys.stderr)
+                    shutil.rmtree(output_dir, ignore_errors=True)
+                    sys.exit(1)
+            z.extractall(output_dir)
+    except zipfile.BadZipFile:
+        shutil.rmtree(output_dir, ignore_errors=True)
+        print(f"ERROR: '{xlsx_path}' is not a valid ZIP/xlsx file", file=sys.stderr)
+        sys.exit(1)
+
+    # Pretty-print XML and .rels files
+    xml_count = 0
+    for dirpath, _, filenames in os.walk(output_dir):
+        for fname in filenames:
+            if fname.endswith(".xml") or fname.endswith(".rels"):
+                fpath = os.path.join(dirpath, fname)
+                with open(fpath, "rb") as f:
+                    raw = f.read()
+                pretty = pretty_print_xml(raw)
+                with open(fpath, "w", encoding="utf-8") as f:
+                    f.write(pretty)
+                xml_count += 1
+
+    print(f"Unpacked '{xlsx_path}' → '{output_dir}'")
+    print(f"Pretty-printed {xml_count} XML/rels files\n")
+
+    # Print key files grouped by category
+    categories = {
+        "Package root": ["[Content_Types].xml", "_rels/.rels"],
+        "Workbook": ["xl/workbook.xml", "xl/_rels/workbook.xml.rels"],
+        "Styles & Strings": ["xl/styles.xml", "xl/sharedStrings.xml"],
+        "Worksheets": [],
+    }
+
+    all_files = []
+    for dirpath, _, filenames in os.walk(output_dir):
+        for fname in filenames:
+            rel = os.path.relpath(os.path.join(dirpath, fname), output_dir)
+            all_files.append(rel)
+
+    # Collect worksheets
+    for rel in sorted(all_files):
+        if rel.startswith("xl/worksheets/") and rel.endswith(".xml"):
+            categories["Worksheets"].append(rel)
+
+    print("Key files to inspect/edit:")
+    for category, files in categories.items():
+        if not files:
+            continue
+        print(f"\n  [{category}]")
+        for f in files:
+            full = os.path.join(output_dir, f)
+            if os.path.isfile(full):
+                size = os.path.getsize(full)
+                print(f"    {f}  ({size:,} bytes)")
+            else:
+                print(f"    {f}  (not found)")
+
+    # Warn about high-risk files present
+    risky = {
+        "xl/vbaProject.bin": "VBA macros — DO NOT modify",
+        "xl/pivotTables": "Pivot tables — update source ranges carefully if shifting rows",
+        "xl/charts": "Charts — update data ranges if shifting rows",
+    }
+    print("\n  [High-risk content detected:]")
+    found_any = False
+    for path, warning in risky.items():
+        full = os.path.join(output_dir, path)
+        if os.path.exists(full):
+            print(f"    ⚠️  {path} — {warning}")
+            found_any = True
+    if not found_any:
+        print("    ✓ None (safe to edit)")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: xlsx_unpack.py <input.xlsx> <output_dir>")
+        sys.exit(1)
+    unpack(sys.argv[1], sys.argv[2])