Files
skills/minimax-xlsx/scripts/libreoffice_recalc.py
shihao 6487becf60 Initial commit: add all skills files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 16:52:49 +08:00

249 lines
9.2 KiB
Python

#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
"""
libreoffice_recalc.py — Tier 2 dynamic formula recalculation via LibreOffice headless.
Opens the xlsx file with the LibreOffice Calc engine, executes all formulas, writes
the computed values into the <v> cache elements, and saves the result. This is the
closest server-side equivalent of "open in Excel and save."
After recalculation, run formula_check.py on the output file to detect runtime errors
(#DIV/0!, #N/A, etc.) that only surface after actual computation.
Usage:
python3 libreoffice_recalc.py input.xlsx output.xlsx
python3 libreoffice_recalc.py input.xlsx output.xlsx --timeout 90
python3 libreoffice_recalc.py --check # check LibreOffice availability only
Exit codes:
0 — recalculation succeeded, output file written
2 — LibreOffice not found (Tier 2 unavailable — not a hard failure, note in report)
1 — LibreOffice found but recalculation failed (timeout, crash, bad file)
"""
import subprocess
import sys
import shutil
import os
import tempfile
import argparse
# ── LibreOffice discovery ───────────────────────────────────────────────────
def find_soffice() -> str | None:
"""
Locate the soffice (LibreOffice) binary.
Search order:
1. macOS application bundle (default install location)
2. PATH lookup for 'soffice'
3. PATH lookup for 'libreoffice' (common on Linux)
"""
candidates = [
"/Applications/LibreOffice.app/Contents/MacOS/soffice", # macOS
"soffice", # Linux / macOS if on PATH
"libreoffice", # alternative Linux name
]
for c in candidates:
# shutil.which handles PATH lookup; also check absolute paths directly
found = shutil.which(c)
if found:
return found
if os.path.isfile(c) and os.access(c, os.X_OK):
return c
return None
def get_libreoffice_version(soffice: str) -> str:
"""Return LibreOffice version string, or 'unknown' on failure."""
try:
result = subprocess.run(
[soffice, "--version"],
capture_output=True,
timeout=10,
)
return result.stdout.decode(errors="replace").strip()
except Exception:
return "unknown"
# ── Recalculation ───────────────────────────────────────────────────────────
def recalculate(
input_path: str,
output_path: str,
timeout: int = 60,
) -> tuple[bool, str]:
"""
Run LibreOffice headless recalculation on input_path, write result to output_path.
Returns:
(success: bool, message: str)
The message explains what happened (success or failure reason).
"""
soffice = find_soffice()
if not soffice:
return False, (
"LibreOffice not found. Tier 2 validation is unavailable in this environment. "
"Install LibreOffice to enable dynamic formula recalculation.\n"
" macOS: brew install --cask libreoffice\n"
" Linux: sudo apt-get install -y libreoffice"
)
version = get_libreoffice_version(soffice)
# Work on a copy in a temp directory to avoid side effects on the source file.
# LibreOffice writes the output using the same filename stem in --outdir.
with tempfile.TemporaryDirectory(prefix="xlsx_recalc_") as tmpdir:
tmp_input = os.path.join(tmpdir, os.path.basename(input_path))
shutil.copy(input_path, tmp_input)
cmd = [
soffice,
"--headless",
"--norestore", # do not attempt to restore crashed sessions
"--infilter=Calc MS Excel 2007 XML",
"--convert-to", "xlsx",
"--outdir", tmpdir,
tmp_input,
]
try:
result = subprocess.run(
cmd,
capture_output=True,
timeout=timeout,
)
except subprocess.TimeoutExpired:
return False, (
f"LibreOffice timed out after {timeout}s. "
"The file may be too large or contain constructs that cause LibreOffice to hang. "
"Try increasing --timeout or simplify the file."
)
except FileNotFoundError:
return False, f"LibreOffice binary not executable: {soffice}"
if result.returncode != 0:
stderr = result.stderr.decode(errors="replace").strip()
stdout = result.stdout.decode(errors="replace").strip()
return False, (
f"LibreOffice exited with code {result.returncode}.\n"
f"stderr: {stderr}\n"
f"stdout: {stdout}"
)
# LibreOffice writes: <tmpdir>/<stem>.xlsx
stem = os.path.splitext(os.path.basename(tmp_input))[0]
tmp_output = os.path.join(tmpdir, stem + ".xlsx")
if not os.path.isfile(tmp_output):
# Try to find any .xlsx file in tmpdir (LibreOffice may behave differently)
xlsx_files = [f for f in os.listdir(tmpdir) if f.endswith(".xlsx") and f != os.path.basename(tmp_input)]
if xlsx_files:
tmp_output = os.path.join(tmpdir, xlsx_files[0])
else:
stdout = result.stdout.decode(errors="replace").strip()
return False, (
f"LibreOffice succeeded (exit 0) but output file not found in {tmpdir}.\n"
f"stdout: {stdout}\n"
f"Files in tmpdir: {os.listdir(tmpdir)}"
)
# Copy recalculated file to final destination
os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
shutil.copy(tmp_output, output_path)
return True, f"Recalculation complete. LibreOffice {version}. Output: {output_path}"
# ── CLI ─────────────────────────────────────────────────────────────────────
def main() -> None:
parser = argparse.ArgumentParser(
description="LibreOffice headless formula recalculation for xlsx files.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Basic recalculation
python3 libreoffice_recalc.py report.xlsx report_recalc.xlsx
# With extended timeout for large files
python3 libreoffice_recalc.py big_model.xlsx big_model_recalc.xlsx --timeout 120
# Check if LibreOffice is available (useful in CI)
python3 libreoffice_recalc.py --check
# Full validation pipeline
python3 libreoffice_recalc.py input.xlsx /tmp/recalc.xlsx && \\
python3 formula_check.py /tmp/recalc.xlsx
""",
)
parser.add_argument("input", nargs="?", help="Input xlsx file path")
parser.add_argument("output", nargs="?", help="Output xlsx file path (recalculated)")
parser.add_argument(
"--timeout",
type=int,
default=60,
metavar="SECONDS",
help="Maximum time to wait for LibreOffice (default: 60)",
)
parser.add_argument(
"--check",
action="store_true",
help="Only check if LibreOffice is available, then exit",
)
args = parser.parse_args()
# ── --check mode ─────────────────────────────────────────────────────────
if args.check:
soffice = find_soffice()
if soffice:
version = get_libreoffice_version(soffice)
print(f"LibreOffice available: {soffice}")
print(f"Version: {version}")
sys.exit(0)
else:
print("LibreOffice NOT available.")
print("Tier 2 dynamic validation requires LibreOffice.")
print(" macOS: brew install --cask libreoffice")
print(" Linux: sudo apt-get install -y libreoffice")
sys.exit(2)
# ── Recalculation mode ────────────────────────────────────────────────────
if not args.input or not args.output:
parser.print_help()
sys.exit(1)
if not os.path.isfile(args.input):
print(f"ERROR: Input file not found: {args.input}")
sys.exit(1)
print(f"Input : {args.input}")
print(f"Output : {args.output}")
print(f"Timeout: {args.timeout}s")
print()
success, message = recalculate(args.input, args.output, timeout=args.timeout)
if success:
print(f"OK: {message}")
print()
print("Next step: run formula_check.py on the recalculated file to detect runtime errors:")
print(f" python3 formula_check.py {args.output}")
sys.exit(0)
else:
# Distinguish "not installed" (exit 2) from "failed" (exit 1)
if "not found" in message.lower() or "not available" in message.lower():
print(f"SKIP (Tier 2 unavailable): {message}")
sys.exit(2)
else:
print(f"ERROR: {message}")
sys.exit(1)
if __name__ == "__main__":
main()