249 lines
9.2 KiB
Python
249 lines
9.2 KiB
Python
#!/usr/bin/env python3
|
|
# SPDX-License-Identifier: MIT
|
|
"""
|
|
libreoffice_recalc.py — Tier 2 dynamic formula recalculation via LibreOffice headless.
|
|
|
|
Opens the xlsx file with the LibreOffice Calc engine, executes all formulas, writes
|
|
the computed values into the <v> cache elements, and saves the result. This is the
|
|
closest server-side equivalent of "open in Excel and save."
|
|
|
|
After recalculation, run formula_check.py on the output file to detect runtime errors
|
|
(#DIV/0!, #N/A, etc.) that only surface after actual computation.
|
|
|
|
Usage:
|
|
python3 libreoffice_recalc.py input.xlsx output.xlsx
|
|
python3 libreoffice_recalc.py input.xlsx output.xlsx --timeout 90
|
|
python3 libreoffice_recalc.py --check # check LibreOffice availability only
|
|
|
|
Exit codes:
|
|
0 — recalculation succeeded, output file written
|
|
2 — LibreOffice not found (Tier 2 unavailable — not a hard failure, note in report)
|
|
1 — LibreOffice found but recalculation failed (timeout, crash, bad file)
|
|
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
import shutil
|
|
import os
|
|
import tempfile
|
|
import argparse
|
|
|
|
|
|
# ── LibreOffice discovery ───────────────────────────────────────────────────
|
|
|
|
def find_soffice() -> str | None:
|
|
"""
|
|
Locate the soffice (LibreOffice) binary.
|
|
|
|
Search order:
|
|
1. macOS application bundle (default install location)
|
|
2. PATH lookup for 'soffice'
|
|
3. PATH lookup for 'libreoffice' (common on Linux)
|
|
"""
|
|
candidates = [
|
|
"/Applications/LibreOffice.app/Contents/MacOS/soffice", # macOS
|
|
"soffice", # Linux / macOS if on PATH
|
|
"libreoffice", # alternative Linux name
|
|
]
|
|
for c in candidates:
|
|
# shutil.which handles PATH lookup; also check absolute paths directly
|
|
found = shutil.which(c)
|
|
if found:
|
|
return found
|
|
if os.path.isfile(c) and os.access(c, os.X_OK):
|
|
return c
|
|
return None
|
|
|
|
|
|
def get_libreoffice_version(soffice: str) -> str:
|
|
"""Return LibreOffice version string, or 'unknown' on failure."""
|
|
try:
|
|
result = subprocess.run(
|
|
[soffice, "--version"],
|
|
capture_output=True,
|
|
timeout=10,
|
|
)
|
|
return result.stdout.decode(errors="replace").strip()
|
|
except Exception:
|
|
return "unknown"
|
|
|
|
|
|
# ── Recalculation ───────────────────────────────────────────────────────────
|
|
|
|
def recalculate(
|
|
input_path: str,
|
|
output_path: str,
|
|
timeout: int = 60,
|
|
) -> tuple[bool, str]:
|
|
"""
|
|
Run LibreOffice headless recalculation on input_path, write result to output_path.
|
|
|
|
Returns:
|
|
(success: bool, message: str)
|
|
|
|
The message explains what happened (success or failure reason).
|
|
"""
|
|
soffice = find_soffice()
|
|
if not soffice:
|
|
return False, (
|
|
"LibreOffice not found. Tier 2 validation is unavailable in this environment. "
|
|
"Install LibreOffice to enable dynamic formula recalculation.\n"
|
|
" macOS: brew install --cask libreoffice\n"
|
|
" Linux: sudo apt-get install -y libreoffice"
|
|
)
|
|
|
|
version = get_libreoffice_version(soffice)
|
|
|
|
# Work on a copy in a temp directory to avoid side effects on the source file.
|
|
# LibreOffice writes the output using the same filename stem in --outdir.
|
|
with tempfile.TemporaryDirectory(prefix="xlsx_recalc_") as tmpdir:
|
|
tmp_input = os.path.join(tmpdir, os.path.basename(input_path))
|
|
shutil.copy(input_path, tmp_input)
|
|
|
|
cmd = [
|
|
soffice,
|
|
"--headless",
|
|
"--norestore", # do not attempt to restore crashed sessions
|
|
"--infilter=Calc MS Excel 2007 XML",
|
|
"--convert-to", "xlsx",
|
|
"--outdir", tmpdir,
|
|
tmp_input,
|
|
]
|
|
|
|
try:
|
|
result = subprocess.run(
|
|
cmd,
|
|
capture_output=True,
|
|
timeout=timeout,
|
|
)
|
|
except subprocess.TimeoutExpired:
|
|
return False, (
|
|
f"LibreOffice timed out after {timeout}s. "
|
|
"The file may be too large or contain constructs that cause LibreOffice to hang. "
|
|
"Try increasing --timeout or simplify the file."
|
|
)
|
|
except FileNotFoundError:
|
|
return False, f"LibreOffice binary not executable: {soffice}"
|
|
|
|
if result.returncode != 0:
|
|
stderr = result.stderr.decode(errors="replace").strip()
|
|
stdout = result.stdout.decode(errors="replace").strip()
|
|
return False, (
|
|
f"LibreOffice exited with code {result.returncode}.\n"
|
|
f"stderr: {stderr}\n"
|
|
f"stdout: {stdout}"
|
|
)
|
|
|
|
# LibreOffice writes: <tmpdir>/<stem>.xlsx
|
|
stem = os.path.splitext(os.path.basename(tmp_input))[0]
|
|
tmp_output = os.path.join(tmpdir, stem + ".xlsx")
|
|
|
|
if not os.path.isfile(tmp_output):
|
|
# Try to find any .xlsx file in tmpdir (LibreOffice may behave differently)
|
|
xlsx_files = [f for f in os.listdir(tmpdir) if f.endswith(".xlsx") and f != os.path.basename(tmp_input)]
|
|
if xlsx_files:
|
|
tmp_output = os.path.join(tmpdir, xlsx_files[0])
|
|
else:
|
|
stdout = result.stdout.decode(errors="replace").strip()
|
|
return False, (
|
|
f"LibreOffice succeeded (exit 0) but output file not found in {tmpdir}.\n"
|
|
f"stdout: {stdout}\n"
|
|
f"Files in tmpdir: {os.listdir(tmpdir)}"
|
|
)
|
|
|
|
# Copy recalculated file to final destination
|
|
os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
|
|
shutil.copy(tmp_output, output_path)
|
|
|
|
return True, f"Recalculation complete. LibreOffice {version}. Output: {output_path}"
|
|
|
|
|
|
# ── CLI ─────────────────────────────────────────────────────────────────────
|
|
|
|
def main() -> None:
|
|
parser = argparse.ArgumentParser(
|
|
description="LibreOffice headless formula recalculation for xlsx files.",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
# Basic recalculation
|
|
python3 libreoffice_recalc.py report.xlsx report_recalc.xlsx
|
|
|
|
# With extended timeout for large files
|
|
python3 libreoffice_recalc.py big_model.xlsx big_model_recalc.xlsx --timeout 120
|
|
|
|
# Check if LibreOffice is available (useful in CI)
|
|
python3 libreoffice_recalc.py --check
|
|
|
|
# Full validation pipeline
|
|
python3 libreoffice_recalc.py input.xlsx /tmp/recalc.xlsx && \\
|
|
python3 formula_check.py /tmp/recalc.xlsx
|
|
""",
|
|
)
|
|
parser.add_argument("input", nargs="?", help="Input xlsx file path")
|
|
parser.add_argument("output", nargs="?", help="Output xlsx file path (recalculated)")
|
|
parser.add_argument(
|
|
"--timeout",
|
|
type=int,
|
|
default=60,
|
|
metavar="SECONDS",
|
|
help="Maximum time to wait for LibreOffice (default: 60)",
|
|
)
|
|
parser.add_argument(
|
|
"--check",
|
|
action="store_true",
|
|
help="Only check if LibreOffice is available, then exit",
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
# ── --check mode ─────────────────────────────────────────────────────────
|
|
if args.check:
|
|
soffice = find_soffice()
|
|
if soffice:
|
|
version = get_libreoffice_version(soffice)
|
|
print(f"LibreOffice available: {soffice}")
|
|
print(f"Version: {version}")
|
|
sys.exit(0)
|
|
else:
|
|
print("LibreOffice NOT available.")
|
|
print("Tier 2 dynamic validation requires LibreOffice.")
|
|
print(" macOS: brew install --cask libreoffice")
|
|
print(" Linux: sudo apt-get install -y libreoffice")
|
|
sys.exit(2)
|
|
|
|
# ── Recalculation mode ────────────────────────────────────────────────────
|
|
if not args.input or not args.output:
|
|
parser.print_help()
|
|
sys.exit(1)
|
|
|
|
if not os.path.isfile(args.input):
|
|
print(f"ERROR: Input file not found: {args.input}")
|
|
sys.exit(1)
|
|
|
|
print(f"Input : {args.input}")
|
|
print(f"Output : {args.output}")
|
|
print(f"Timeout: {args.timeout}s")
|
|
print()
|
|
|
|
success, message = recalculate(args.input, args.output, timeout=args.timeout)
|
|
|
|
if success:
|
|
print(f"OK: {message}")
|
|
print()
|
|
print("Next step: run formula_check.py on the recalculated file to detect runtime errors:")
|
|
print(f" python3 formula_check.py {args.output}")
|
|
sys.exit(0)
|
|
else:
|
|
# Distinguish "not installed" (exit 2) from "failed" (exit 1)
|
|
if "not found" in message.lower() or "not available" in message.lower():
|
|
print(f"SKIP (Tier 2 unavailable): {message}")
|
|
sys.exit(2)
|
|
else:
|
|
print(f"ERROR: {message}")
|
|
sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|