Files
skills/minimax-xlsx/scripts/xlsx_unpack.py
shihao 6487becf60 Initial commit: add all skills files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 16:52:49 +08:00

131 lines
4.7 KiB
Python

#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
"""
xlsx_unpack.py — Unpack an xlsx file into a working directory for XML editing.
Usage:
python3 xlsx_unpack.py <input.xlsx> <output_dir>
What it does:
1. Unzips the xlsx (which is a ZIP archive)
2. Pretty-prints all XML and .rels files for readability
3. Prints a summary of key files to edit
"""
import sys
import zipfile
import os
import shutil
import xml.dom.minidom
def _strip_layout_whitespace(root) -> None:
    """Remove whitespace-only text nodes that sit next to sibling nodes.

    Such nodes are indentation left over from a previous pretty-print; if kept,
    ``toprettyxml`` turns each of them into an extra blank line. A text node
    that is the *only* child of its element (e.g. ``<t xml:space="preserve">
    </t>``) is real content and is left untouched, as are CDATA sections.
    """
    # Iterative traversal so deeply nested documents do not add recursion here.
    pending = [root]
    while pending:
        node = pending.pop()
        children = list(node.childNodes)
        if len(children) > 1:
            for child in children:
                # Only XML whitespace (space, tab, CR, LF) counts as layout.
                if child.nodeType == child.TEXT_NODE and not child.data.strip(" \t\r\n"):
                    node.removeChild(child)
        pending.extend(
            child for child in node.childNodes
            if child.nodeType == child.ELEMENT_NODE
        )


def pretty_print_xml(content: bytes) -> str:
    """Pretty-print XML bytes and return the result as text.

    Layout-only whitespace between elements is dropped before serialising, so
    the output has no spurious blank lines while the text inside elements
    (including blank lines within a multi-line string) is emitted unchanged.

    Args:
        content: Raw bytes of an XML document.

    Returns:
        The indented XML, starting with a UTF-8 XML declaration and ending
        with a newline. If the input cannot be parsed or serialised, the
        original bytes decoded as UTF-8 (undecodable bytes replaced) are
        returned instead.

    NOTE: xml.dom.minidom is not hardened against maliciously constructed
    XML; only use this on workbooks from trusted sources.
    """
    try:
        dom = xml.dom.minidom.parseString(content)
        _strip_layout_whitespace(dom)
        pretty = dom.toprettyxml(indent=" ", encoding="utf-8").decode("utf-8")
    except Exception:
        # Deliberate best effort: a file that is not well-formed XML is passed
        # through rather than aborting the whole unpack.
        return content.decode("utf-8", errors="replace")
    if not pretty.endswith("\n"):
        pretty += "\n"
    return pretty
def _is_inside(path: str, root: str) -> bool:
    """Return True when ``path`` equals ``root`` or lies beneath it.

    Both arguments are expected to be already-resolved absolute paths.
    """
    norm_root = os.path.normcase(root)
    try:
        common = os.path.commonpath([os.path.normcase(path), norm_root])
    except ValueError:
        # e.g. paths on different Windows drives: they cannot be nested.
        return False
    return common == norm_root


def unpack(xlsx_path: str, output_dir: str) -> None:
    """Unpack an xlsx/xlsm archive into ``output_dir`` and pretty-print its XML.

    Any existing ``output_dir`` is deleted and recreated. After extraction,
    every ``.xml`` and ``.rels`` file is rewritten in pretty-printed form and a
    summary of key files and high-risk content is printed to stdout.

    Args:
        xlsx_path: Path to the workbook (a ZIP archive).
        output_dir: Directory to create and extract into.

    Raises:
        SystemExit: With status 1 (after an ``ERROR:`` line on stderr) when the
            input file is missing, is located inside ``output_dir``, is not a
            valid ZIP archive, or contains an entry that would be written
            outside ``output_dir``.
    """
    if not os.path.isfile(xlsx_path):
        print(f"ERROR: File not found: {xlsx_path}", file=sys.stderr)
        sys.exit(1)

    if not xlsx_path.lower().endswith((".xlsx", ".xlsm")):
        print(f"WARNING: '{xlsx_path}' does not have an .xlsx/.xlsm extension", file=sys.stderr)

    # output_dir is wiped below; refuse to run if that would delete the input.
    if _is_inside(os.path.realpath(xlsx_path), os.path.realpath(output_dir)):
        print(
            f"ERROR: Input file '{xlsx_path}' is inside output directory "
            f"'{output_dir}', which would be deleted before unpacking",
            file=sys.stderr,
        )
        sys.exit(1)

    # Start from a clean directory. rmtree() refuses plain files and symlinks,
    # so those are removed with os.remove() instead.
    if os.path.islink(output_dir) or os.path.isfile(output_dir):
        os.remove(output_dir)
    elif os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Resolved once, after creation, and reused for every archive member.
    target_root = os.path.realpath(output_dir)

    try:
        with zipfile.ZipFile(xlsx_path, "r") as archive:
            # Validate member paths to prevent zip-slip (path traversal) attacks
            for member in archive.namelist():
                member_path = os.path.realpath(os.path.join(output_dir, member))
                if not _is_inside(member_path, target_root):
                    print(
                        f"ERROR: Zip entry '{member}' would escape target directory (path traversal blocked)",
                        file=sys.stderr,
                    )
                    shutil.rmtree(output_dir, ignore_errors=True)
                    sys.exit(1)
            archive.extractall(output_dir)
    except zipfile.BadZipFile:
        shutil.rmtree(output_dir, ignore_errors=True)
        print(f"ERROR: '{xlsx_path}' is not a valid ZIP/xlsx file", file=sys.stderr)
        sys.exit(1)

    # Single pass over the tree: pretty-print XML/.rels files and record every
    # extracted file as a '/'-separated relative path (portable across OSes).
    xml_count = 0
    extracted = []
    for dirpath, _, filenames in os.walk(output_dir):
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            extracted.append(os.path.relpath(fpath, output_dir).replace(os.sep, "/"))
            if fname.endswith((".xml", ".rels")):
                with open(fpath, "rb") as src:
                    raw = src.read()
                with open(fpath, "w", encoding="utf-8") as dst:
                    dst.write(pretty_print_xml(raw))
                xml_count += 1

    print(f"Unpacked '{xlsx_path}' -> '{output_dir}'")
    print(f"Pretty-printed {xml_count} XML/rels files\n")

    # Print key files grouped by category
    categories = {
        "Package root": ["[Content_Types].xml", "_rels/.rels"],
        "Workbook": ["xl/workbook.xml", "xl/_rels/workbook.xml.rels"],
        "Styles & Strings": ["xl/styles.xml", "xl/sharedStrings.xml"],
        "Worksheets": sorted(
            rel for rel in extracted
            if rel.startswith("xl/worksheets/") and rel.endswith(".xml")
        ),
    }

    print("Key files to inspect/edit:")
    for category, files in categories.items():
        if not files:
            continue
        print(f"\n [{category}]")
        for rel in files:
            full = os.path.join(output_dir, rel)
            if os.path.isfile(full):
                size = os.path.getsize(full)
                print(f" {rel} ({size:,} bytes)")
            else:
                print(f" {rel} (not found)")

    # Warn about high-risk files present
    risky = {
        "xl/vbaProject.bin": "VBA macros — DO NOT modify",
        "xl/pivotTables": "Pivot tables — update source ranges carefully if shifting rows",
        "xl/charts": "Charts — update data ranges if shifting rows",
    }
    print("\n [High-risk content detected:]")
    found_any = False
    for path, warning in risky.items():
        if os.path.exists(os.path.join(output_dir, path)):
            print(f" ⚠️ {path}: {warning}")
            found_any = True
    if not found_any:
        print(" ✓ None (safe to edit)")
if __name__ == "__main__":
    # Exactly two positional arguments are required: the workbook to unpack
    # and the directory to unpack it into.
    if len(sys.argv) != 3:
        # Usage errors go to stderr, like every other error in this script.
        print("Usage: xlsx_unpack.py <input.xlsx> <output_dir>", file=sys.stderr)
        sys.exit(1)
    unpack(sys.argv[1], sys.argv[2])