Initial commit: add all skills files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
130
minimax-xlsx/scripts/xlsx_unpack.py
Normal file
130
minimax-xlsx/scripts/xlsx_unpack.py
Normal file
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: MIT
|
||||
"""
|
||||
xlsx_unpack.py — Unpack an xlsx file into a working directory for XML editing.
|
||||
|
||||
Usage:
|
||||
python3 xlsx_unpack.py <input.xlsx> <output_dir>
|
||||
|
||||
What it does:
|
||||
1. Unzips the xlsx (which is a ZIP archive)
|
||||
2. Pretty-prints all XML and .rels files for readability
|
||||
3. Prints a summary of key files to edit
|
||||
"""
|
||||
|
||||
import sys
|
||||
import zipfile
|
||||
import os
|
||||
import shutil
|
||||
import xml.dom.minidom
|
||||
|
||||
|
||||
def pretty_print_xml(content: bytes) -> str:
|
||||
"""Pretty-print XML bytes. Returns original content on parse failure."""
|
||||
try:
|
||||
dom = xml.dom.minidom.parseString(content)
|
||||
pretty = dom.toprettyxml(indent=" ", encoding="utf-8").decode("utf-8")
|
||||
# Remove the extra blank lines toprettyxml adds
|
||||
lines = [line for line in pretty.splitlines() if line.strip()]
|
||||
return "\n".join(lines) + "\n"
|
||||
except Exception:
|
||||
return content.decode("utf-8", errors="replace")
|
||||
|
||||
|
||||
def unpack(xlsx_path: str, output_dir: str) -> None:
|
||||
if not os.path.isfile(xlsx_path):
|
||||
print(f"ERROR: File not found: {xlsx_path}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if not xlsx_path.lower().endswith((".xlsx", ".xlsm")):
|
||||
print(f"WARNING: '{xlsx_path}' does not have an .xlsx/.xlsm extension", file=sys.stderr)
|
||||
|
||||
if os.path.exists(output_dir):
|
||||
shutil.rmtree(output_dir)
|
||||
os.makedirs(output_dir)
|
||||
|
||||
try:
|
||||
with zipfile.ZipFile(xlsx_path, "r") as z:
|
||||
# Validate member paths to prevent zip-slip (path traversal) attacks
|
||||
for member in z.namelist():
|
||||
member_path = os.path.realpath(os.path.join(output_dir, member))
|
||||
if not member_path.startswith(os.path.realpath(output_dir) + os.sep) and member_path != os.path.realpath(output_dir):
|
||||
print(f"ERROR: Zip entry '{member}' would escape target directory (path traversal blocked)", file=sys.stderr)
|
||||
shutil.rmtree(output_dir, ignore_errors=True)
|
||||
sys.exit(1)
|
||||
z.extractall(output_dir)
|
||||
except zipfile.BadZipFile:
|
||||
shutil.rmtree(output_dir, ignore_errors=True)
|
||||
print(f"ERROR: '{xlsx_path}' is not a valid ZIP/xlsx file", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Pretty-print XML and .rels files
|
||||
xml_count = 0
|
||||
for dirpath, _, filenames in os.walk(output_dir):
|
||||
for fname in filenames:
|
||||
if fname.endswith(".xml") or fname.endswith(".rels"):
|
||||
fpath = os.path.join(dirpath, fname)
|
||||
with open(fpath, "rb") as f:
|
||||
raw = f.read()
|
||||
pretty = pretty_print_xml(raw)
|
||||
with open(fpath, "w", encoding="utf-8") as f:
|
||||
f.write(pretty)
|
||||
xml_count += 1
|
||||
|
||||
print(f"Unpacked '{xlsx_path}' → '{output_dir}'")
|
||||
print(f"Pretty-printed {xml_count} XML/rels files\n")
|
||||
|
||||
# Print key files grouped by category
|
||||
categories = {
|
||||
"Package root": ["[Content_Types].xml", "_rels/.rels"],
|
||||
"Workbook": ["xl/workbook.xml", "xl/_rels/workbook.xml.rels"],
|
||||
"Styles & Strings": ["xl/styles.xml", "xl/sharedStrings.xml"],
|
||||
"Worksheets": [],
|
||||
}
|
||||
|
||||
all_files = []
|
||||
for dirpath, _, filenames in os.walk(output_dir):
|
||||
for fname in filenames:
|
||||
rel = os.path.relpath(os.path.join(dirpath, fname), output_dir)
|
||||
all_files.append(rel)
|
||||
|
||||
# Collect worksheets
|
||||
for rel in sorted(all_files):
|
||||
if rel.startswith("xl/worksheets/") and rel.endswith(".xml"):
|
||||
categories["Worksheets"].append(rel)
|
||||
|
||||
print("Key files to inspect/edit:")
|
||||
for category, files in categories.items():
|
||||
if not files:
|
||||
continue
|
||||
print(f"\n [{category}]")
|
||||
for f in files:
|
||||
full = os.path.join(output_dir, f)
|
||||
if os.path.isfile(full):
|
||||
size = os.path.getsize(full)
|
||||
print(f" {f} ({size:,} bytes)")
|
||||
else:
|
||||
print(f" {f} (not found)")
|
||||
|
||||
# Warn about high-risk files present
|
||||
risky = {
|
||||
"xl/vbaProject.bin": "VBA macros — DO NOT modify",
|
||||
"xl/pivotTables": "Pivot tables — update source ranges carefully if shifting rows",
|
||||
"xl/charts": "Charts — update data ranges if shifting rows",
|
||||
}
|
||||
print("\n [High-risk content detected:]")
|
||||
found_any = False
|
||||
for path, warning in risky.items():
|
||||
full = os.path.join(output_dir, path)
|
||||
if os.path.exists(full):
|
||||
print(f" ⚠️ {path} — {warning}")
|
||||
found_any = True
|
||||
if not found_any:
|
||||
print(" ✓ None (safe to edit)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 3:
|
||||
print("Usage: xlsx_unpack.py <input.xlsx> <output_dir>")
|
||||
sys.exit(1)
|
||||
unpack(sys.argv[1], sys.argv[2])
|
||||
Reference in New Issue
Block a user