Files
skills/minimax-xlsx/scripts/xlsx_shift_rows.py
shihao 6487becf60 Initial commit: add all skills files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 16:52:49 +08:00

397 lines
13 KiB
Python

#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
"""
xlsx_shift_rows.py — Shift all row references in an unpacked xlsx working directory
after inserting or deleting rows.
Usage:
# Insert 2 rows at row 5 (rows 5+ shift down by 2)
python3 xlsx_shift_rows.py <work_dir> insert 5 2
# Delete 1 row at row 8 (rows 9+ shift up by 1)
python3 xlsx_shift_rows.py <work_dir> delete 8 1
What it updates in every XML file under <work_dir>:
- <row r="N"> attributes in worksheet sheetData
- <c r="XN"> cell address attributes in worksheet sheetData
- <f> formula text: absolute row references (e.g. B7, $B$7, $B7) in all sheets
- <mergeCell ref="A5:C7"> ranges
- <conditionalFormatting sqref="..."> ranges
- <dataValidations sqref="..."> ranges
- <dimension ref="A1:D20"> extent marker
- Table <table ref="A1:D20"> in xl/tables/*.xml
- Chart series <numRef><f> and <strRef><f> range references in xl/charts/*.xml
- PivotCache source <worksheetSource ref="..."> in xl/pivotCaches/*.xml
IMPORTANT: Run this script on the UNPACKED directory before repacking.
After running, repack with xlsx_pack.py and re-validate with formula_check.py.
Limitations:
- Named ranges in workbook.xml <definedNames> are NOT updated automatically.
Review them manually after running this script.
- Structured table references (Table[@Column]) are NOT updated.
- External workbook links in xl/externalLinks/ are NOT updated.
"""
import sys
import os
import re
import xml.etree.ElementTree as ET
import xml.dom.minidom
def col_letter(n: int) -> str:
"""Convert 1-based column number to Excel column letter(s)."""
r = ""
while n > 0:
n, rem = divmod(n - 1, 26)
r = chr(65 + rem) + r
return r
def col_number(s: str) -> int:
"""Convert Excel column letter(s) to 1-based column number."""
n = 0
for c in s.upper():
n = n * 26 + (ord(c) - 64)
return n
# ---------------------------------------------------------------------------
# Core shifting logic for formula strings
# ---------------------------------------------------------------------------
def _shift_refs(text: str, at: int, delta: int) -> str:
"""Shift cell references in a non-quoted formula fragment."""
def replacer(m: re.Match) -> str:
dollar_col = m.group(1) # "$" or ""
col_part = m.group(2) # e.g. "B" or "AB"
dollar_row = m.group(3) # "$" or ""
row_str = m.group(4) # e.g. "7"
row = int(row_str)
if row >= at:
row = max(1, row + delta)
return f"{dollar_col}{col_part}{dollar_row}{row}"
pattern = r'(\$?)([A-Z]+)(\$?)(\d+)'
return re.sub(pattern, replacer, text)
def shift_formula(formula: str, at: int, delta: int) -> str:
"""
Shift absolute and mixed row references >= `at` by `delta` in a formula string.
Handles:
B7 (relative col, absolute row — shifts if row >= at)
$B$7 (absolute col, absolute row — shifts)
$B7 (absolute col, relative row — shifts)
B$7 (relative col, absolute — shifts)
BUT NOT: B:B (whole-column reference — left as-is)
Skips content inside single-quoted sheet name prefixes to avoid
corrupting names like 'Budget FY2025' (where FY2025 is NOT a cell ref).
Does NOT handle:
- Named ranges
- Structured references (Table[@Col])
- R1C1 notation
"""
# Split on quoted sheet names: 'Sheet Name' portions are odd-indexed
segments = re.split(r"('[^']*(?:''[^']*)*')", formula)
result = []
for i, seg in enumerate(segments):
if i % 2 == 1:
result.append(seg)
else:
result.append(_shift_refs(seg, at, delta))
return "".join(result)
def shift_sqref(sqref: str, at: int, delta: int) -> str:
"""
Shift row references in a sqref string (space-separated cell/range addresses).
E.g. "A5:D20 B30" → shift rows >= 5 by delta.
"""
parts = sqref.split()
result = []
for part in parts:
if ':' in part:
left, right = part.split(':', 1)
left = shift_formula(left, at, delta)
right = shift_formula(right, at, delta)
result.append(f"{left}:{right}")
else:
result.append(shift_formula(part, at, delta))
return " ".join(result)
def shift_chart_range(text: str, at: int, delta: int) -> str:
"""
Shift row references inside a chart range formula like:
Sheet1!$B$5:$B$20
'Q1 Data'!$A$3:$A$15
"""
# Split on the "!" to preserve sheet name
if '!' not in text:
return text
bang = text.index('!')
sheet_part = text[:bang + 1]
range_part = text[bang + 1:]
return sheet_part + shift_formula(range_part, at, delta)
# ---------------------------------------------------------------------------
# XML file processors
# ---------------------------------------------------------------------------
NS_MAIN = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
NS_DRAWING = "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing"
# Namespace map used by ElementTree for tag lookup
NSMAP = {"ss": NS_MAIN}
def _tag(local: str) -> str:
return f"{{{NS_MAIN}}}{local}"
def process_worksheet(path: str, at: int, delta: int) -> int:
"""Update row/cell references in a worksheet XML. Returns change count."""
tree = ET.parse(path)
root = tree.getroot()
changes = 0
# 1. <dimension ref="A1:D20">
for dim in root.iter(_tag("dimension")):
old = dim.get("ref", "")
new = shift_sqref(old, at, delta)
if new != old:
dim.set("ref", new)
changes += 1
# 2. <row r="N"> and <c r="XN"> inside sheetData
sheet_data = root.find(_tag("sheetData"))
if sheet_data is not None:
rows_to_reorder = []
for row_el in list(sheet_data):
r_str = row_el.get("r")
if r_str is None:
continue
r = int(r_str)
if r >= at:
new_r = max(1, r + delta)
row_el.set("r", str(new_r))
changes += 1
# Update each cell's r attribute
for cell_el in row_el:
cell_ref = cell_el.get("r", "")
if cell_ref:
new_ref = shift_formula(cell_ref, at, delta)
if new_ref != cell_ref:
cell_el.set("r", new_ref)
changes += 1
# Also update formulas in every row (formulas can reference any row)
for cell_el in row_el:
f_el = cell_el.find(_tag("f"))
if f_el is not None and f_el.text:
new_f = shift_formula(f_el.text, at, delta)
if new_f != f_el.text:
f_el.text = new_f
changes += 1
# 3. <mergeCell ref="A5:C7">
for mc in root.iter(_tag("mergeCell")):
old = mc.get("ref", "")
new = shift_sqref(old, at, delta)
if new != old:
mc.set("ref", new)
changes += 1
# 4. <conditionalFormatting sqref="...">
for cf in root.iter(_tag("conditionalFormatting")):
old = cf.get("sqref", "")
new = shift_sqref(old, at, delta)
if new != old:
cf.set("sqref", new)
changes += 1
# 5. <dataValidation sqref="...">
for dv in root.iter(_tag("dataValidation")):
old = dv.get("sqref", "")
new = shift_sqref(old, at, delta)
if new != old:
dv.set("sqref", new)
changes += 1
if changes > 0:
_write_tree(tree, path)
return changes
def process_chart(path: str, at: int, delta: int) -> int:
"""Update data range references in a chart XML."""
# Charts use DrawingML namespace; we look for <f> elements with range strings
with open(path, "r", encoding="utf-8") as fh:
content = fh.read()
# Pattern matches content of <f>Sheet1!$A$1:$A$10</f> style elements
def replace_f(m: re.Match) -> str:
tag_open = m.group(1)
inner = m.group(2)
tag_close = m.group(3)
new_inner = shift_chart_range(inner, at, delta)
return f"{tag_open}{new_inner}{tag_close}"
new_content = re.sub(r'(<(?:[^:>]+:)?f>)([^<]+)(</(?:[^:>]+:)?f>)',
replace_f, content)
changes = content != new_content
if changes:
with open(path, "w", encoding="utf-8") as fh:
fh.write(new_content)
return 1 if changes else 0
def process_table(path: str, at: int, delta: int) -> int:
"""Update the ref attribute on the <table> root element."""
tree = ET.parse(path)
root = tree.getroot()
# The root element IS the table
old = root.get("ref", "")
if not old:
return 0
new = shift_sqref(old, at, delta)
if new == old:
return 0
root.set("ref", new)
_write_tree(tree, path)
return 1
def process_pivot_cache(path: str, at: int, delta: int) -> int:
"""Update worksheetSource ref in a pivot cache definition."""
tree = ET.parse(path)
root = tree.getroot()
changes = 0
# Look for <worksheetSource ref="A1:D100" ...>
for ws in root.iter():
if ws.tag.endswith("}worksheetSource") or ws.tag == "worksheetSource":
old = ws.get("ref", "")
if old:
new = shift_sqref(old, at, delta)
if new != old:
ws.set("ref", new)
changes += 1
if changes:
_write_tree(tree, path)
return changes
def _write_tree(tree: ET.ElementTree, path: str) -> None:
"""Write ElementTree back to file with pretty-printing."""
tree.write(path, encoding="unicode", xml_declaration=False)
# Re-pretty-print for readability
with open(path, "r", encoding="utf-8") as fh:
raw = fh.read()
try:
dom = xml.dom.minidom.parseString(raw.encode("utf-8"))
pretty = dom.toprettyxml(indent=" ", encoding="utf-8").decode("utf-8")
lines = [line for line in pretty.splitlines() if line.strip()]
with open(path, "w", encoding="utf-8") as fh:
fh.write("\n".join(lines) + "\n")
except Exception:
pass # If pretty-print fails, leave the file as-is
# ---------------------------------------------------------------------------
# Main driver
# ---------------------------------------------------------------------------
def main() -> None:
if len(sys.argv) < 5:
print(__doc__)
sys.exit(1)
work_dir = sys.argv[1]
operation = sys.argv[2].lower()
at = int(sys.argv[3])
count = int(sys.argv[4])
if operation not in ("insert", "delete"):
print(f"ERROR: operation must be 'insert' or 'delete', got '{operation}'")
sys.exit(1)
if operation == "insert":
delta = count
else:
delta = -count
if not os.path.isdir(work_dir):
print(f"ERROR: Directory not found: {work_dir}")
sys.exit(1)
print(f"Operation : {operation} {count} row(s) at row {at} (delta={delta:+d})")
print(f"Work dir : {work_dir}")
print()
total_changes = 0
# Process all worksheets
ws_dir = os.path.join(work_dir, "xl", "worksheets")
if os.path.isdir(ws_dir):
for fname in sorted(os.listdir(ws_dir)):
if fname.endswith(".xml"):
fpath = os.path.join(ws_dir, fname)
n = process_worksheet(fpath, at, delta)
if n:
print(f" Updated {n:3d} references in xl/worksheets/{fname}")
total_changes += n
# Process all charts
charts_dir = os.path.join(work_dir, "xl", "charts")
if os.path.isdir(charts_dir):
for fname in sorted(os.listdir(charts_dir)):
if fname.endswith(".xml"):
fpath = os.path.join(charts_dir, fname)
n = process_chart(fpath, at, delta)
if n:
print(f" Updated chart ranges in xl/charts/{fname}")
total_changes += n
# Process all tables
tables_dir = os.path.join(work_dir, "xl", "tables")
if os.path.isdir(tables_dir):
for fname in sorted(os.listdir(tables_dir)):
if fname.endswith(".xml"):
fpath = os.path.join(tables_dir, fname)
n = process_table(fpath, at, delta)
if n:
print(f" Updated table ref in xl/tables/{fname}")
total_changes += n
# Process pivot cache definitions
cache_dir = os.path.join(work_dir, "xl", "pivotCaches")
if os.path.isdir(cache_dir):
for fname in sorted(os.listdir(cache_dir)):
if "Definition" in fname and fname.endswith(".xml"):
fpath = os.path.join(cache_dir, fname)
n = process_pivot_cache(fpath, at, delta)
if n:
print(f" Updated pivot source range in xl/pivotCaches/{fname}")
total_changes += n
print()
print(f"Total changes: {total_changes}")
print()
print("IMPORTANT: Review named ranges in xl/workbook.xml <definedNames> manually.")
print(" Structured table references (Table[@Col]) are NOT updated.")
print()
print("Next steps:")
print(" 1. Review the changes above")
print(f" 2. python3 xlsx_pack.py {work_dir} output.xlsx")
print(" 3. python3 formula_check.py output.xlsx")
if __name__ == "__main__":
main()