Files
skills/minimax-xlsx/scripts/xlsx_add_column.py
shihao 6487becf60 Initial commit: add all skills files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 16:52:49 +08:00

396 lines
14 KiB
Python

#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
"""
xlsx_add_column.py — Add a new column to a worksheet in an unpacked xlsx.
Usage examples:
# Add a percentage column with formulas and number format
python3 xlsx_add_column.py /tmp/work/ --col G \\
--sheet "Budget FY2025" \\
--header "% of Total" \\
--formula '=F{row}/$F$10' --formula-rows 2:9 \\
--total-row 10 --total-formula '=SUM(G2:G9)' \\
--numfmt '0.0%'
What it does:
1. Adds header cell (copies style from previous column's header)
2. Adds formula cells for the specified row range
3. Adds a total formula cell if specified
4. Creates a new cell style with the given numfmt if needed
5. Updates sharedStrings.xml for header text
6. Updates dimension ref and column definitions
IMPORTANT: Run on an UNPACKED directory (from xlsx_unpack.py).
After running, repack with xlsx_pack.py.
"""
import argparse
import copy
import os
import re
import sys
import xml.dom.minidom
import xml.etree.ElementTree as ET
NS_SS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
NS_REL = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
ET.register_namespace('', NS_SS)
ET.register_namespace('r', NS_REL)
ET.register_namespace('xdr', 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing')
ET.register_namespace('x14', 'http://schemas.microsoft.com/office/spreadsheetml/2009/9/main')
ET.register_namespace('xr2', 'http://schemas.microsoft.com/office/spreadsheetml/2015/revision2')
ET.register_namespace('mc', 'http://schemas.openxmlformats.org/markup-compatibility/2006')
def _tag(local: str) -> str:
return f"{{{NS_SS}}}{local}"
def _write_tree(tree: ET.ElementTree, path: str) -> None:
tree.write(path, encoding="unicode", xml_declaration=False)
with open(path, "r", encoding="utf-8") as fh:
raw = fh.read()
try:
dom = xml.dom.minidom.parseString(raw.encode("utf-8"))
pretty = dom.toprettyxml(indent=" ", encoding="utf-8").decode("utf-8")
lines = [line for line in pretty.splitlines() if line.strip()]
with open(path, "w", encoding="utf-8") as fh:
fh.write("\n".join(lines) + "\n")
except Exception:
pass
def col_number(s: str) -> int:
n = 0
for c in s.upper():
n = n * 26 + (ord(c) - 64)
return n
def col_letter(n: int) -> str:
r = ""
while n > 0:
n, rem = divmod(n - 1, 26)
r = chr(65 + rem) + r
return r
def find_ws_path(work_dir: str, sheet_name: str | None) -> str:
wb_tree = ET.parse(os.path.join(work_dir, "xl", "workbook.xml"))
rid = None
for sheet in wb_tree.getroot().iter(_tag("sheet")):
if sheet_name is None or sheet.get("name") == sheet_name:
rid = sheet.get(f"{{{NS_REL}}}id")
break
if rid is None:
print(f"ERROR: Sheet not found: {sheet_name}")
sys.exit(1)
rels_tree = ET.parse(os.path.join(work_dir, "xl", "_rels", "workbook.xml.rels"))
for rel in rels_tree.getroot():
if rel.get("Id") == rid:
return os.path.join(work_dir, "xl", rel.get("Target"))
print(f"ERROR: Relationship not found: {rid}")
sys.exit(1)
def add_shared_string(work_dir: str, text: str) -> int:
ss_path = os.path.join(work_dir, "xl", "sharedStrings.xml")
tree = ET.parse(ss_path)
root = tree.getroot()
idx = 0
for si in root.findall(_tag("si")):
t_el = si.find(_tag("t"))
if t_el is not None and t_el.text == text:
return idx
idx += 1
si = ET.SubElement(root, _tag("si"))
t = ET.SubElement(si, _tag("t"))
t.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")
t.text = text
root.set("count", str(int(root.get("count", "0")) + 1))
root.set("uniqueCount", str(int(root.get("uniqueCount", "0")) + 1))
_write_tree(tree, ss_path)
return idx
def get_cell_style(ws_tree: ET.ElementTree, col: str, row: int) -> int:
ref = f"{col}{row}"
for row_el in ws_tree.getroot().iter(_tag("row")):
if row_el.get("r") == str(row):
for c in row_el:
if c.get("r") == ref:
return int(c.get("s", "0"))
return 0
def ensure_numfmt_style(work_dir: str, ref_style_idx: int, numfmt_code: str) -> int:
"""Clone a cellXfs entry with the given numfmt. Returns new style index."""
styles_path = os.path.join(work_dir, "xl", "styles.xml")
tree = ET.parse(styles_path)
root = tree.getroot()
# Find or add numFmt
numfmts = root.find(_tag("numFmts"))
numfmt_id = None
if numfmts is not None:
for nf in numfmts:
if nf.get("formatCode") == numfmt_code:
numfmt_id = int(nf.get("numFmtId"))
break
if numfmt_id is None:
max_id = 163
if numfmts is not None:
for nf in numfmts:
max_id = max(max_id, int(nf.get("numFmtId", "0")))
else:
numfmts = ET.SubElement(root, _tag("numFmts"))
numfmts.set("count", "0")
root.remove(numfmts)
root.insert(0, numfmts)
numfmt_id = max_id + 1
nf = ET.SubElement(numfmts, _tag("numFmt"))
nf.set("numFmtId", str(numfmt_id))
nf.set("formatCode", numfmt_code)
numfmts.set("count", str(len(list(numfmts))))
# Find or create cellXfs entry
cellxfs = root.find(_tag("cellXfs"))
xf_list = list(cellxfs)
ref_xf = xf_list[min(ref_style_idx, len(xf_list) - 1)]
for i, xf in enumerate(xf_list):
if (xf.get("numFmtId") == str(numfmt_id) and
xf.get("fontId") == ref_xf.get("fontId") and
xf.get("fillId") == ref_xf.get("fillId") and
xf.get("borderId") == ref_xf.get("borderId")):
return i
new_xf = copy.deepcopy(ref_xf)
new_xf.set("numFmtId", str(numfmt_id))
new_xf.set("applyNumberFormat", "true")
cellxfs.append(new_xf)
cellxfs.set("count", str(len(list(cellxfs))))
_write_tree(tree, styles_path)
return len(list(cellxfs)) - 1
def _apply_border_to_row(work_dir: str, ws_path: str, ws_tree: ET.ElementTree,
ws_root: ET.Element, row_map: dict, border_row: int,
border_style: str, new_col: str) -> None:
"""Apply a top border to ALL cells in the specified row (A through new_col)."""
styles_path = os.path.join(work_dir, "xl", "styles.xml")
st_tree = ET.parse(styles_path)
st_root = st_tree.getroot()
# 1. Create a new border entry with the specified top style
borders = st_root.find(_tag("borders"))
new_border = ET.SubElement(borders, _tag("border"))
for side in ("left", "right"):
ET.SubElement(new_border, _tag(side))
top_el = ET.SubElement(new_border, _tag("top"))
top_el.set("style", border_style)
ET.SubElement(new_border, _tag("bottom"))
ET.SubElement(new_border, _tag("diagonal"))
borders.set("count", str(len(list(borders))))
new_border_id = len(list(borders)) - 1
# 2. For each existing style used in the row, create a clone with the new borderId
cellxfs = st_root.find(_tag("cellXfs"))
style_remap = {} # old_style_idx -> new_style_idx
if border_row not in row_map:
return
row_el = row_map[border_row]
# Collect all cells in this row and their styles
for c in row_el:
old_s = int(c.get("s", "0"))
if old_s not in style_remap:
xf_list = list(cellxfs)
ref_xf = xf_list[min(old_s, len(xf_list) - 1)]
new_xf = copy.deepcopy(ref_xf)
new_xf.set("borderId", str(new_border_id))
new_xf.set("applyBorder", "true")
cellxfs.append(new_xf)
cellxfs.set("count", str(len(list(cellxfs))))
style_remap[old_s] = len(list(cellxfs)) - 1
# 3. Apply remapped styles to all cells in the row
for c in row_el:
old_s = int(c.get("s", "0"))
if old_s in style_remap:
c.set("s", str(style_remap[old_s]))
_write_tree(st_tree, styles_path)
last_col_num = col_number(new_col)
print(f" Applied {border_style} top border to all cells in row {border_row} "
f"(A-{new_col}, {len(style_remap)} style(s) cloned)")
def main() -> None:
parser = argparse.ArgumentParser(
description="Add a column to a worksheet in an unpacked xlsx")
parser.add_argument("work_dir", help="Unpacked xlsx working directory")
parser.add_argument("--col", required=True, help="Column letter (e.g., G)")
parser.add_argument("--sheet", default=None, help="Sheet name (default: first)")
parser.add_argument("--header", default=None, help="Header text for row 1")
parser.add_argument("--formula", default=None,
help="Formula template with {row} placeholder")
parser.add_argument("--formula-rows", default=None,
help="Row range for formulas (e.g., 2:9)")
parser.add_argument("--total-row", type=int, default=None,
help="Row number for total formula")
parser.add_argument("--total-formula", default=None,
help="Formula for total row")
parser.add_argument("--numfmt", default=None,
help="Number format for data/total cells (e.g., 0.0%%)")
parser.add_argument("--border-row", type=int, default=None,
help="Row to apply a top border to ALL cells (e.g., 10)")
parser.add_argument("--border-style", default="medium",
help="Border style: thin, medium, thick (default: medium)")
args = parser.parse_args()
col = args.col.upper()
prev_col = col_letter(col_number(col) - 1) if col_number(col) > 1 else "A"
ws_path = find_ws_path(args.work_dir, args.sheet)
ws_tree = ET.parse(ws_path)
changes = 0
print(f"Adding column {col} to {os.path.basename(ws_path)}")
# Resolve styles from previous column
header_style = get_cell_style(ws_tree, prev_col, 1) if args.header else 0
data_style = None
if args.formula_rows:
start_row = int(args.formula_rows.split(":")[0])
ref = get_cell_style(ws_tree, prev_col, start_row)
data_style = (ensure_numfmt_style(args.work_dir, ref, args.numfmt)
if args.numfmt else ref)
total_style = None
if args.total_row:
ref = get_cell_style(ws_tree, prev_col, args.total_row)
total_style = (ensure_numfmt_style(args.work_dir, ref, args.numfmt)
if args.numfmt else ref)
# Add header to sharedStrings
header_idx = add_shared_string(args.work_dir, args.header) if args.header else None
# Re-parse worksheet (sharedStrings write may have changed state)
ws_tree = ET.parse(ws_path)
root = ws_tree.getroot()
sheet_data = root.find(_tag("sheetData"))
row_map = {}
for row_el in sheet_data:
r = row_el.get("r")
if r:
row_map[int(r)] = row_el
# Add header cell
if args.header and 1 in row_map:
cell = ET.SubElement(row_map[1], _tag("c"))
cell.set("r", f"{col}1")
cell.set("s", str(header_style))
cell.set("t", "s")
v = ET.SubElement(cell, _tag("v"))
v.text = str(header_idx)
changes += 1
print(f" {col}1 = \"{args.header}\" (header, style={header_style})")
# Add formula cells
if args.formula and args.formula_rows:
start, end = map(int, args.formula_rows.split(":"))
for row_num in range(start, end + 1):
if row_num not in row_map:
row_el = ET.SubElement(sheet_data, _tag("row"))
row_el.set("r", str(row_num))
row_map[row_num] = row_el
formula_text = args.formula.replace("{row}", str(row_num))
formula_text = formula_text.lstrip("=")
cell = ET.SubElement(row_map[row_num], _tag("c"))
cell.set("r", f"{col}{row_num}")
if data_style is not None:
cell.set("s", str(data_style))
f_el = ET.SubElement(cell, _tag("f"))
f_el.text = formula_text
changes += 1
print(f" {col}{start}:{col}{end} = formulas (style={data_style})")
# Add total formula
if args.total_row and args.total_formula:
if args.total_row not in row_map:
row_el = ET.SubElement(sheet_data, _tag("row"))
row_el.set("r", str(args.total_row))
row_map[args.total_row] = row_el
total_f = args.total_formula.lstrip("=")
cell = ET.SubElement(row_map[args.total_row], _tag("c"))
cell.set("r", f"{col}{args.total_row}")
if total_style is not None:
cell.set("s", str(total_style))
f_el = ET.SubElement(cell, _tag("f"))
f_el.text = total_f
changes += 1
print(f" {col}{args.total_row} = ={total_f} (style={total_style})")
# Update dimension
for dim in root.iter(_tag("dimension")):
old_ref = dim.get("ref", "")
if ":" in old_ref:
start_ref, end_ref = old_ref.split(":")
end_col_str = re.match(r"([A-Z]+)", end_ref).group(1)
end_row_str = re.search(r"(\d+)", end_ref).group(1)
if col_number(col) > col_number(end_col_str):
new_ref = f"{start_ref}:{col}{end_row_str}"
dim.set("ref", new_ref)
print(f" Dimension: {old_ref}{new_ref}")
# Extend <cols> to cover new column
cols_el = root.find(_tag("cols"))
if cols_el is not None:
new_col_num = col_number(col)
covered = any(
int(c.get("min", "0")) <= new_col_num <= int(c.get("max", "0"))
for c in cols_el
)
if not covered:
prev_num = col_number(prev_col)
for c in cols_el:
if int(c.get("min", "0")) <= prev_num <= int(c.get("max", "0")):
new_col_def = copy.deepcopy(c)
new_col_def.set("min", str(new_col_num))
new_col_def.set("max", str(new_col_num))
cols_el.append(new_col_def)
print(f" Added <col> definition for column {col}")
break
# Apply border to entire row if requested
if args.border_row:
_apply_border_to_row(args.work_dir, ws_path, ws_tree, root,
row_map, args.border_row, args.border_style,
col)
_write_tree(ws_tree, ws_path)
print(f"\nDone. {changes} cells added.")
print(f"\nNext: python3 xlsx_pack.py {args.work_dir} output.xlsx")
if __name__ == "__main__":
main()