Files
skills/minimax-xlsx/scripts/shared_strings_builder.py
shihao 6487becf60 Initial commit: add all skills files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 16:52:49 +08:00

164 lines
5.1 KiB
Python

#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
"""
shared_strings_builder.py — Generate a valid sharedStrings.xml from a list of strings.
Usage (strings as command-line arguments):
python3 shared_strings_builder.py "Revenue" "Cost" "Gross Profit" > sharedStrings.xml
Usage (strings from a file, one per line):
python3 shared_strings_builder.py --file strings.txt > sharedStrings.xml
Usage (print index table instead of XML, for reference):
python3 shared_strings_builder.py --index "Revenue" "Cost" "Gross Profit"
python3 shared_strings_builder.py --index --file strings.txt
Output format:
Valid xl/sharedStrings.xml written to stdout.
Redirect to the correct path:
python3 shared_strings_builder.py "A" "B" > /tmp/xlsx_work/xl/sharedStrings.xml
Notes:
- Strings are de-duplicated: identical strings appear only once in the table.
- The 'count' attribute equals the number of unique strings (appropriate for new files
where each string is used in exactly one cell). If a string appears in multiple cells,
manually increment 'count' by the number of extra references.
- Special characters (&, <, >) are automatically XML-escaped.
- Leading/trailing spaces are preserved with xml:space="preserve".
"""
import sys
import html
import argparse
# XML declaration emitted as the first line of the generated sharedStrings.xml.
HEADER = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
# Default namespace placed on the root <sst> element.
SST_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
def escape_text(s: str) -> tuple[str, bool]:
    """
    Return (escaped_text, needs_preserve) for use as the content of a <t> element.

    escaped_text has &, <, > replaced by XML entities (quotes are left alone,
    since the value is element content, not an attribute). Characters that
    cannot survive a trip through XML 1.0 -- C0 control characters other than
    tab and line feed, plus U+FFFE and U+FFFF -- are written using the
    SpreadsheetML ``_xHHHH_`` escape. Literal text that already looks like
    such an escape (e.g. ``_x0041_``) gets its leading underscore encoded as
    ``_x005F_`` so that it is not decoded as a character on load.

    needs_preserve is True if the string has leading or trailing whitespace.
    """
    import re  # local import: the module header does not import re

    # Escape the escape first, so the "_xHHHH_" sequences generated below are
    # not themselves re-escaped. The lookahead handles back-to-back sequences
    # such as "_x0041_x0042_" where one underscore closes one and opens the next.
    protected = re.sub(r"_(?=x[0-9A-Fa-f]{4}_)", "_x005F_", s)

    # Carriage return is legal XML but parsers normalise it to "\n", so it is
    # encoded too in order to round-trip unchanged.
    encoded = re.sub(
        r"[\x00-\x08\x0b-\x1f\ufffe\uffff]",
        lambda m: f"_x{ord(m.group()):04X}_",
        protected,
    )

    escaped = html.escape(encoded, quote=False)
    needs_preserve = s != s.strip()
    return escaped, needs_preserve
def build_xml(strings: list[str]) -> str:
    """
    Render the sharedStrings.xml document for *strings*.

    The caller is expected to pass already de-duplicated strings: both the
    'count' and 'uniqueCount' attributes are set to len(strings). Each entry
    becomes one <si><t>...</t></si> line followed by an XML comment giving
    its zero-based index. Entries flagged by escape_text() as having leading
    or trailing whitespace get xml:space="preserve" on their <t> element.

    Returns the full document as a single newline-terminated string.
    """
    total = len(strings)
    parts = [HEADER, f'<sst xmlns="{SST_NS}" count="{total}" uniqueCount="{total}">']

    for index, text in enumerate(strings):
        body, keep_space = escape_text(text)
        open_tag = '<t xml:space="preserve">' if keep_space else "<t>"
        parts.append(f" <si>{open_tag}{body}</t></si> <!-- index {index} -->")

    parts.append("</sst>")
    return "\n".join(parts) + "\n"
def build_index_table(strings: list[str]) -> str:
    """
    Format *strings* as a plain-text lookup table of index -> repr(string).

    The table has a two-line header, one row per string (index left-aligned
    in a 6-character column, value shown via repr so whitespace is visible),
    a blank line, and a summary line. The result is newline-terminated.
    """
    header = [f"{'Index':<6} String", "-" * 50]
    rows = [f"{pos:<6} {value!r}" for pos, value in enumerate(strings)]
    footer = [
        "",
        f"Total: {len(strings)} unique strings. "
        "Use these indices in <c t=\"s\"><v>N</v></c> cells.",
    ]
    return "\n".join(header + rows + footer) + "\n"
def deduplicate(strings: list[str]) -> list[str]:
    """Return *strings* without repeats, keeping each value at its first position."""
    # dict keys are unique and keep insertion order, so this yields the
    # first occurrence of every string in original order.
    return list(dict.fromkeys(strings))
def load_from_file(path: str) -> list[str]:
    """
    Read one string per line from a UTF-8 text file.

    Lines that are empty or contain only whitespace are skipped. For every
    other line only the trailing newline is removed, so leading and trailing
    spaces are kept as part of the string.

    Raises OSError (e.g. FileNotFoundError) if the file cannot be opened and
    UnicodeDecodeError if it is not valid UTF-8.
    """
    # "utf-8-sig" drops a leading byte-order mark if one is present and reads
    # BOM-less files exactly like "utf-8"; without it the first string of a
    # BOM-prefixed file would start with an invisible U+FEFF.
    with open(path, encoding="utf-8-sig") as f:
        return [line.rstrip("\n") for line in f if line.strip()]
def main() -> None:
    """
    Command-line entry point.

    Collects strings either from --file or from positional arguments,
    de-duplicates them (reporting the number removed on stderr), and prints
    either the sharedStrings.xml document or, with --index, a human-readable
    index table to stdout.

    Exits with status 1 (message on stderr) when the input file cannot be
    read or when no strings were supplied.
    """
    parser = argparse.ArgumentParser(
        description="Generate xl/sharedStrings.xml from a list of strings.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "strings",
        nargs="*",
        metavar="STRING",
        help="String values to include in the shared string table.",
    )
    parser.add_argument(
        "--file",
        "-f",
        metavar="PATH",
        help="Read strings from a file (one string per line) instead of arguments.",
    )
    parser.add_argument(
        "--index",
        action="store_true",
        help="Print a human-readable index table instead of XML output.",
    )
    args = parser.parse_args()

    if args.file:
        try:
            raw = load_from_file(args.file)
        except FileNotFoundError:
            print(f"ERROR: File not found: {args.file}", file=sys.stderr)
            sys.exit(1)
        except OSError as e:
            print(f"ERROR: Cannot read file: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        raw = list(args.strings)

    if not raw:
        print(
            "ERROR: No strings provided.\n"
            "Usage: shared_strings_builder.py \"String1\" \"String2\" ...\n"
            " or: shared_strings_builder.py --file strings.txt",
            file=sys.stderr,
        )
        sys.exit(1)

    strings = deduplicate(raw)
    if len(strings) < len(raw):
        removed = len(raw) - len(strings)
        print(
            f"Note: {removed} duplicate(s) removed. "
            f"{len(strings)} unique strings in table.",
            file=sys.stderr,
        )

    if args.index:
        # The table already ends with a newline; print() adds one more blank
        # line, which is kept so the existing output does not change.
        print(build_index_table(strings))
    else:
        # The XML declaration promises UTF-8, so the bytes written must be
        # UTF-8 even when stdout's locale encoding is something else (common
        # when redirecting to a file on Windows). reconfigure() is looked up
        # defensively because sys.stdout may have been replaced by an object
        # that does not provide it.
        reconfigure = getattr(sys.stdout, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(encoding="utf-8")
        print(build_xml(strings), end="")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()