Initial commit: add all skills files

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-10 16:52:49 +08:00
commit 6487becf60
396 changed files with 108871 additions and 0 deletions

View File

@@ -0,0 +1,147 @@
using System.CommandLine;
using System.IO.Compression;
using System.Text.Json;
using System.Xml.Linq;
namespace MiniMaxAIDocx.Core.Commands;
public static class AnalyzeCommand
{
    // WordprocessingML main namespace; covers the elements read from both
    // word/document.xml and word/styles.xml.
    private static readonly XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    /// <summary>
    /// Builds the "analyze" command: reads a DOCX package directly as a ZIP archive
    /// (no Open XML SDK dependency) and reports sections, headings, tables, images,
    /// header/footer references, paragraph and word counts, XML part sizes, and
    /// custom style ids. Output is human-readable by default, or JSON with --json.
    /// </summary>
    public static Command Create()
    {
        var inputOption = new Option<string>("--input") { Description = "DOCX file to analyze", Required = true };
        var jsonOption = new Option<bool>("--json") { Description = "Output as JSON" };
        var cmd = new Command("analyze", "Analyze document structure and styles")
        {
            inputOption, jsonOption
        };
        cmd.SetAction((parseResult) =>
        {
            var input = parseResult.GetValue(inputOption)!;
            var asJson = parseResult.GetValue(jsonOption);
            if (!File.Exists(input))
            {
                Console.Error.WriteLine($"File not found: {input}");
                return;
            }
            using var zip = ZipFile.OpenRead(input);
            var docEntry = zip.GetEntry("word/document.xml");
            if (docEntry == null)
            {
                Console.Error.WriteLine("Not a valid DOCX");
                return;
            }
            XDocument doc;
            using (var stream = docEntry.Open())
                doc = XDocument.Load(stream);
            var body = doc.Root?.Element(W + "body");
            if (body == null) return;

            // Sections: one w:sectPr per section; an absent w:type means a "nextPage" break.
            var sections = body.Descendants(W + "sectPr").ToList();
            var sectionBreaks = sections.Select(s => (string?)s.Element(W + "type")?.Attribute(W + "val") ?? "nextPage").ToList();

            // Enumerate paragraphs once and reuse the list for heading detection
            // and the paragraph count (the original walked the body twice).
            var paragraphs = body.Descendants(W + "p").ToList();

            // Headings: paragraphs whose style id starts with "Heading" (Heading1..Heading9).
            var headings = new List<object>();
            foreach (var p in paragraphs)
            {
                var style = (string?)p.Element(W + "pPr")?.Element(W + "pStyle")?.Attribute(W + "val");
                if (style?.StartsWith("Heading", StringComparison.OrdinalIgnoreCase) == true)
                {
                    var text = string.Concat(p.Descendants(W + "t").Select(t => t.Value));
                    headings.Add(new { style, text });
                }
            }

            // Tables: the column count is taken from the first row only, so vertically
            // merged or irregular rows are not accounted for.
            var tables = body.Descendants(W + "tbl").Select(tbl => new
            {
                rows = tbl.Elements(W + "tr").Count(),
                cols = tbl.Elements(W + "tr").FirstOrDefault()?.Elements(W + "tc").Count() ?? 0
            }).ToList();

            // Images: every w:drawing element counts, inline or floating.
            var images = body.Descendants(W + "drawing").Count();

            // Header/footer references declared on the section properties.
            var headerRefs = sections.SelectMany(s => s.Elements(W + "headerReference")).Count();
            var footerRefs = sections.SelectMany(s => s.Elements(W + "footerReference")).Count();

            // Estimated word count: whitespace-split over the concatenated w:t runs.
            var allText = string.Concat(body.Descendants(W + "t").Select(t => t.Value));
            var wordCount = allText.Split(new[] { ' ', '\t', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries).Length;

            // Sizes of the XML parts under word/, largest first. Ordinal comparison:
            // entry names are machine identifiers, not user text.
            var fileSizes = zip.Entries
                .Where(e => e.FullName.StartsWith("word/", StringComparison.Ordinal)
                         && e.FullName.EndsWith(".xml", StringComparison.Ordinal))
                .Select(e => new { file = e.FullName, size = e.Length })
                .OrderByDescending(e => e.size)
                .ToList();

            // Custom styles: styles.xml entries flagged with w:customStyle="1".
            var styleNames = new List<string>();
            var stylesEntry = zip.GetEntry("word/styles.xml");
            if (stylesEntry != null)
            {
                using var stream = stylesEntry.Open();
                var stylesDoc = XDocument.Load(stream);
                styleNames = stylesDoc.Descendants(W + "style")
                    .Where(s => (string?)s.Attribute(W + "customStyle") == "1")
                    .Select(s => (string?)s.Attribute(W + "styleId") ?? "")
                    .Where(s => s != "")
                    .ToList();
            }
            var analysis = new
            {
                sections = new { count = sections.Count, breakTypes = sectionBreaks },
                headings,
                tables = new { count = tables.Count, details = tables },
                images,
                headerFooter = new { headers = headerRefs, footers = footerRefs },
                paragraphs = paragraphs.Count,
                estimatedWordCount = wordCount,
                xmlFileSizes = fileSizes,
                customStyles = new { count = styleNames.Count, names = styleNames }
            };
            if (asJson)
            {
                Console.WriteLine(JsonSerializer.Serialize(analysis, new JsonSerializerOptions { WriteIndented = true }));
            }
            else
            {
                Console.WriteLine($"Sections: {sections.Count} ({string.Join(", ", sectionBreaks)})");
                Console.WriteLine($"Headings: {headings.Count}");
                foreach (var h in headings)
                    Console.WriteLine($" {h}");
                Console.WriteLine($"Tables: {tables.Count}");
                foreach (var t in tables)
                    Console.WriteLine($" {t.rows} rows x {t.cols} cols");
                Console.WriteLine($"Images: {images}");
                Console.WriteLine($"Headers: {headerRefs}");
                Console.WriteLine($"Footers: {footerRefs}");
                Console.WriteLine($"Paragraphs: {paragraphs.Count}");
                Console.WriteLine($"Word count: ~{wordCount}");
                Console.WriteLine($"Custom styles: {styleNames.Count}");
                foreach (var s in styleNames)
                    Console.WriteLine($" {s}");
                Console.WriteLine("XML file sizes:");
                foreach (var f in fileSizes)
                    Console.WriteLine($" {f.file}: {f.size:N0} bytes");
            }
        });
        return cmd;
    }
}

View File

@@ -0,0 +1,322 @@
using System.CommandLine;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
namespace MiniMaxAIDocx.Core.Commands;
/// <summary>
/// Scenario C: Apply formatting from a template DOCX to a source DOCX.
/// Copies styles, theme, numbering, headers/footers, and section properties
/// from the template while preserving all content from the source.
/// </summary>
public static class ApplyTemplateCommand
{
/// <summary>
/// Builds the "apply-template" command. Styles, theme, numbering, and section
/// properties are applied by default; headers/footers are opt-in because they
/// overwrite the source document's own headers and footers.
/// </summary>
public static Command Create()
{
var inputOpt = new Option<string>("--input") { Description = "Source DOCX (content to keep)", Required = true };
var templateOpt = new Option<string>("--template") { Description = "Template DOCX (formatting to apply)", Required = true };
var outputOpt = new Option<string>("--output") { Description = "Output DOCX file path", Required = true };
var applyStylesOpt = new Option<bool>("--apply-styles") { Description = "Copy styles.xml from template" };
applyStylesOpt.DefaultValueFactory = _ => true;
var applyThemeOpt = new Option<bool>("--apply-theme") { Description = "Copy theme from template" };
applyThemeOpt.DefaultValueFactory = _ => true;
var applyNumberingOpt = new Option<bool>("--apply-numbering") { Description = "Copy numbering.xml from template" };
applyNumberingOpt.DefaultValueFactory = _ => true;
var applyHeadersFootersOpt = new Option<bool>("--apply-headers-footers") { Description = "Copy headers/footers from template" };
var applySectionsOpt = new Option<bool>("--apply-sections") { Description = "Apply section properties from template" };
applySectionsOpt.DefaultValueFactory = _ => true;
var cmd = new Command("apply-template", "Apply template formatting to a DOCX")
{
inputOpt, templateOpt, outputOpt, applyStylesOpt, applyThemeOpt,
applyNumberingOpt, applyHeadersFootersOpt, applySectionsOpt
};
cmd.SetAction((parseResult) =>
{
var inputPath = parseResult.GetValue(inputOpt)!;
var templatePath = parseResult.GetValue(templateOpt)!;
var outputPath = parseResult.GetValue(outputOpt)!;
var applyStyles = parseResult.GetValue(applyStylesOpt);
var applyTheme = parseResult.GetValue(applyThemeOpt);
var applyNumbering = parseResult.GetValue(applyNumberingOpt);
var applyHeadersFooters = parseResult.GetValue(applyHeadersFootersOpt);
var applySections = parseResult.GetValue(applySectionsOpt);
if (!File.Exists(inputPath)) { Console.Error.WriteLine($"Input file not found: {inputPath}"); return; }
if (!File.Exists(templatePath)) { Console.Error.WriteLine($"Template file not found: {templatePath}"); return; }
// Create output as a copy of the source
// (the source is never opened for writing; all edits happen on the copy).
File.Copy(inputPath, outputPath, overwrite: true);
using var output = WordprocessingDocument.Open(outputPath, true);
using var template = WordprocessingDocument.Open(templatePath, false);
var outputMain = output.MainDocumentPart;
var templateMain = template.MainDocumentPart;
if (outputMain == null || templateMain == null)
{
// NOTE(review): at this point the output copy already exists on disk even
// though nothing was applied — the partial file is left behind.
Console.Error.WriteLine("Invalid document: missing main document part.");
return;
}
int appliedCount = 0;
if (applyStyles)
{
CopyStyles(templateMain, outputMain);
appliedCount++;
Console.WriteLine(" Applied: styles");
}
if (applyTheme)
{
CopyTheme(templateMain, outputMain);
appliedCount++;
Console.WriteLine(" Applied: theme");
}
if (applyNumbering)
{
CopyNumbering(templateMain, outputMain);
appliedCount++;
Console.WriteLine(" Applied: numbering");
}
if (applyHeadersFooters)
{
CopyHeadersAndFooters(templateMain, outputMain);
appliedCount++;
Console.WriteLine(" Applied: headers/footers");
}
if (applySections)
{
CopySectionProperties(templateMain, outputMain);
appliedCount++;
Console.WriteLine(" Applied: section properties");
}
outputMain.Document.Save();
Console.WriteLine($"Applied {appliedCount} formatting component(s) from template to {outputPath}");
});
return cmd;
}
/// <summary>
/// Replaces the output's StyleDefinitionsPart with the template's version.
/// The old part (if any) is deleted first; FeedData streams the template's
/// raw XML into the new part unchanged.
/// </summary>
private static void CopyStyles(MainDocumentPart template, MainDocumentPart output)
{
var templateStyles = template.StyleDefinitionsPart;
if (templateStyles == null) return;
if (output.StyleDefinitionsPart != null)
output.DeletePart(output.StyleDefinitionsPart);
var newStylesPart = output.AddNewPart<StyleDefinitionsPart>();
using var stream = templateStyles.GetStream(FileMode.Open, FileAccess.Read);
newStylesPart.FeedData(stream);
}
/// <summary>
/// Replaces the output's ThemePart with the template's version.
/// Same delete-then-feed pattern as CopyStyles.
/// </summary>
private static void CopyTheme(MainDocumentPart template, MainDocumentPart output)
{
var templateTheme = template.ThemePart;
if (templateTheme == null) return;
if (output.ThemePart != null)
output.DeletePart(output.ThemePart);
var newThemePart = output.AddNewPart<ThemePart>();
using var stream = templateTheme.GetStream(FileMode.Open, FileAccess.Read);
newThemePart.FeedData(stream);
}
/// <summary>
/// Copies numbering definitions from template, remapping numbering IDs
/// referenced in the output document's paragraphs.
/// </summary>
private static void CopyNumbering(MainDocumentPart template, MainDocumentPart output)
{
var templateNumbering = template.NumberingDefinitionsPart;
if (templateNumbering == null) return;
// Collect every w:numId the output's content references so we can report them.
var referencedNumIds = new HashSet<string>();
var body = output.Document.Body;
if (body != null)
{
foreach (var numId in body.Descendants<NumberingId>())
{
if (numId.Val?.Value != null)
referencedNumIds.Add(numId.Val.Value.ToString());
}
}
// Wholesale replacement of numbering.xml with the template's copy.
if (output.NumberingDefinitionsPart != null)
output.DeletePart(output.NumberingDefinitionsPart);
var newNumberingPart = output.AddNewPart<NumberingDefinitionsPart>();
using var stream = templateNumbering.GetStream(FileMode.Open, FileAccess.Read);
newNumberingPart.FeedData(stream);
// NOTE(review): despite the summary, no actual ID remapping is performed here —
// the content's existing w:numId values are left as-is and simply resolve
// against the template's definitions. Lists whose ids don't exist in the
// template lose their numbering. Verify whether a real remap is intended.
if (referencedNumIds.Count > 0)
{
Console.WriteLine($" Note: {referencedNumIds.Count} numbering reference(s) in document content mapped to template definitions.");
}
}
/// <summary>
/// Copies headers and footers from the template, remapping relationship IDs.
/// Only the template's FINAL section's references are copied; they are applied
/// to the output's final section (earlier output sections keep no references).
/// </summary>
private static void CopyHeadersAndFooters(MainDocumentPart template, MainDocumentPart output)
{
var outputBody = output.Document.Body;
if (outputBody == null) return;
// Remove existing header/footer parts from output
foreach (var hp in output.HeaderParts.ToList())
output.DeletePart(hp);
foreach (var fp in output.FooterParts.ToList())
output.DeletePart(fp);
// Remove existing header/footer references from all section properties
// (dangling references to the deleted parts would corrupt the package).
foreach (var sectPr in outputBody.Descendants<SectionProperties>())
{
foreach (var hr in sectPr.Elements<HeaderReference>().ToList())
hr.Remove();
foreach (var fr in sectPr.Elements<FooterReference>().ToList())
fr.Remove();
}
var templateBody = template.Document?.Body;
if (templateBody == null) return;
var templateFinalSectPr = templateBody.Descendants<SectionProperties>().LastOrDefault();
if (templateFinalSectPr == null) return;
var outputFinalSectPr = outputBody.Descendants<SectionProperties>().LastOrDefault();
if (outputFinalSectPr == null)
{
outputFinalSectPr = new SectionProperties();
outputBody.Append(outputFinalSectPr);
}
// Copy headers
foreach (var headerRef in templateFinalSectPr.Elements<HeaderReference>())
{
var templateHeaderPart = template.GetPartById(headerRef.Id!) as HeaderPart;
if (templateHeaderPart == null) continue;
var newHeaderPart = output.AddNewPart<HeaderPart>();
using (var stream = templateHeaderPart.GetStream(FileMode.Open, FileAccess.Read))
{
newHeaderPart.FeedData(stream);
}
CopyPartRelationships(templateHeaderPart, newHeaderPart);
// The new part gets a fresh relationship id in the output package, so the
// reference must carry that id, not the template's original.
var newRefId = output.GetIdOfPart(newHeaderPart);
// InsertAt(...,0) keeps references at the front of sectPr (schema-required
// position). NOTE(review): inserting each at index 0 reverses the template's
// header-reference order; harmless since w:type disambiguates, but confirm.
outputFinalSectPr.InsertAt(new HeaderReference
{
Type = headerRef.Type,
Id = newRefId
}, 0);
}
// Copy footers
foreach (var footerRef in templateFinalSectPr.Elements<FooterReference>())
{
var templateFooterPart = template.GetPartById(footerRef.Id!) as FooterPart;
if (templateFooterPart == null) continue;
var newFooterPart = output.AddNewPart<FooterPart>();
using (var stream = templateFooterPart.GetStream(FileMode.Open, FileAccess.Read))
{
newFooterPart.FeedData(stream);
}
CopyPartRelationships(templateFooterPart, newFooterPart);
var newRefId = output.GetIdOfPart(newFooterPart);
// Footer references go after the header references to respect sectPr child order.
var lastHeaderRef = outputFinalSectPr.Elements<HeaderReference>().LastOrDefault();
if (lastHeaderRef != null)
lastHeaderRef.InsertAfterSelf(new FooterReference { Type = footerRef.Type, Id = newRefId });
else
outputFinalSectPr.InsertAt(new FooterReference { Type = footerRef.Type, Id = newRefId }, 0);
}
}
/// <summary>
/// Copies sub-relationships (images, etc.) from a source part to a target part.
/// External relationships and image parts are recreated under their ORIGINAL
/// relationship ids, so r:id references inside the copied part XML stay valid.
/// Non-image embedded parts are skipped with a warning.
/// </summary>
private static void CopyPartRelationships(OpenXmlPart source, OpenXmlPart target)
{
foreach (var rel in source.ExternalRelationships)
{
target.AddExternalRelationship(rel.RelationshipType, rel.Uri, rel.Id);
}
foreach (var childPart in source.Parts)
{
try
{
var contentType = childPart.OpenXmlPart.ContentType;
if (contentType.StartsWith("image/"))
{
var newChild = target.AddNewPart<ImagePart>(contentType, childPart.RelationshipId);
using var stream = childPart.OpenXmlPart.GetStream(FileMode.Open, FileAccess.Read);
newChild.FeedData(stream);
}
}
catch (Exception ex)
{
Console.Error.WriteLine($"[WARN] Skipped non-image embedded part: {ex.Message}");
}
}
}
/// <summary>
/// Copies page size, margins, columns, and document grid from template section properties.
/// Only the FINAL section of each document participates; each named child element
/// is replaced individually so unrelated sectPr children are preserved.
/// </summary>
private static void CopySectionProperties(MainDocumentPart template, MainDocumentPart output)
{
var templateBody = template.Document?.Body;
var outputBody = output.Document?.Body;
if (templateBody == null || outputBody == null) return;
var templateSectPr = templateBody.Descendants<SectionProperties>().LastOrDefault();
if (templateSectPr == null) return;
var outputSectPr = outputBody.Descendants<SectionProperties>().LastOrDefault();
if (outputSectPr == null)
{
outputSectPr = new SectionProperties();
outputBody.Append(outputSectPr);
}
CopyChildElement<PageSize>(templateSectPr, outputSectPr);
CopyChildElement<PageMargin>(templateSectPr, outputSectPr);
CopyChildElement<Columns>(templateSectPr, outputSectPr);
CopyChildElement<DocGrid>(templateSectPr, outputSectPr);
CopyChildElement<PageBorders>(templateSectPr, outputSectPr);
}
/// <summary>
/// Replaces target's first child of type T with a deep clone of source's,
/// doing nothing if source has no such child. The clone is appended, so its
/// position inside target may differ from the element it replaced.
/// </summary>
private static void CopyChildElement<T>(SectionProperties source, SectionProperties target) where T : OpenXmlElement
{
var sourceElement = source.GetFirstChild<T>();
if (sourceElement == null) return;
var existing = target.GetFirstChild<T>();
existing?.Remove();
target.Append((T)sourceElement.CloneNode(true));
}
}

View File

@@ -0,0 +1,324 @@
using System.CommandLine;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using MiniMaxAIDocx.Core.OpenXml;
using MiniMaxAIDocx.Core.Typography;
namespace MiniMaxAIDocx.Core.Commands;
/// <summary>
/// Scenario A: Create a new DOCX document from scratch with proper styles, sections,
/// headers/footers, and typography defaults.
/// </summary>
public static class CreateCommand
{
/// <summary>
/// Builds the "create" command. Document type selects a font preset; page size
/// and margins select geometry presets; header/footer/page-number/TOC content
/// is optional, and body content can be supplied via a JSON description file.
/// </summary>
public static Command Create()
{
var outputOption = new Option<string>("--output") { Description = "Output DOCX file path", Required = true };
var typeOption = new Option<string>("--type") { Description = "Document type: report, letter, memo, academic" };
typeOption.DefaultValueFactory = _ => "report";
var titleOption = new Option<string>("--title") { Description = "Document title" };
var authorOption = new Option<string>("--author") { Description = "Document author" };
var pageSizeOption = new Option<string>("--page-size") { Description = "Page size: letter, a4, legal, a3" };
pageSizeOption.DefaultValueFactory = _ => "letter";
var marginsOption = new Option<string>("--margins") { Description = "Margin preset: standard, narrow, wide" };
marginsOption.DefaultValueFactory = _ => "standard";
var headerTextOption = new Option<string>("--header") { Description = "Header text" };
var footerTextOption = new Option<string>("--footer") { Description = "Footer text" };
var pageNumbersOption = new Option<bool>("--page-numbers") { Description = "Add page numbers in footer" };
var tocOption = new Option<bool>("--toc") { Description = "Insert table of contents placeholder" };
var contentJsonOption = new Option<string>("--content-json") { Description = "Path to JSON file describing document content" };
var cmd = new Command("create", "Create a new DOCX document from scratch")
{
outputOption, typeOption, titleOption, authorOption, pageSizeOption,
marginsOption, headerTextOption, footerTextOption, pageNumbersOption,
tocOption, contentJsonOption
};
cmd.SetAction((parseResult) =>
{
var output = parseResult.GetValue(outputOption)!;
var docType = parseResult.GetValue(typeOption) ?? "report";
var title = parseResult.GetValue(titleOption);
var author = parseResult.GetValue(authorOption);
var pageSizeName = parseResult.GetValue(pageSizeOption) ?? "letter";
var marginsName = parseResult.GetValue(marginsOption) ?? "standard";
var headerText = parseResult.GetValue(headerTextOption);
var footerText = parseResult.GetValue(footerTextOption);
var pageNumbers = parseResult.GetValue(pageNumbersOption);
var tocPlaceholder = parseResult.GetValue(tocOption);
var contentJson = parseResult.GetValue(contentJsonOption);
// FontConfig / PageSize / MarginConfig are project types (MiniMaxAIDocx.Core.Typography);
// the *Dxa members presumably hold twentieths of a point — TODO confirm against those types.
var fontConfig = GetFontConfig(docType);
var pageSize = GetPageSizeConfig(pageSizeName);
var margins = GetMargins(marginsName);
using var doc = WordprocessingDocument.Create(output, WordprocessingDocumentType.Document);
var mainPart = doc.AddMainDocumentPart();
mainPart.Document = new Document(new Body());
var body = mainPart.Document.Body!;
// Add styles part with defaults
AddDefaultStyles(mainPart, fontConfig);
// Add section properties (page size, margins)
// Fully qualified PageSize: disambiguates from the project's Typography.PageSize.
var sectPr = new SectionProperties();
sectPr.Append(new DocumentFormat.OpenXml.Wordprocessing.PageSize
{
Width = (UInt32Value)(uint)pageSize.WidthDxa,
Height = (UInt32Value)(uint)pageSize.HeightDxa
});
sectPr.Append(new PageMargin
{
Top = margins.TopDxa,
Bottom = margins.BottomDxa,
Left = (UInt32Value)(uint)margins.LeftDxa,
Right = (UInt32Value)(uint)margins.RightDxa
});
// Add header if requested
if (!string.IsNullOrEmpty(headerText))
{
var headerPart = mainPart.AddNewPart<HeaderPart>();
headerPart.Header = new Header(
new Paragraph(new Run(new Text(headerText))));
// The sectPr reference must carry the relationship id assigned by the package.
var headerRefId = mainPart.GetIdOfPart(headerPart);
sectPr.Append(new HeaderReference
{
Type = HeaderFooterValues.Default,
Id = headerRefId
});
}
// Add footer if requested
if (!string.IsNullOrEmpty(footerText) || pageNumbers)
{
var footerPart = mainPart.AddNewPart<FooterPart>();
var footerParagraph = new Paragraph();
if (!string.IsNullOrEmpty(footerText))
{
footerParagraph.Append(new Run(new Text(footerText)));
}
if (pageNumbers)
{
// Em-dash separator between the footer text and the page number.
if (!string.IsNullOrEmpty(footerText))
footerParagraph.Append(new Run(new Text(" — ") { Space = SpaceProcessingModeValues.Preserve }));
// PAGE field as a begin/instruction/end run triple; Word computes the
// number when the field is updated/rendered.
footerParagraph.Append(new Run(
new FieldChar { FieldCharType = FieldCharValues.Begin }));
footerParagraph.Append(new Run(
new FieldCode(" PAGE ") { Space = SpaceProcessingModeValues.Preserve }));
footerParagraph.Append(new Run(
new FieldChar { FieldCharType = FieldCharValues.End }));
}
footerPart.Footer = new Footer(footerParagraph);
var footerRefId = mainPart.GetIdOfPart(footerPart);
sectPr.Append(new FooterReference
{
Type = HeaderFooterValues.Default,
Id = footerRefId
});
}
// Title
if (!string.IsNullOrEmpty(title))
{
var titlePara = new Paragraph(
new ParagraphProperties(new ParagraphStyleId { Val = "Title" }),
new Run(new Text(title)));
body.Append(titlePara);
}
// Author subtitle
if (!string.IsNullOrEmpty(author))
{
var authorPara = new Paragraph(
new ParagraphProperties(new ParagraphStyleId { Val = "Subtitle" }),
new Run(new Text(author)));
body.Append(authorPara);
}
// TOC placeholder
// NOTE(review): "TOCHeading" is referenced but AddDefaultStyles below does not
// define it — the paragraph falls back to Normal in Word; confirm intent.
if (tocPlaceholder)
{
body.Append(new Paragraph(
new ParagraphProperties(new ParagraphStyleId { Val = "TOCHeading" }),
new Run(new Text("Table of Contents"))));
// Insert TOC field
// Switches: \o "1-3" = headings 1-3, \h = hyperlinks, \z = hide tab leader
// in web view, \u = use outline levels. The run between Separate and End is
// the cached result shown until the user updates the field.
var tocPara = new Paragraph();
tocPara.Append(new Run(new FieldChar { FieldCharType = FieldCharValues.Begin }));
tocPara.Append(new Run(new FieldCode(" TOC \\o \"1-3\" \\h \\z \\u ") { Space = SpaceProcessingModeValues.Preserve }));
tocPara.Append(new Run(new FieldChar { FieldCharType = FieldCharValues.Separate }));
tocPara.Append(new Run(new Text("Update this field to generate table of contents.")));
tocPara.Append(new Run(new FieldChar { FieldCharType = FieldCharValues.End }));
body.Append(tocPara);
// Page break after TOC
body.Append(new Paragraph(new Run(new Break { Type = BreakValues.Page })));
}
// Content from JSON (if provided)
if (!string.IsNullOrEmpty(contentJson) && File.Exists(contentJson))
{
var jsonContent = File.ReadAllText(contentJson);
AddContentFromJson(body, jsonContent, fontConfig);
}
// Ensure body has at least one paragraph
// (Word requires a non-empty body to open the file).
if (!body.Elements<Paragraph>().Any())
{
body.Append(new Paragraph());
}
// sectPr must be the last child of body
body.Append(sectPr);
mainPart.Document.Save();
Console.WriteLine($"Created {docType} document: {output}");
});
return cmd;
}
// Maps --type to a font preset; unknown values fall back to the report preset.
private static FontConfig GetFontConfig(string docType) => docType.ToLowerInvariant() switch
{
"letter" => FontDefaults.Letter,
"memo" => FontDefaults.Memo,
"academic" => FontDefaults.Academic,
_ => FontDefaults.Report,
};
// Maps --page-size to a geometry preset; "letter" and unknown values use Letter.
private static Typography.PageSize GetPageSizeConfig(string name) => name.ToLowerInvariant() switch
{
"a4" => PageSizes.A4,
"legal" => PageSizes.Legal,
"a3" => PageSizes.A3,
_ => PageSizes.Letter,
};
// Maps --margins to a preset; "standard" and unknown values use StandardMargins.
private static MarginConfig GetMargins(string name) => name.ToLowerInvariant() switch
{
"narrow" => PageSizes.NarrowMargins,
"wide" => PageSizes.WideMargins,
_ => PageSizes.StandardMargins,
};
/// <summary>
/// Creates the StyleDefinitionsPart with Normal, Heading1-6, Title, and Subtitle
/// styles derived from the given font preset. Style NAMES use Word's display
/// convention ("heading 1") while style IDS use the reference form ("Heading1").
/// Spacing values are in twentieths of a point.
/// </summary>
private static void AddDefaultStyles(MainDocumentPart mainPart, FontConfig fontConfig)
{
var stylesPart = mainPart.AddNewPart<StyleDefinitionsPart>();
var styles = new Styles();
// Default run properties
var defaultRPr = new StyleRunProperties(
new RunFonts { Ascii = fontConfig.BodyFont, HighAnsi = fontConfig.BodyFont },
new FontSize { Val = UnitConverter.FontSizeToSz(fontConfig.BodySize) },
new FontSizeComplexScript { Val = UnitConverter.FontSizeToSz(fontConfig.BodySize) });
// Normal style
styles.Append(new Style(
new StyleName { Val = "Normal" },
new PrimaryStyle(),
defaultRPr)
{ Type = StyleValues.Paragraph, StyleId = "Normal", Default = true });
// Heading styles 1-6
double[] headingSizes = [fontConfig.Heading1Size, fontConfig.Heading2Size, fontConfig.Heading3Size,
fontConfig.Heading4Size, fontConfig.Heading5Size, fontConfig.Heading6Size];
for (int i = 0; i < 6; i++)
{
var level = i + 1;
// OutlineLevel is zero-based (i), while the style id/name are one-based (level);
// the outline level is what drives TOC collection and the navigation pane.
var headingStyle = new Style(
new StyleName { Val = $"heading {level}" },
new BasedOn { Val = "Normal" },
new NextParagraphStyle { Val = "Normal" },
new PrimaryStyle(),
new StyleParagraphProperties(
new KeepNext(),
new KeepLines(),
new SpacingBetweenLines { Before = "240", After = "120" },
new OutlineLevel { Val = i }),
new StyleRunProperties(
new RunFonts { Ascii = fontConfig.HeadingFont, HighAnsi = fontConfig.HeadingFont },
new FontSize { Val = UnitConverter.FontSizeToSz(headingSizes[i]) },
new FontSizeComplexScript { Val = UnitConverter.FontSizeToSz(headingSizes[i]) },
new Bold()))
{ Type = StyleValues.Paragraph, StyleId = $"Heading{level}" };
styles.Append(headingStyle);
}
// Title style
styles.Append(new Style(
new StyleName { Val = "Title" },
new BasedOn { Val = "Normal" },
new NextParagraphStyle { Val = "Normal" },
new PrimaryStyle(),
new StyleParagraphProperties(
new Justification { Val = JustificationValues.Center },
new SpacingBetweenLines { After = "300" }),
new StyleRunProperties(
new RunFonts { Ascii = fontConfig.HeadingFont, HighAnsi = fontConfig.HeadingFont },
new FontSize { Val = UnitConverter.FontSizeToSz(fontConfig.Heading1Size + 6) },
new FontSizeComplexScript { Val = UnitConverter.FontSizeToSz(fontConfig.Heading1Size + 6) }))
{ Type = StyleValues.Paragraph, StyleId = "Title" });
// Subtitle style
styles.Append(new Style(
new StyleName { Val = "Subtitle" },
new BasedOn { Val = "Normal" },
new NextParagraphStyle { Val = "Normal" },
new StyleParagraphProperties(
new Justification { Val = JustificationValues.Center },
new SpacingBetweenLines { After = "200" }),
new StyleRunProperties(
new Color { Val = "5A5A5A" },
new FontSize { Val = UnitConverter.FontSizeToSz(fontConfig.BodySize + 2) }))
{ Type = StyleValues.Paragraph, StyleId = "Subtitle" });
stylesPart.Styles = styles;
stylesPart.Styles.Save();
}
/// <summary>
/// Appends body content described by a JSON array of {type, text, level?} items.
/// Supported types: "heading" (level clamped to 1-6), "paragraph", "pagebreak";
/// unknown types are silently ignored. Malformed JSON logs a warning and leaves
/// the body as-is rather than failing document creation.
/// </summary>
private static void AddContentFromJson(Body body, string jsonContent, FontConfig fontConfig)
{
// Simple JSON content format: array of {type, text, level?}
// e.g. [{"type":"heading","text":"Introduction","level":1},{"type":"paragraph","text":"..."}]
try
{
using var jsonDoc = System.Text.Json.JsonDocument.Parse(jsonContent);
foreach (var element in jsonDoc.RootElement.EnumerateArray())
{
var type = element.GetProperty("type").GetString() ?? "paragraph";
var text = element.GetProperty("text").GetString() ?? "";
switch (type)
{
case "heading":
var level = element.TryGetProperty("level", out var lvl) ? lvl.GetInt32() : 1;
level = Math.Clamp(level, 1, 6);
body.Append(new Paragraph(
new ParagraphProperties(new ParagraphStyleId { Val = $"Heading{level}" }),
new Run(new Text(text))));
break;
case "paragraph":
body.Append(new Paragraph(new Run(new Text(text))));
break;
case "pagebreak":
body.Append(new Paragraph(new Run(new Break { Type = BreakValues.Page })));
break;
}
}
}
catch (System.Text.Json.JsonException ex)
{
Console.Error.WriteLine($"Warning: could not parse content JSON: {ex.Message}");
}
}
}

View File

@@ -0,0 +1,155 @@
using System.CommandLine;
using System.IO.Compression;
using System.Text.Json;
using System.Xml.Linq;
namespace MiniMaxAIDocx.Core.Commands;
public static class DiffCommand
{
    // WordprocessingML main namespace for elements in document.xml and styles.xml.
    private static readonly XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    /// <summary>
    /// Builds the "diff" command: compares two DOCX files by paragraph text
    /// (positional, not sequence-aligned), by declared style ids, and by coarse
    /// structure counts (sections/tables/images). Human-readable by default,
    /// JSON with --json.
    /// </summary>
    public static Command Create()
    {
        var beforeOption = new Option<string>("--before") { Description = "Original DOCX", Required = true };
        var afterOption = new Option<string>("--after") { Description = "Modified DOCX", Required = true };
        var jsonOption = new Option<bool>("--json") { Description = "Output as JSON" };
        var cmd = new Command("diff", "Compare two DOCX files")
        {
            beforeOption, afterOption, jsonOption
        };
        cmd.SetAction((parseResult) =>
        {
            var before = parseResult.GetValue(beforeOption)!;
            var after = parseResult.GetValue(afterOption)!;
            var asJson = parseResult.GetValue(jsonOption);
            if (!File.Exists(before)) { Console.Error.WriteLine($"File not found: {before}"); return; }
            if (!File.Exists(after)) { Console.Error.WriteLine($"File not found: {after}"); return; }
            var beforeParas = ExtractParagraphs(before);
            var afterParas = ExtractParagraphs(after);
            var beforeStyles = ExtractStyleIds(before);
            var afterStyles = ExtractStyleIds(after);
            var beforeStructure = ExtractStructure(before);
            var afterStructure = ExtractStructure(after);

            // Text diff: paragraphs are paired by position, so a single inserted or
            // deleted paragraph shifts everything after it and each shifted pair
            // is reported as changed.
            var textChanges = new List<object>();
            int maxLen = Math.Max(beforeParas.Count, afterParas.Count);
            int changedParas = 0;
            for (int i = 0; i < maxLen; i++)
            {
                var bText = i < beforeParas.Count ? beforeParas[i] : null;
                var aText = i < afterParas.Count ? afterParas[i] : null;
                if (bText != aText)
                {
                    changedParas++;
                    textChanges.Add(new
                    {
                        paragraph = i + 1,
                        before = bText ?? "(absent)",
                        after = aText ?? "(absent)"
                    });
                }
            }

            // Style diff: set difference over style IDS only — a style whose
            // definition changed but kept its id is not detected.
            var addedStyles = afterStyles.Except(beforeStyles).ToList();
            var removedStyles = beforeStyles.Except(afterStyles).ToList();

            // Structure diff: coarse element counts only.
            var structureChanges = new List<string>();
            if (beforeStructure.Sections != afterStructure.Sections)
                structureChanges.Add($"Sections: {beforeStructure.Sections} -> {afterStructure.Sections}");
            if (beforeStructure.Tables != afterStructure.Tables)
                structureChanges.Add($"Tables: {beforeStructure.Tables} -> {afterStructure.Tables}");
            if (beforeStructure.Images != afterStructure.Images)
                structureChanges.Add($"Images: {beforeStructure.Images} -> {afterStructure.Images}");
            var result = new
            {
                textChanges,
                styleChanges = new { added = addedStyles, removed = removedStyles },
                structureChanges,
                summary = $"{changedParas} paragraphs changed, {addedStyles.Count + removedStyles.Count} styles modified, {structureChanges.Count} structural changes"
            };
            if (asJson)
            {
                Console.WriteLine(JsonSerializer.Serialize(result, new JsonSerializerOptions { WriteIndented = true }));
            }
            else
            {
                Console.WriteLine(result.summary);
                Console.WriteLine();
                if (textChanges.Count > 0)
                {
                    Console.WriteLine($"Text changes ({textChanges.Count}):");
                    // Cap console output at 20 entries so large diffs stay readable.
                    foreach (var tc in textChanges.Take(20))
                        Console.WriteLine($" {tc}");
                    if (textChanges.Count > 20)
                        Console.WriteLine($" ... and {textChanges.Count - 20} more");
                }
                if (addedStyles.Count > 0)
                    Console.WriteLine($"Added styles: {string.Join(", ", addedStyles)}");
                if (removedStyles.Count > 0)
                    Console.WriteLine($"Removed styles: {string.Join(", ", removedStyles)}");
                foreach (var sc in structureChanges)
                    Console.WriteLine($"Structure: {sc}");
            }
        });
        return cmd;
    }

    /// <summary>
    /// Returns the concatenated run text of every paragraph in document order,
    /// or an empty list if the package has no word/document.xml.
    /// </summary>
    private static List<string> ExtractParagraphs(string docxPath)
    {
        using var zip = ZipFile.OpenRead(docxPath);
        var entry = zip.GetEntry("word/document.xml");
        if (entry == null) return new();
        using var stream = entry.Open();
        var doc = XDocument.Load(stream);
        return doc.Descendants(W + "p")
            .Select(p => string.Concat(p.Descendants(W + "t").Select(t => t.Value)))
            .ToList();
    }

    /// <summary>
    /// Returns the set of style ids declared in word/styles.xml, or an empty set
    /// if the part is missing.
    /// </summary>
    private static HashSet<string> ExtractStyleIds(string docxPath)
    {
        using var zip = ZipFile.OpenRead(docxPath);
        var entry = zip.GetEntry("word/styles.xml");
        if (entry == null) return new();
        using var stream = entry.Open();
        var doc = XDocument.Load(stream);
        // OfType<string>() both drops styles lacking a w:styleId and narrows
        // string? to string, so no null-forgiving operator is needed.
        return doc.Descendants(W + "style")
            .Select(s => (string?)s.Attribute(W + "styleId"))
            .OfType<string>()
            .ToHashSet();
    }

    // Coarse structural fingerprint of a document.
    private record StructureInfo(int Sections, int Tables, int Images);

    /// <summary>Counts sectPr, tbl, and drawing elements in word/document.xml.</summary>
    private static StructureInfo ExtractStructure(string docxPath)
    {
        using var zip = ZipFile.OpenRead(docxPath);
        var entry = zip.GetEntry("word/document.xml");
        if (entry == null) return new(0, 0, 0);
        using var stream = entry.Open();
        var doc = XDocument.Load(stream);
        return new(
            doc.Descendants(W + "sectPr").Count(),
            doc.Descendants(W + "tbl").Count(),
            doc.Descendants(W + "drawing").Count()
        );
    }
}

View File

@@ -0,0 +1,487 @@
using System.CommandLine;
using System.Text.RegularExpressions;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using MiniMaxAIDocx.Core.OpenXml;
namespace MiniMaxAIDocx.Core.Commands;
/// <summary>
/// Scenario B: Surgical content editing operations on existing DOCX files.
/// Preserves all existing formatting and minimizes XML changes.
/// </summary>
public static class EditContentCommand
{
/// <summary>
/// Builds the "edit" parent command and registers every editing subcommand
/// in a fixed order (the order controls how they appear in --help output).
/// </summary>
public static Command Create()
{
    var editCommand = new Command("edit", "Edit existing DOCX content");
    Command[] subcommands =
    [
        CreateReplaceTextCommand(),
        CreateFillTableCommand(),
        CreateInsertParagraphCommand(),
        CreateUpdateFieldCommand(),
        CreateListPlaceholdersCommand(),
        CreateFillPlaceholdersCommand(),
    ];
    foreach (var subcommand in subcommands)
        editCommand.Add(subcommand);
    return editCommand;
}
/// <summary>
/// Builds the "replace-text" subcommand: substitutes text across the document
/// body paragraph by paragraph (via ReplaceInParagraph) so that existing run
/// formatting is preserved. Without --output the input file is edited in place.
/// </summary>
private static Command CreateReplaceTextCommand()
{
    var inputOpt = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOpt = new Option<string>("--output") { Description = "Output file path (defaults to overwriting input)" };
    var searchOpt = new Option<string>("--search") { Description = "Text to search for", Required = true };
    var replaceOpt = new Option<string>("--replace") { Description = "Replacement text", Required = true };
    var regexOpt = new Option<bool>("--regex") { Description = "Treat search as a regex pattern" };
    var cmd = new Command("replace-text", "Replace text while preserving formatting")
    {
        inputOpt, outputOpt, searchOpt, replaceOpt, regexOpt
    };
    cmd.SetAction((parseResult) =>
    {
        var sourcePath = parseResult.GetValue(inputOpt)!;
        var targetPath = parseResult.GetValue(outputOpt) ?? sourcePath;
        var pattern = parseResult.GetValue(searchOpt)!;
        var replacement = parseResult.GetValue(replaceOpt)!;
        var isRegex = parseResult.GetValue(regexOpt);
        // All edits happen in place on the target, so make the copy first
        // whenever the target differs from the source.
        if (targetPath != sourcePath) File.Copy(sourcePath, targetPath, overwrite: true);
        using var doc = WordprocessingDocument.Open(targetPath, true);
        var body = doc.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }
        // Sum the per-paragraph replacement counts into a single total.
        var total = body.Descendants<Paragraph>()
            .Sum(paragraph => ReplaceInParagraph(paragraph, pattern, replacement, isRegex));
        doc.MainDocumentPart!.Document.Save();
        Console.WriteLine($"Replaced {total} occurrence(s) in {targetPath}");
    });
    return cmd;
}
/// <summary>
/// Builds the "fill-table" subcommand: fills the N-th table in the document
/// with rows parsed from a CSV file. The CSV header line (line 0) is skipped;
/// existing data rows are removed unless --append is given. New rows reuse the
/// row properties of the first existing data row as a formatting template.
/// </summary>
private static Command CreateFillTableCommand()
{
    var inputOpt = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOpt = new Option<string>("--output") { Description = "Output file path" };
    var tableIndexOpt = new Option<int>("--table-index") { Description = "Zero-based index of the table to fill" };
    tableIndexOpt.DefaultValueFactory = _ => 0;
    var csvOpt = new Option<string>("--csv") { Description = "CSV file with data to fill", Required = true };
    var appendOpt = new Option<bool>("--append") { Description = "Append rows instead of replacing existing data rows" };
    var cmd = new Command("fill-table", "Fill a table with data from CSV")
    {
        inputOpt, outputOpt, tableIndexOpt, csvOpt, appendOpt
    };
    cmd.SetAction((parseResult) =>
    {
        var input = parseResult.GetValue(inputOpt)!;
        var output = parseResult.GetValue(outputOpt) ?? input;
        var tableIndex = parseResult.GetValue(tableIndexOpt);
        var csvPath = parseResult.GetValue(csvOpt)!;
        var append = parseResult.GetValue(appendOpt);

        // BUGFIX: validate the CSV before copying, so a bad invocation no
        // longer creates/overwrites the output file as a side effect.
        if (!File.Exists(csvPath)) { Console.Error.WriteLine($"CSV file not found: {csvPath}"); return; }
        if (output != input) File.Copy(input, output, overwrite: true);

        using var doc = WordprocessingDocument.Open(output, true);
        var body = doc.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }

        var tables = body.Elements<Table>().ToList();
        // BUGFIX: also reject negative indexes (previously only the upper
        // bound was checked, so a negative value threw at tables[tableIndex]).
        if (tableIndex < 0 || tableIndex >= tables.Count)
        {
            Console.Error.WriteLine($"Table index {tableIndex} out of range (found {tables.Count} tables).");
            return;
        }
        var table = tables[tableIndex];

        var csvLines = File.ReadAllLines(csvPath);
        if (csvLines.Length == 0) { Console.WriteLine("CSV is empty, nothing to fill."); return; }

        // Template row: first data row (after header) if present, else the header row.
        var existingRows = table.Elements<TableRow>().ToList();
        TableRow? templateRow = existingRows.Count > 1 ? existingRows[1] : existingRows.FirstOrDefault();
        var templateTrPr = templateRow?.TableRowProperties?.CloneNode(true) as TableRowProperties;

        if (!append)
        {
            // Replace mode: remove all rows except the header row.
            for (int i = existingRows.Count - 1; i >= 1; i--)
                existingRows[i].Remove();
        }

        int rowsAdded = 0;
        // CSV line 0 is the header; data starts at line 1.
        for (int i = 1; i < csvLines.Length; i++)
        {
            var values = ParseCsvLine(csvLines[i]);
            var newRow = new TableRow();
            if (templateTrPr != null)
                newRow.Append(templateTrPr.CloneNode(true));
            foreach (var val in values)
            {
                var cell = new TableCell(
                    new Paragraph(new Run(new Text(val))));
                newRow.Append(cell);
            }
            table.Append(newRow);
            rowsAdded++;
        }

        doc.MainDocumentPart!.Document.Save();
        Console.WriteLine($"Added {rowsAdded} rows to table {tableIndex} in {output}");
    });
    return cmd;
}
/// <summary>
/// Builds the "insert-paragraph" subcommand: adds a (optionally styled)
/// paragraph after a given paragraph index, or at the end of the body —
/// before the trailing sectPr when one exists.
/// </summary>
private static Command CreateInsertParagraphCommand()
{
    var inputOption = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOption = new Option<string>("--output") { Description = "Output file path" };
    var textOption = new Option<string>("--text") { Description = "Paragraph text", Required = true };
    var styleOption = new Option<string>("--style") { Description = "Paragraph style (e.g. Heading1, Normal)" };
    var afterOption = new Option<int>("--after-paragraph") { Description = "Insert after this paragraph index (0-based)" };
    afterOption.DefaultValueFactory = _ => -1; // -1 = append at end

    var command = new Command("insert-paragraph", "Insert a new paragraph")
    {
        inputOption, outputOption, textOption, styleOption, afterOption
    };
    command.SetAction(parseResult =>
    {
        var input = parseResult.GetValue(inputOption)!;
        var output = parseResult.GetValue(outputOption) ?? input;
        var text = parseResult.GetValue(textOption)!;
        var styleId = parseResult.GetValue(styleOption);
        var anchorIndex = parseResult.GetValue(afterOption);

        if (output != input) File.Copy(input, output, overwrite: true);

        using var document = WordprocessingDocument.Open(output, true);
        var body = document.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }

        var paragraph = new Paragraph();
        if (!string.IsNullOrEmpty(styleId))
            paragraph.Append(new ParagraphProperties(new ParagraphStyleId { Val = styleId }));
        paragraph.Append(new Run(new Text(text)));

        var existing = body.Elements<Paragraph>().ToList();
        if (anchorIndex >= 0 && anchorIndex < existing.Count)
        {
            existing[anchorIndex].InsertAfterSelf(paragraph);
        }
        else if (body.Elements<SectionProperties>().FirstOrDefault() is { } sectPr)
        {
            // Section properties must stay last: new content goes just before them.
            sectPr.InsertBeforeSelf(paragraph);
        }
        else
        {
            body.Append(paragraph);
        }

        document.MainDocumentPart!.Document.Save();
        Console.WriteLine($"Inserted paragraph in {output}");
    });
    return command;
}
/// <summary>
/// Builds the "update-field" subcommand: writes a value into one of the
/// package core properties (TITLE, AUTHOR, SUBJECT, KEYWORDS, DESCRIPTION,
/// CATEGORY). Changes are persisted when the document is disposed.
/// </summary>
private static Command CreateUpdateFieldCommand()
{
    var inputOpt = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOpt = new Option<string>("--output") { Description = "Output file path" };
    var fieldNameOpt = new Option<string>("--field") { Description = "Document property field name (e.g. TITLE, AUTHOR)", Required = true };
    var valueOpt = new Option<string>("--value") { Description = "New field value", Required = true };
    var cmd = new Command("update-field", "Update a document property field value")
    {
        inputOpt, outputOpt, fieldNameOpt, valueOpt
    };
    cmd.SetAction((parseResult) =>
    {
        var input = parseResult.GetValue(inputOpt)!;
        var output = parseResult.GetValue(outputOpt) ?? input;
        var fieldName = parseResult.GetValue(fieldNameOpt)!;
        var value = parseResult.GetValue(valueOpt)!;

        // BUGFIX: validate the field name before copying, so an unknown field
        // no longer creates/overwrites the output file as a side effect.
        var normalized = fieldName.ToUpperInvariant();
        if (normalized is not ("TITLE" or "AUTHOR" or "SUBJECT" or "KEYWORDS" or "DESCRIPTION" or "CATEGORY"))
        {
            Console.Error.WriteLine($"Unknown field: {fieldName}. Supported: TITLE, AUTHOR, SUBJECT, KEYWORDS, DESCRIPTION, CATEGORY");
            return;
        }

        if (output != input) File.Copy(input, output, overwrite: true);

        using var doc = WordprocessingDocument.Open(output, true);
        // Package-level (core) properties; flushed when the package closes.
        var props = doc.PackageProperties;
        switch (normalized)
        {
            case "TITLE": props.Title = value; break;
            case "AUTHOR": props.Creator = value; break;
            case "SUBJECT": props.Subject = value; break;
            case "KEYWORDS": props.Keywords = value; break;
            case "DESCRIPTION": props.Description = value; break;
            case "CATEGORY": props.Category = value; break;
        }
        Console.WriteLine($"Updated {fieldName} to \"{value}\" in {output}");
    });
    return cmd;
}
/// <summary>
/// Builds the "list-placeholders" subcommand: scans concatenated paragraph
/// text for tokens matching a regex and prints the unique, sorted set.
/// </summary>
private static Command CreateListPlaceholdersCommand()
{
    var inputOption = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var patternOption = new Option<string>("--pattern") { Description = "Placeholder pattern (regex)" };
    patternOption.DefaultValueFactory = _ => @"\{\{(\w+)\}\}"; // {{PLACEHOLDER}}

    var command = new Command("list-placeholders", "List all placeholders found in the document")
    {
        inputOption, patternOption
    };
    command.SetAction(parseResult =>
    {
        var input = parseResult.GetValue(inputOption)!;
        var pattern = parseResult.GetValue(patternOption)!;

        using var document = WordprocessingDocument.Open(input, false);
        var body = document.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }

        var matcher = new Regex(pattern);
        var found = new HashSet<string>();
        foreach (var paragraph in body.Descendants<Paragraph>())
        {
            // Concatenate run text so placeholders split across runs still match.
            var paragraphText = string.Concat(paragraph.Descendants<Text>().Select(t => t.Text));
            foreach (Match m in matcher.Matches(paragraphText))
                found.Add(m.Value);
        }

        if (found.Count == 0)
        {
            Console.WriteLine("No placeholders found.");
            return;
        }
        Console.WriteLine($"Found {found.Count} unique placeholder(s):");
        foreach (var placeholder in found.OrderBy(x => x))
            Console.WriteLine($"  {placeholder}");
    });
    return command;
}
/// <summary>
/// Builds the "fill-placeholders" subcommand: replaces placeholder tokens
/// with values looked up (by the first regex capture group, or the whole
/// token when the pattern has no group) in a JSON mapping file.
/// </summary>
private static Command CreateFillPlaceholdersCommand()
{
    var inputOption = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOption = new Option<string>("--output") { Description = "Output file path" };
    var mappingOption = new Option<string>("--mapping") { Description = "JSON file mapping placeholder names to values", Required = true };
    var patternOption = new Option<string>("--pattern") { Description = "Placeholder pattern with capture group for the name" };
    patternOption.DefaultValueFactory = _ => @"\{\{(\w+)\}\}";

    var command = new Command("fill-placeholders", "Replace placeholders with values from a mapping file")
    {
        inputOption, outputOption, mappingOption, patternOption
    };
    command.SetAction(parseResult =>
    {
        var input = parseResult.GetValue(inputOption)!;
        var output = parseResult.GetValue(outputOption) ?? input;
        var mappingPath = parseResult.GetValue(mappingOption)!;
        var pattern = parseResult.GetValue(patternOption)!;

        if (!File.Exists(mappingPath)) { Console.Error.WriteLine($"Mapping file not found: {mappingPath}"); return; }

        Dictionary<string, string> mapping;
        try
        {
            mapping = System.Text.Json.JsonSerializer.Deserialize<Dictionary<string, string>>(File.ReadAllText(mappingPath)) ?? [];
        }
        catch (System.Text.Json.JsonException ex)
        {
            Console.Error.WriteLine($"Invalid mapping JSON: {ex.Message}");
            return;
        }

        if (output != input) File.Copy(input, output, overwrite: true);

        using var document = WordprocessingDocument.Open(output, true);
        var body = document.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }

        var matcher = new Regex(pattern);
        int totalReplacements = 0;
        foreach (var paragraph in body.Descendants<Paragraph>())
        {
            var paragraphText = string.Concat(paragraph.Descendants<Text>().Select(t => t.Text));
            foreach (Match m in matcher.Matches(paragraphText))
            {
                // Group 1 (when present) holds the placeholder name; patterns
                // without a capture group fall back to the whole token.
                var key = m.Groups.Count > 1 ? m.Groups[1].Value : m.Value;
                if (mapping.TryGetValue(key, out var value))
                    totalReplacements += ReplaceInParagraph(paragraph, m.Value, value, false);
            }
        }

        document.MainDocumentPart!.Document.Save();
        Console.WriteLine($"Filled {totalReplacements} placeholder(s) in {output}");
    });
    return command;
}
/// <summary>
/// Replaces text within a paragraph while preserving run formatting.
/// Matches fully contained in a single text element are replaced in place
/// (formatting kept); when no in-run match exists but the concatenated
/// paragraph text matches (i.e. the match spans runs), the paragraph's runs
/// are rebuilt, keeping only the first run's formatting. Regex mode always
/// takes the rebuild path. Returns the number of occurrences replaced.
/// Note: a search string present both inside a run and spanning runs only
/// has its in-run occurrences replaced (pre-existing limitation).
/// </summary>
private static int ReplaceInParagraph(Paragraph paragraph, string search, string replace, bool useRegex)
{
    var runs = paragraph.Elements<Run>().ToList();
    if (runs.Count == 0) return 0;

    // Full paragraph text, used to detect matches that span run boundaries.
    var fullText = string.Concat(runs.SelectMany(r => r.Elements<Text>().Select(t => t.Text)));
    if (string.IsNullOrEmpty(fullText)) return 0;

    int count = 0;
    if (!useRegex)
    {
        // First pass: replace matches that sit entirely inside one text element.
        foreach (var run in runs)
        {
            foreach (var textElement in run.Elements<Text>().ToList())
            {
                if (!textElement.Text.Contains(search)) continue;

                // BUGFIX: the old length-delta arithmetic reported 0
                // replacements whenever search and replace had equal lengths
                // or the element held exactly one match; that zero count then
                // triggered the cross-run rebuild below on stale text and
                // needlessly destroyed the other runs' formatting.
                count += CountOccurrences(textElement.Text, search);
                var newText = textElement.Text.Replace(search, replace);
                textElement.Text = newText;
                if (newText.StartsWith(' ') || newText.EndsWith(' '))
                    textElement.Space = SpaceProcessingModeValues.Preserve;
            }
        }

        // Second pass: if nothing was replaced in-run, the paragraph text is
        // unchanged, so a remaining match must span runs — rebuild.
        if (count == 0 && fullText.Contains(search))
        {
            var newFullText = fullText.Replace(search, replace);
            count = CountOccurrences(fullText, search);
            RebuildRunsWithText(paragraph, runs, newFullText);
        }
    }
    else
    {
        var regex = new Regex(search);
        if (regex.IsMatch(fullText))
        {
            count = regex.Matches(fullText).Count;
            var newFullText = regex.Replace(fullText, replace);
            RebuildRunsWithText(paragraph, runs, newFullText);
        }
    }
    return count;
}
/// <summary>
/// Replaces the text content of a paragraph's existing runs with a single new
/// string, keeping only the first run's formatting and removing the rest.
/// </summary>
private static void RebuildRunsWithText(Paragraph paragraph, List<Run> runs, string newText)
{
    if (runs.Count == 0) return;

    var firstRun = runs[0];
    var firstText = firstRun.Elements<Text>().FirstOrDefault();
    if (firstText == null)
    {
        // BUGFIX: previously the new text was silently dropped when the first
        // run had no <w:t> child; append one so the content survives.
        firstText = new Text();
        firstRun.Append(firstText);
    }
    firstText.Text = newText;
    // Leading/trailing spaces would otherwise be trimmed by consumers.
    if (newText.StartsWith(' ') || newText.EndsWith(' '))
        firstText.Space = SpaceProcessingModeValues.Preserve;

    // Remove all other runs; their formatting is intentionally discarded.
    for (int i = 1; i < runs.Count; i++)
        runs[i].Remove();
}
/// <summary>
/// Counts non-overlapping occurrences of <paramref name="search"/> in
/// <paramref name="text"/> using ordinal comparison.
/// </summary>
private static int CountOccurrences(string text, string search)
{
    int total = 0;
    for (int at = text.IndexOf(search, StringComparison.Ordinal);
         at != -1;
         at = text.IndexOf(search, at + search.Length, StringComparison.Ordinal))
    {
        total++;
    }
    return total;
}
/// <summary>
/// Minimal CSV field splitter: commas delimit fields unless inside double
/// quotes; a doubled quote within a quoted field is a literal quote.
/// Always returns at least one field (empty input yields one empty field).
/// </summary>
private static string[] ParseCsvLine(string line)
{
    var fields = new List<string>();
    var buffer = new System.Text.StringBuilder();
    bool quoted = false;
    int i = 0;
    while (i < line.Length)
    {
        char ch = line[i];
        if (ch == '"')
        {
            if (quoted && i + 1 < line.Length && line[i + 1] == '"')
            {
                // Escaped quote ("") inside a quoted field.
                buffer.Append('"');
                i += 2;
                continue;
            }
            quoted = !quoted;
        }
        else if (ch == ',' && !quoted)
        {
            fields.Add(buffer.ToString());
            buffer.Clear();
        }
        else
        {
            buffer.Append(ch);
        }
        i++;
    }
    fields.Add(buffer.ToString());
    return fields.ToArray();
}
}

View File

@@ -0,0 +1,108 @@
using System.CommandLine;
using System.IO.Compression;
using System.Xml.Linq;
namespace MiniMaxAIDocx.Core.Commands;
/// <summary>
/// CLI command that rewrites word/document.xml so the children of common
/// property containers (pPr, rPr, tblPr, tcPr, sectPr) follow the element
/// sequence required by ISO 29500.
/// </summary>
public static class FixOrderCommand
{
    private static readonly XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    // Canonical child-element ordering within common parent elements per ISO 29500.
    private static readonly Dictionary<string, List<string>> ElementOrder = new()
    {
        ["pPr"] = new() { "pStyle", "keepNext", "keepLines", "pageBreakBefore", "widowControl", "numPr", "suppressLineNumbers", "pBdr", "shd", "tabs", "suppressAutoHyphens", "spacing", "ind", "jc", "outlineLvl", "rPr" },
        ["rPr"] = new() { "rStyle", "rFonts", "b", "bCs", "i", "iCs", "caps", "smallCaps", "strike", "dstrike", "vanish", "color", "spacing", "w", "kern", "position", "sz", "szCs", "highlight", "u", "effect", "vertAlign", "lang" },
        ["tblPr"] = new() { "tblStyle", "tblpPr", "tblOverlap", "tblW", "jc", "tblInd", "tblBorders", "shd", "tblLayout", "tblCellMar", "tblLook" },
        ["tcPr"] = new() { "cnfStyle", "tcW", "gridSpan", "hMerge", "vMerge", "tcBorders", "shd", "noWrap", "tcMar", "textDirection", "tcFitText", "vAlign" },
        ["sectPr"] = new() { "headerReference", "footerReference", "footnotePr", "endnotePr", "type", "pgSz", "pgMar", "paperSrc", "pgBorders", "lnNumType", "pgNumType", "cols", "docGrid" },
    };

    /// <summary>
    /// Builds the "fix-order" command. Optionally writes a .bak backup, then
    /// edits a temp copy of the archive so the input is never left half-written.
    /// </summary>
    public static Command Create()
    {
        var inputOption = new Option<string>("--input") { Description = "DOCX file to fix", Required = true };
        var outputOption = new Option<string>("--output") { Description = "Output path (default: overwrite input)" };
        var backupOption = new Option<bool>("--backup") { Description = "Create .bak before modifying", DefaultValueFactory = (_) => true };
        var cmd = new Command("fix-order", "Fix OpenXML element ordering per ISO 29500")
        {
            inputOption, outputOption, backupOption
        };
        cmd.SetAction((parseResult) =>
        {
            var input = parseResult.GetValue(inputOption)!;
            var output = parseResult.GetValue(outputOption) ?? input;
            var backup = parseResult.GetValue(backupOption);
            if (!File.Exists(input))
            {
                Console.Error.WriteLine($"File not found: {input}");
                return;
            }
            if (backup && output == input)
                File.Copy(input, input + ".bak", true);

            var tempPath = Path.GetTempFileName();
            try
            {
                File.Copy(input, tempPath, true);
                int reorderedCount;
                using (var zip = ZipFile.Open(tempPath, ZipArchiveMode.Update))
                {
                    var entry = zip.GetEntry("word/document.xml");
                    if (entry == null)
                    {
                        Console.Error.WriteLine("Not a valid DOCX");
                        return; // BUGFIX: temp file is now cleaned up by finally (was leaked)
                    }
                    XDocument doc;
                    using (var stream = entry.Open())
                        doc = XDocument.Load(stream);
                    reorderedCount = ReorderElements(doc);
                    entry.Delete();
                    var newEntry = zip.CreateEntry("word/document.xml", CompressionLevel.Optimal);
                    using (var stream = newEntry.Open())
                        doc.Save(stream);
                }
                // BUGFIX: the archive is disposed exactly once by the using
                // block above; the old code combined "using var" with an
                // explicit Dispose() call.
                File.Copy(tempPath, output, true);
                Console.WriteLine($"Reordered {reorderedCount} element group(s)");
                Console.WriteLine($"Written to: {output}");
            }
            finally
            {
                File.Delete(tempPath);
            }
        });
        return cmd;
    }

    /// <summary>
    /// Reorders the children of every known property container in the given
    /// document. Unknown children sort after known ones; the sort is stable,
    /// so ties keep their original relative order. Returns the number of
    /// parent elements that were actually changed.
    /// </summary>
    private static int ReorderElements(XDocument doc)
    {
        int reorderedCount = 0;
        foreach (var (parentName, order) in ElementOrder)
        {
            foreach (var parent in doc.Descendants(W + parentName))
            {
                var children = parent.Elements().ToList();
                var sorted = children
                    .OrderBy(e =>
                    {
                        var idx = order.IndexOf(e.Name.LocalName);
                        return idx >= 0 ? idx : order.Count;
                    })
                    .ToList();
                // Reference comparison: only rewrite when the order changed.
                if (!children.SequenceEqual(sorted))
                {
                    parent.ReplaceNodes(sorted);
                    reorderedCount++;
                }
            }
        }
        return reorderedCount;
    }
}

View File

@@ -0,0 +1,122 @@
using System.CommandLine;
using System.IO.Compression;
using System.Xml.Linq;
namespace MiniMaxAIDocx.Core.Commands;
/// <summary>
/// CLI command that merges adjacent runs with identical run properties inside
/// each paragraph of word/document.xml, shrinking the XML without changing
/// the rendered document.
/// </summary>
public static class MergeRunsCommand
{
    private static readonly XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    public static Command Create()
    {
        var inputOption = new Option<string>("--input") { Description = "DOCX file to optimize", Required = true };
        var outputOption = new Option<string>("--output") { Description = "Output path (default: overwrite input)" };
        var dryRunOption = new Option<bool>("--dry-run") { Description = "Report without modifying" };
        var cmd = new Command("merge-runs", "Merge adjacent runs with identical formatting")
        {
            inputOption, outputOption, dryRunOption
        };
        cmd.SetAction((parseResult) =>
        {
            var input = parseResult.GetValue(inputOption)!;
            var output = parseResult.GetValue(outputOption) ?? input;
            var dryRun = parseResult.GetValue(dryRunOption);
            if (!File.Exists(input))
            {
                Console.Error.WriteLine($"File not found: {input}");
                return;
            }

            var tempPath = Path.GetTempFileName();
            try
            {
                File.Copy(input, tempPath, true);
                int originalCount, mergedCount;
                using (var zip = ZipFile.Open(tempPath, ZipArchiveMode.Update))
                {
                    var entry = zip.GetEntry("word/document.xml");
                    if (entry == null)
                    {
                        Console.Error.WriteLine("Not a valid DOCX: missing word/document.xml");
                        return; // BUGFIX: temp file is now cleaned up by finally (was leaked)
                    }
                    XDocument doc;
                    using (var stream = entry.Open())
                        doc = XDocument.Load(stream);
                    (originalCount, mergedCount) = MergeAdjacentRuns(doc);
                    if (!dryRun)
                    {
                        entry.Delete();
                        var newEntry = zip.CreateEntry("word/document.xml", CompressionLevel.Optimal);
                        using (var stream = newEntry.Open())
                            doc.Save(stream);
                    }
                }
                // BUGFIX: the archive is now closed before the temp file is
                // copied or deleted. Previously --dry-run called File.Delete
                // on a file the Update-mode ZipArchive still held open
                // (IOException on Windows), and the normal path disposed the
                // archive twice ("using var" plus an explicit Dispose()).
                if (!dryRun)
                    File.Copy(tempPath, output, true);
                Console.WriteLine($"Original runs: {originalCount}");
                Console.WriteLine($"After merge: {mergedCount}");
                Console.WriteLine($"Reduction: {(originalCount > 0 ? (originalCount - mergedCount) * 100.0 / originalCount : 0):F1}%");
                if (!dryRun)
                    Console.WriteLine($"Written to: {output}");
            }
            finally
            {
                File.Delete(tempPath);
            }
        });
        return cmd;
    }

    /// <summary>
    /// Merges adjacent runs whose serialized rPr matches and whose content
    /// (besides rPr) is plain w:t text — runs carrying breaks, tabs, drawings
    /// etc. are left alone. Returns (run count before, run count after).
    /// </summary>
    private static (int Original, int Merged) MergeAdjacentRuns(XDocument doc)
    {
        int originalCount = 0;
        int mergedCount = 0;
        foreach (var p in doc.Descendants(W + "p"))
        {
            var runs = p.Elements(W + "r").ToList();
            originalCount += runs.Count;
            for (int i = runs.Count - 1; i > 0; i--)
            {
                var current = runs[i];
                var previous = runs[i - 1];
                // Identical serialized rPr XML means identical formatting.
                var curProps = current.Element(W + "rPr")?.ToString() ?? "";
                var prevProps = previous.Element(W + "rPr")?.ToString() ?? "";
                if (curProps != prevProps) continue;

                var curChildren = current.Elements().Where(e => e.Name != W + "rPr").ToList();
                var prevChildren = previous.Elements().Where(e => e.Name != W + "rPr").ToList();
                if (!curChildren.All(e => e.Name == W + "t") || !prevChildren.All(e => e.Name == W + "t"))
                    continue;

                var prevText = previous.Elements(W + "t").LastOrDefault();
                var curText = current.Elements(W + "t").FirstOrDefault();
                if (prevText == null || curText == null) continue;

                prevText.Value += curText.Value;
                // Concatenation can produce significant leading/trailing spaces.
                prevText.SetAttributeValue(XNamespace.Xml + "space", "preserve");
                foreach (var extra in current.Elements(W + "t").Skip(1))
                    previous.Add(new XElement(extra));
                current.Remove();
                runs.RemoveAt(i);
            }
            mergedCount += runs.Count;
        }
        return (originalCount, mergedCount);
    }
}

View File

@@ -0,0 +1,107 @@
using System.CommandLine;
using System.Text.Json;
using MiniMaxAIDocx.Core.Validation;
namespace MiniMaxAIDocx.Core.Commands;
/// <summary>
/// CLI command that runs XSD, business-rule, and gate-check validation over a
/// DOCX file and reports the combined outcome as text or JSON. Sets exit
/// code 1 when any check fails.
/// </summary>
public static class ValidateCommand
{
    public static Command Create()
    {
        var inputOption = new Option<string>("--input") { Description = "DOCX file to validate", Required = true };
        var xsdOption = new Option<string>("--xsd") { Description = "XSD schema path for XML validation" };
        var businessOption = new Option<bool>("--business") { Description = "Run business rule validation" };
        var gateCheckOption = new Option<string>("--gate-check") { Description = "Template DOCX for gate-check validation" };
        var jsonOption = new Option<bool>("--json") { Description = "Output results as JSON" };

        var command = new Command("validate", "Validate DOCX structure and content")
        {
            inputOption, xsdOption, businessOption, gateCheckOption, jsonOption
        };
        command.SetAction(parseResult =>
        {
            var input = parseResult.GetValue(inputOption)!;
            var xsdPath = parseResult.GetValue(xsdOption);
            var runBusiness = parseResult.GetValue(businessOption);
            var gateTemplate = parseResult.GetValue(gateCheckOption);
            var asJson = parseResult.GetValue(jsonOption);

            if (!File.Exists(input))
            {
                Console.Error.WriteLine($"File not found: {input}");
                return;
            }

            // XSD and business-rule results accumulate into one ValidationResult;
            // the gate check keeps its own result type.
            var combined = new ValidationResult();
            if (xsdPath != null)
                combined.Merge(new XsdValidator().Validate(input, xsdPath));
            if (runBusiness)
                combined.Merge(new BusinessRuleValidator().Validate(input));

            GateCheckResult? gate = gateTemplate != null
                ? new GateCheckValidator().Validate(input, gateTemplate)
                : null;
            bool gatePassed = gate?.Passed ?? true;

            if (asJson)
            {
                var payload = new
                {
                    isValid = combined.IsValid && gatePassed,
                    errors = combined.Errors,
                    warnings = combined.Warnings,
                    gateCheck = gate == null ? null : new
                    {
                        passed = gate.Passed,
                        violations = gate.Violations
                    }
                };
                Console.WriteLine(JsonSerializer.Serialize(payload, new JsonSerializerOptions { WriteIndented = true }));
            }
            else
            {
                if (combined.Errors.Count > 0)
                {
                    Console.WriteLine($"ERRORS ({combined.Errors.Count}):");
                    foreach (var error in combined.Errors)
                    {
                        var location = error.LineNumber > 0 ? $" (line {error.LineNumber}:{error.LinePosition})" : "";
                        Console.WriteLine($"  [{error.Severity}] {error.Message}{location}");
                    }
                }
                if (combined.Warnings.Count > 0)
                {
                    Console.WriteLine($"WARNINGS ({combined.Warnings.Count}):");
                    foreach (var warning in combined.Warnings)
                        Console.WriteLine($"  [{warning.Severity}] {warning.Message}");
                }
                if (gate != null)
                {
                    Console.WriteLine(gate.Passed ? "GATE CHECK: PASSED" : "GATE CHECK: FAILED");
                    foreach (var violation in gate.Violations)
                        Console.WriteLine($"  - {violation}");
                }
                Console.WriteLine(combined.IsValid && gatePassed ? "Validation: PASSED" : "Validation: FAILED");
            }

            if (!combined.IsValid || gate is { Passed: false })
                Environment.ExitCode = 1;
        });
        return command;
    }
}