Initial commit: add all skills files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,147 @@
|
||||
using System.CommandLine;
|
||||
using System.IO.Compression;
|
||||
using System.Text.Json;
|
||||
using System.Xml.Linq;
|
||||
|
||||
namespace MiniMaxAIDocx.Core.Commands;
|
||||
|
||||
public static class AnalyzeCommand
|
||||
{
|
||||
private static readonly XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
|
||||
private static readonly XNamespace WP = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing";
|
||||
|
||||
/// <summary>
/// Builds the <c>analyze</c> command, which opens a DOCX as a ZIP archive, reads
/// <c>word/document.xml</c> (and <c>word/styles.xml</c> when present) with LINQ to XML,
/// and reports sections, headings, tables, images, header/footer references,
/// paragraph and word counts, XML part sizes and custom style ids.
/// </summary>
/// <returns>The configured <see cref="Command"/>; output is text by default or JSON with <c>--json</c>.</returns>
public static Command Create()
{
    var inputOption = new Option<string>("--input") { Description = "DOCX file to analyze", Required = true };
    var jsonOption = new Option<bool>("--json") { Description = "Output as JSON" };

    var cmd = new Command("analyze", "Analyze document structure and styles")
    {
        inputOption, jsonOption
    };

    cmd.SetAction((parseResult) =>
    {
        var input = parseResult.GetValue(inputOption)!;
        var asJson = parseResult.GetValue(jsonOption);

        if (!File.Exists(input))
        {
            Console.Error.WriteLine($"File not found: {input}");
            return;
        }

        using var zip = ZipFile.OpenRead(input);
        var docEntry = zip.GetEntry("word/document.xml");
        if (docEntry == null)
        {
            Console.Error.WriteLine("Not a valid DOCX");
            return;
        }

        XDocument doc;
        using (var stream = docEntry.Open())
        {
            doc = XDocument.Load(stream);
        }

        var body = doc.Root?.Element(W + "body");
        if (body == null)
        {
            // Report the problem instead of exiting without any output.
            Console.Error.WriteLine("Not a valid DOCX: word/document.xml has no body element");
            return;
        }

        // Sections: a section with no explicit <w:type> is reported as "nextPage".
        var sections = body.Descendants(W + "sectPr").ToList();
        var sectionBreaks = sections
            .Select(s => (string?)s.Element(W + "type")?.Attribute(W + "val") ?? "nextPage")
            .ToList();

        // Headings: paragraphs whose style id starts with "Heading" (case-insensitive).
        var headings = new List<object>();
        foreach (var p in body.Descendants(W + "p"))
        {
            var style = (string?)p.Element(W + "pPr")?.Element(W + "pStyle")?.Attribute(W + "val");
            if (style?.StartsWith("Heading", StringComparison.OrdinalIgnoreCase) == true)
            {
                var text = string.Concat(p.Descendants(W + "t").Select(t => t.Value));
                headings.Add(new { style, text });
            }
        }

        // Tables: row count, and column count taken from the first row only.
        var tables = body.Descendants(W + "tbl").Select(tbl => new
        {
            rows = tbl.Elements(W + "tr").Count(),
            cols = tbl.Elements(W + "tr").FirstOrDefault()?.Elements(W + "tc").Count() ?? 0
        }).ToList();

        // Images
        var images = body.Descendants(W + "drawing").Count();

        // Headers/footers
        var headerRefs = sections.SelectMany(s => s.Elements(W + "headerReference")).Count();
        var footerRefs = sections.SelectMany(s => s.Elements(W + "footerReference")).Count();

        // Paragraphs and word count.
        // Text runs are joined with a separator whenever the owning paragraph changes and
        // wherever a tab or break element occurs; otherwise the last word of one paragraph
        // would fuse with the first word of the next and the count would come out too low.
        var paragraphs = body.Descendants(W + "p").ToList();
        var textBuilder = new System.Text.StringBuilder();
        XElement? owningParagraph = null;
        foreach (var node in body.Descendants())
        {
            if (node.Name == W + "t")
            {
                // Ancestors() yields the nearest ancestor first.
                var paragraph = node.Ancestors(W + "p").FirstOrDefault();
                if (!ReferenceEquals(paragraph, owningParagraph))
                {
                    textBuilder.Append(' ');
                    owningParagraph = paragraph;
                }

                textBuilder.Append(node.Value);
            }
            else if (node.Name == W + "tab" || node.Name == W + "br" || node.Name == W + "cr")
            {
                textBuilder.Append(' ');
            }
        }

        var wordCount = textBuilder.ToString()
            .Split(new[] { ' ', '\t', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries)
            .Length;

        // XML file sizes (uncompressed), largest first. Part names are compared ordinally.
        var fileSizes = zip.Entries
            .Where(e => e.FullName.StartsWith("word/", StringComparison.Ordinal)
                        && e.FullName.EndsWith(".xml", StringComparison.Ordinal))
            .Select(e => new { file = e.FullName, size = e.Length })
            .OrderByDescending(e => e.size)
            .ToList();

        // Styles: ids of styles flagged w:customStyle="1".
        var styleNames = new List<string>();
        var stylesEntry = zip.GetEntry("word/styles.xml");
        if (stylesEntry != null)
        {
            using var stream = stylesEntry.Open();
            var stylesDoc = XDocument.Load(stream);
            styleNames = stylesDoc.Descendants(W + "style")
                .Where(s => (string?)s.Attribute(W + "customStyle") == "1")
                .Select(s => (string?)s.Attribute(W + "styleId") ?? "")
                .Where(s => s != "")
                .ToList();
        }

        var analysis = new
        {
            sections = new { count = sections.Count, breakTypes = sectionBreaks },
            headings,
            tables = new { count = tables.Count, details = tables },
            images,
            headerFooter = new { headers = headerRefs, footers = footerRefs },
            paragraphs = paragraphs.Count,
            estimatedWordCount = wordCount,
            xmlFileSizes = fileSizes,
            customStyles = new { count = styleNames.Count, names = styleNames }
        };

        if (asJson)
        {
            Console.WriteLine(JsonSerializer.Serialize(analysis, new JsonSerializerOptions { WriteIndented = true }));
        }
        else
        {
            Console.WriteLine($"Sections: {sections.Count} ({string.Join(", ", sectionBreaks)})");
            Console.WriteLine($"Headings: {headings.Count}");
            foreach (var h in headings)
            {
                Console.WriteLine($" {h}");
            }

            Console.WriteLine($"Tables: {tables.Count}");
            foreach (var t in tables)
            {
                Console.WriteLine($" {t.rows} rows x {t.cols} cols");
            }

            Console.WriteLine($"Images: {images}");
            Console.WriteLine($"Headers: {headerRefs}");
            Console.WriteLine($"Footers: {footerRefs}");
            Console.WriteLine($"Paragraphs: {paragraphs.Count}");
            Console.WriteLine($"Word count: ~{wordCount}");
            Console.WriteLine($"Custom styles: {styleNames.Count}");
            foreach (var s in styleNames)
            {
                Console.WriteLine($" {s}");
            }

            Console.WriteLine("XML file sizes:");
            foreach (var f in fileSizes)
            {
                Console.WriteLine($" {f.file}: {f.size:N0} bytes");
            }
        }
    });

    return cmd;
}
|
||||
}
|
||||
@@ -0,0 +1,322 @@
|
||||
using System.CommandLine;
|
||||
using DocumentFormat.OpenXml;
|
||||
using DocumentFormat.OpenXml.Packaging;
|
||||
using DocumentFormat.OpenXml.Wordprocessing;
|
||||
|
||||
namespace MiniMaxAIDocx.Core.Commands;
|
||||
|
||||
/// <summary>
|
||||
/// Scenario C: Apply formatting from a template DOCX to a source DOCX.
|
||||
/// Copies styles, theme, numbering, headers/footers, and section properties
|
||||
/// from the template while preserving all content from the source.
|
||||
/// </summary>
|
||||
public static class ApplyTemplateCommand
|
||||
{
|
||||
/// <summary>
/// Builds the <c>apply-template</c> command. The source document is copied to the output
/// path and the selected formatting components (styles, theme, numbering, headers/footers,
/// section properties) are then copied from the template into that copy.
/// </summary>
/// <returns>The configured <see cref="Command"/>.</returns>
public static Command Create()
{
    var inputOpt = new Option<string>("--input") { Description = "Source DOCX (content to keep)", Required = true };
    var templateOpt = new Option<string>("--template") { Description = "Template DOCX (formatting to apply)", Required = true };
    var outputOpt = new Option<string>("--output") { Description = "Output DOCX file path", Required = true };
    var applyStylesOpt = new Option<bool>("--apply-styles") { Description = "Copy styles.xml from template" };
    applyStylesOpt.DefaultValueFactory = _ => true;
    var applyThemeOpt = new Option<bool>("--apply-theme") { Description = "Copy theme from template" };
    applyThemeOpt.DefaultValueFactory = _ => true;
    var applyNumberingOpt = new Option<bool>("--apply-numbering") { Description = "Copy numbering.xml from template" };
    applyNumberingOpt.DefaultValueFactory = _ => true;
    // No default factory here: headers/footers are only copied when the flag is given.
    var applyHeadersFootersOpt = new Option<bool>("--apply-headers-footers") { Description = "Copy headers/footers from template" };
    var applySectionsOpt = new Option<bool>("--apply-sections") { Description = "Apply section properties from template" };
    applySectionsOpt.DefaultValueFactory = _ => true;

    var cmd = new Command("apply-template", "Apply template formatting to a DOCX")
    {
        inputOpt, templateOpt, outputOpt, applyStylesOpt, applyThemeOpt,
        applyNumberingOpt, applyHeadersFootersOpt, applySectionsOpt
    };

    cmd.SetAction((parseResult) =>
    {
        var inputPath = parseResult.GetValue(inputOpt)!;
        var templatePath = parseResult.GetValue(templateOpt)!;
        var outputPath = parseResult.GetValue(outputOpt)!;
        var applyStyles = parseResult.GetValue(applyStylesOpt);
        var applyTheme = parseResult.GetValue(applyThemeOpt);
        var applyNumbering = parseResult.GetValue(applyNumberingOpt);
        var applyHeadersFooters = parseResult.GetValue(applyHeadersFootersOpt);
        var applySections = parseResult.GetValue(applySectionsOpt);

        if (!File.Exists(inputPath)) { Console.Error.WriteLine($"Input file not found: {inputPath}"); return; }
        if (!File.Exists(templatePath)) { Console.Error.WriteLine($"Template file not found: {templatePath}"); return; }

        // The output is created by overwriting outputPath with a copy of the source.
        // If it names the same file as the input the copy fails, and if it names the
        // template the template is destroyed before it can be read, so reject both.
        // The comparison ignores case: a false rejection is harmless, a false
        // acceptance would lose data on case-insensitive file systems.
        var outputFullPath = Path.GetFullPath(outputPath);
        if (string.Equals(outputFullPath, Path.GetFullPath(inputPath), StringComparison.OrdinalIgnoreCase))
        {
            Console.Error.WriteLine($"Output path must differ from the input file: {outputPath}");
            return;
        }

        if (string.Equals(outputFullPath, Path.GetFullPath(templatePath), StringComparison.OrdinalIgnoreCase))
        {
            Console.Error.WriteLine($"Output path must differ from the template file: {outputPath}");
            return;
        }

        // Create output as a copy of the source
        File.Copy(inputPath, outputPath, overwrite: true);

        using var output = WordprocessingDocument.Open(outputPath, true);
        using var template = WordprocessingDocument.Open(templatePath, false);

        var outputMain = output.MainDocumentPart;
        var templateMain = template.MainDocumentPart;
        if (outputMain == null || templateMain == null)
        {
            Console.Error.WriteLine("Invalid document: missing main document part.");
            return;
        }

        // Counts the components that were requested; each helper returns early
        // when the template lacks the corresponding part.
        int appliedCount = 0;

        if (applyStyles)
        {
            CopyStyles(templateMain, outputMain);
            appliedCount++;
            Console.WriteLine(" Applied: styles");
        }

        if (applyTheme)
        {
            CopyTheme(templateMain, outputMain);
            appliedCount++;
            Console.WriteLine(" Applied: theme");
        }

        if (applyNumbering)
        {
            CopyNumbering(templateMain, outputMain);
            appliedCount++;
            Console.WriteLine(" Applied: numbering");
        }

        if (applyHeadersFooters)
        {
            CopyHeadersAndFooters(templateMain, outputMain);
            appliedCount++;
            Console.WriteLine(" Applied: headers/footers");
        }

        if (applySections)
        {
            CopySectionProperties(templateMain, outputMain);
            appliedCount++;
            Console.WriteLine(" Applied: section properties");
        }

        outputMain.Document.Save();
        Console.WriteLine($"Applied {appliedCount} formatting component(s) from template to {outputPath}");
    });

    return cmd;
}
|
||||
|
||||
/// <summary>
/// Swaps the output document's style definitions for the template's.
/// Does nothing when the template has no <see cref="StyleDefinitionsPart"/>.
/// </summary>
/// <param name="template">Main part of the template document (source of the styles).</param>
/// <param name="output">Main part of the document being rewritten.</param>
private static void CopyStyles(MainDocumentPart template, MainDocumentPart output)
{
    if (template.StyleDefinitionsPart is not { } sourcePart)
    {
        return;
    }

    // Drop whatever styles part the output currently carries before adding the new one.
    var stalePart = output.StyleDefinitionsPart;
    if (stalePart != null)
    {
        output.DeletePart(stalePart);
    }

    var replacement = output.AddNewPart<StyleDefinitionsPart>();

    using (var data = sourcePart.GetStream(FileMode.Open, FileAccess.Read))
    {
        replacement.FeedData(data);
    }
}
|
||||
|
||||
/// <summary>
/// Swaps the output document's theme for the template's.
/// Does nothing when the template has no <see cref="ThemePart"/>.
/// </summary>
/// <param name="template">Main part of the template document (source of the theme).</param>
/// <param name="output">Main part of the document being rewritten.</param>
private static void CopyTheme(MainDocumentPart template, MainDocumentPart output)
{
    if (template.ThemePart is not { } sourcePart)
    {
        return;
    }

    // Drop whatever theme part the output currently carries before adding the new one.
    var stalePart = output.ThemePart;
    if (stalePart != null)
    {
        output.DeletePart(stalePart);
    }

    var replacement = output.AddNewPart<ThemePart>();

    using (var data = sourcePart.GetStream(FileMode.Open, FileAccess.Read))
    {
        replacement.FeedData(data);
    }
}
|
||||
|
||||
/// <summary>
/// Replaces the output's numbering definitions with the template's.
/// Numbering ids used by the document content are NOT rewritten: a paragraph keeps its
/// <c>w:numId</c>, so it picks up whichever template definition has the same id. Ids that
/// the template does not define are reported on stderr because they no longer resolve.
/// Does nothing when the template has no <see cref="NumberingDefinitionsPart"/>.
/// </summary>
/// <param name="template">Main part of the template document (source of the numbering).</param>
/// <param name="output">Main part of the document being rewritten.</param>
private static void CopyNumbering(MainDocumentPart template, MainDocumentPart output)
{
    var templateNumbering = template.NumberingDefinitionsPart;
    if (templateNumbering == null) return;

    // Numbering ids referenced by the content that is being kept.
    var referencedNumIds = new HashSet<int>();
    var body = output.Document.Body;
    if (body != null)
    {
        foreach (var numId in body.Descendants<NumberingId>())
        {
            if (numId.Val?.Value is int id)
                referencedNumIds.Add(id);
        }
    }

    // Numbering ids the template actually defines (<w:num w:numId="...">).
    var definedNumIds = new HashSet<int>();
    var templateRoot = templateNumbering.Numbering;
    if (templateRoot != null)
    {
        foreach (var instance in templateRoot.Elements<NumberingInstance>())
        {
            if (instance.NumberID?.Value is int id)
                definedNumIds.Add(id);
        }
    }

    if (output.NumberingDefinitionsPart != null)
        output.DeletePart(output.NumberingDefinitionsPart);

    var newNumberingPart = output.AddNewPart<NumberingDefinitionsPart>();

    using (var stream = templateNumbering.GetStream(FileMode.Open, FileAccess.Read))
    {
        newNumberingPart.FeedData(stream);
    }

    var resolved = referencedNumIds.Where(definedNumIds.Contains).ToList();
    // numId 0 means "no numbering", so it never needs a definition.
    var unresolved = referencedNumIds.Where(id => id != 0 && !definedNumIds.Contains(id)).OrderBy(id => id).ToList();

    if (resolved.Count > 0)
    {
        Console.WriteLine($" Note: {resolved.Count} numbering reference(s) in document content mapped to template definitions.");
    }

    if (unresolved.Count > 0)
    {
        Console.Error.WriteLine($"[WARN] {unresolved.Count} numbering id(s) used by the document are not defined in the template: {string.Join(", ", unresolved)}");
    }
}
|
||||
|
||||
/// <summary>
/// Replaces the output's headers and footers with those referenced by the template's last
/// section properties. Existing header/footer parts and references are removed from the
/// output first; the template's parts are then copied, given new relationship ids, and
/// referenced from the output's last <see cref="SectionProperties"/> (created if absent).
/// </summary>
/// <remarks>
/// The removal happens before the template is inspected, so when the template has no body
/// or no section properties the output is left without headers or footers.
/// </remarks>
/// <param name="template">Main part of the template document.</param>
/// <param name="output">Main part of the document being rewritten.</param>
private static void CopyHeadersAndFooters(MainDocumentPart template, MainDocumentPart output)
{
    var outputBody = output.Document.Body;
    if (outputBody == null) return;

    // Remove existing header/footer parts from output
    foreach (var hp in output.HeaderParts.ToList())
        output.DeletePart(hp);
    foreach (var fp in output.FooterParts.ToList())
        output.DeletePart(fp);

    // Remove existing header/footer references from all section properties
    foreach (var sectPr in outputBody.Descendants<SectionProperties>())
    {
        foreach (var hr in sectPr.Elements<HeaderReference>().ToList())
            hr.Remove();
        foreach (var fr in sectPr.Elements<FooterReference>().ToList())
            fr.Remove();
    }

    var templateBody = template.Document?.Body;
    if (templateBody == null) return;

    var templateFinalSectPr = templateBody.Descendants<SectionProperties>().LastOrDefault();
    if (templateFinalSectPr == null) return;

    var outputFinalSectPr = outputBody.Descendants<SectionProperties>().LastOrDefault();
    if (outputFinalSectPr == null)
    {
        outputFinalSectPr = new SectionProperties();
        outputBody.Append(outputFinalSectPr);
    }

    // Looks a relationship id up among the template's parts. Used instead of GetPartById,
    // which throws when the id is missing or does not match any part.
    OpenXmlPart? FindTemplatePart(string? relationshipId)
    {
        if (string.IsNullOrEmpty(relationshipId)) return null;
        foreach (var pair in template.Parts)
        {
            if (pair.RelationshipId == relationshipId)
                return pair.OpenXmlPart;
        }

        return null;
    }

    // References go at the front of sectPr, in the template's order: headers, then footers.
    // All previous references were removed above, so index 0 is the first free slot.
    int insertIndex = 0;

    // Copy headers
    foreach (var headerRef in templateFinalSectPr.Elements<HeaderReference>())
    {
        if (FindTemplatePart(headerRef.Id?.Value) is not HeaderPart templateHeaderPart)
        {
            Console.Error.WriteLine($"[WARN] Template header reference '{headerRef.Id?.Value}' does not resolve to a header part; skipped.");
            continue;
        }

        var newHeaderPart = output.AddNewPart<HeaderPart>();
        using (var stream = templateHeaderPart.GetStream(FileMode.Open, FileAccess.Read))
        {
            newHeaderPart.FeedData(stream);
        }

        CopyPartRelationships(templateHeaderPart, newHeaderPart);

        var newRefId = output.GetIdOfPart(newHeaderPart);
        outputFinalSectPr.InsertAt(new HeaderReference
        {
            Type = headerRef.Type,
            Id = newRefId
        }, insertIndex++);
    }

    // Copy footers
    foreach (var footerRef in templateFinalSectPr.Elements<FooterReference>())
    {
        if (FindTemplatePart(footerRef.Id?.Value) is not FooterPart templateFooterPart)
        {
            Console.Error.WriteLine($"[WARN] Template footer reference '{footerRef.Id?.Value}' does not resolve to a footer part; skipped.");
            continue;
        }

        var newFooterPart = output.AddNewPart<FooterPart>();
        using (var stream = templateFooterPart.GetStream(FileMode.Open, FileAccess.Read))
        {
            newFooterPart.FeedData(stream);
        }

        CopyPartRelationships(templateFooterPart, newFooterPart);

        var newRefId = output.GetIdOfPart(newFooterPart);
        outputFinalSectPr.InsertAt(new FooterReference
        {
            Type = footerRef.Type,
            Id = newRefId
        }, insertIndex++);
    }
}
|
||||
|
||||
/// <summary>
/// Copies the relationships a header/footer part depends on from <paramref name="source"/>
/// to <paramref name="target"/>, keeping the original relationship ids so the copied XML
/// still resolves: external relationships, hyperlink relationships, and embedded image parts.
/// Child parts that are not images are not copied; a warning is written for each one.
/// </summary>
/// <param name="source">Part in the template whose relationships are read.</param>
/// <param name="target">Newly created part that receives the relationships.</param>
private static void CopyPartRelationships(OpenXmlPart source, OpenXmlPart target)
{
    foreach (var rel in source.ExternalRelationships)
    {
        target.AddExternalRelationship(rel.RelationshipType, rel.Uri, rel.Id);
    }

    // Hyperlinks are exposed through their own collection and are not part of
    // ExternalRelationships, so they have to be copied separately.
    foreach (var link in source.HyperlinkRelationships)
    {
        target.AddHyperlinkRelationship(link.Uri, link.IsExternal, link.Id);
    }

    foreach (var childPart in source.Parts)
    {
        try
        {
            var contentType = childPart.OpenXmlPart.ContentType;
            if (contentType.StartsWith("image/", StringComparison.Ordinal))
            {
                var newChild = target.AddNewPart<ImagePart>(contentType, childPart.RelationshipId);
                using var stream = childPart.OpenXmlPart.GetStream(FileMode.Open, FileAccess.Read);
                newChild.FeedData(stream);
            }
            else
            {
                Console.Error.WriteLine($"[WARN] Skipped non-image embedded part: {childPart.RelationshipId} ({contentType})");
            }
        }
        catch (Exception ex)
        {
            // Best effort: one part that cannot be copied must not abort the whole command.
            Console.Error.WriteLine($"[WARN] Could not copy embedded part {childPart.RelationshipId}: {ex.Message}");
        }
    }
}
|
||||
|
||||
/// <summary>
/// Copies page size, margins, page borders, columns, and document grid from the template's
/// last section properties into the output's last section properties (created if absent).
/// Does nothing when either document has no body or the template has no section properties.
/// </summary>
/// <param name="template">Main part of the template document.</param>
/// <param name="output">Main part of the document being rewritten.</param>
private static void CopySectionProperties(MainDocumentPart template, MainDocumentPart output)
{
    var templateBody = template.Document?.Body;
    var outputBody = output.Document?.Body;
    if (templateBody == null || outputBody == null) return;

    var templateSectPr = templateBody.Descendants<SectionProperties>().LastOrDefault();
    if (templateSectPr == null) return;

    var outputSectPr = outputBody.Descendants<SectionProperties>().LastOrDefault();
    if (outputSectPr == null)
    {
        outputSectPr = new SectionProperties();
        outputBody.Append(outputSectPr);
    }

    // Copied in the order the WordprocessingML schema lists these children of sectPr
    // (pgSz, pgMar, pgBorders, cols, docGrid), so they keep that relative order.
    CopyChildElement<PageSize>(templateSectPr, outputSectPr);
    CopyChildElement<PageMargin>(templateSectPr, outputSectPr);
    CopyChildElement<PageBorders>(templateSectPr, outputSectPr);
    CopyChildElement<Columns>(templateSectPr, outputSectPr);
    CopyChildElement<DocGrid>(templateSectPr, outputSectPr);
}
|
||||
|
||||
/// <summary>
/// Copies the first child of type <typeparamref name="T"/> from <paramref name="source"/>
/// into <paramref name="target"/>. Does nothing when the source has no such child.
/// </summary>
/// <remarks>
/// An existing child of the same type is replaced where it stands, so the target's child
/// order is unchanged. When the target has none, the clone is inserted at its schema
/// position via <c>AddChild</c>, falling back to appending if that is refused.
/// </remarks>
/// <typeparam name="T">Element type to copy, e.g. <see cref="PageSize"/>.</typeparam>
/// <param name="source">Section properties to read from.</param>
/// <param name="target">Section properties to update.</param>
private static void CopyChildElement<T>(SectionProperties source, SectionProperties target) where T : OpenXmlElement
{
    var sourceElement = source.GetFirstChild<T>();
    if (sourceElement == null) return;

    var clone = (T)sourceElement.CloneNode(true);

    var existing = target.GetFirstChild<T>();
    if (existing != null)
    {
        // Replace in place rather than remove-and-append, which would move the element
        // to the end of sectPr and out of schema order.
        existing.InsertAfterSelf(clone);
        existing.Remove();
        return;
    }

    if (!target.AddChild(clone, false))
    {
        target.Append(clone);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,324 @@
|
||||
using System.CommandLine;
|
||||
using DocumentFormat.OpenXml;
|
||||
using DocumentFormat.OpenXml.Packaging;
|
||||
using DocumentFormat.OpenXml.Wordprocessing;
|
||||
using MiniMaxAIDocx.Core.OpenXml;
|
||||
using MiniMaxAIDocx.Core.Typography;
|
||||
|
||||
namespace MiniMaxAIDocx.Core.Commands;
|
||||
|
||||
/// <summary>
|
||||
/// Scenario A: Create a new DOCX document from scratch with proper styles, sections,
|
||||
/// headers/footers, and typography defaults.
|
||||
/// </summary>
|
||||
public static class CreateCommand
|
||||
{
|
||||
/// <summary>
/// Builds the <c>create</c> command, which writes a new DOCX containing default styles,
/// an optional title, author line, table-of-contents field and JSON-described content,
/// followed by section properties (optional header/footer, page size, margins).
/// </summary>
/// <returns>The configured <see cref="Command"/>.</returns>
public static Command Create()
{
    var outputOption = new Option<string>("--output") { Description = "Output DOCX file path", Required = true };
    var typeOption = new Option<string>("--type") { Description = "Document type: report, letter, memo, academic" };
    typeOption.DefaultValueFactory = _ => "report";
    var titleOption = new Option<string>("--title") { Description = "Document title" };
    var authorOption = new Option<string>("--author") { Description = "Document author" };
    var pageSizeOption = new Option<string>("--page-size") { Description = "Page size: letter, a4, legal, a3" };
    pageSizeOption.DefaultValueFactory = _ => "letter";
    var marginsOption = new Option<string>("--margins") { Description = "Margin preset: standard, narrow, wide" };
    marginsOption.DefaultValueFactory = _ => "standard";
    var headerTextOption = new Option<string>("--header") { Description = "Header text" };
    var footerTextOption = new Option<string>("--footer") { Description = "Footer text" };
    var pageNumbersOption = new Option<bool>("--page-numbers") { Description = "Add page numbers in footer" };
    var tocOption = new Option<bool>("--toc") { Description = "Insert table of contents placeholder" };
    var contentJsonOption = new Option<string>("--content-json") { Description = "Path to JSON file describing document content" };

    var cmd = new Command("create", "Create a new DOCX document from scratch")
    {
        outputOption, typeOption, titleOption, authorOption, pageSizeOption,
        marginsOption, headerTextOption, footerTextOption, pageNumbersOption,
        tocOption, contentJsonOption
    };

    cmd.SetAction((parseResult) =>
    {
        var output = parseResult.GetValue(outputOption)!;
        var docType = parseResult.GetValue(typeOption) ?? "report";
        var title = parseResult.GetValue(titleOption);
        var author = parseResult.GetValue(authorOption);
        var pageSizeName = parseResult.GetValue(pageSizeOption) ?? "letter";
        var marginsName = parseResult.GetValue(marginsOption) ?? "standard";
        var headerText = parseResult.GetValue(headerTextOption);
        var footerText = parseResult.GetValue(footerTextOption);
        var pageNumbers = parseResult.GetValue(pageNumbersOption);
        var tocPlaceholder = parseResult.GetValue(tocOption);
        var contentJson = parseResult.GetValue(contentJsonOption);

        var fontConfig = GetFontConfig(docType);
        var pageSize = GetPageSizeConfig(pageSizeName);
        var margins = GetMargins(marginsName);

        using var doc = WordprocessingDocument.Create(output, WordprocessingDocumentType.Document);
        var mainPart = doc.AddMainDocumentPart();
        mainPart.Document = new Document(new Body());
        var body = mainPart.Document.Body!;

        // Add styles part with defaults
        AddDefaultStyles(mainPart, fontConfig);

        // Section properties. The schema requires header/footer references to come before
        // pgSz and pgMar, so the references are appended first and the page geometry last.
        var sectPr = new SectionProperties();

        // Add header if requested
        if (!string.IsNullOrEmpty(headerText))
        {
            var headerPart = mainPart.AddNewPart<HeaderPart>();
            headerPart.Header = new Header(
                new Paragraph(new Run(new Text(headerText))));
            var headerRefId = mainPart.GetIdOfPart(headerPart);
            sectPr.Append(new HeaderReference
            {
                Type = HeaderFooterValues.Default,
                Id = headerRefId
            });
        }

        // Add footer if requested (footer text, a PAGE field, or both separated by a dash)
        if (!string.IsNullOrEmpty(footerText) || pageNumbers)
        {
            var footerPart = mainPart.AddNewPart<FooterPart>();
            var footerParagraph = new Paragraph();

            if (!string.IsNullOrEmpty(footerText))
            {
                footerParagraph.Append(new Run(new Text(footerText)));
            }

            if (pageNumbers)
            {
                if (!string.IsNullOrEmpty(footerText))
                    footerParagraph.Append(new Run(new Text(" — ") { Space = SpaceProcessingModeValues.Preserve }));

                footerParagraph.Append(new Run(
                    new FieldChar { FieldCharType = FieldCharValues.Begin }));
                footerParagraph.Append(new Run(
                    new FieldCode(" PAGE ") { Space = SpaceProcessingModeValues.Preserve }));
                footerParagraph.Append(new Run(
                    new FieldChar { FieldCharType = FieldCharValues.End }));
            }

            footerPart.Footer = new Footer(footerParagraph);
            var footerRefId = mainPart.GetIdOfPart(footerPart);
            sectPr.Append(new FooterReference
            {
                Type = HeaderFooterValues.Default,
                Id = footerRefId
            });
        }

        // Page size and margins
        sectPr.Append(new DocumentFormat.OpenXml.Wordprocessing.PageSize
        {
            Width = (UInt32Value)(uint)pageSize.WidthDxa,
            Height = (UInt32Value)(uint)pageSize.HeightDxa
        });
        sectPr.Append(new PageMargin
        {
            Top = margins.TopDxa,
            Bottom = margins.BottomDxa,
            Left = (UInt32Value)(uint)margins.LeftDxa,
            Right = (UInt32Value)(uint)margins.RightDxa,
            // header, footer and gutter are required attributes of pgMar.
            // 720 twips = 0.5 inch header/footer distance; no gutter.
            Header = 720U,
            Footer = 720U,
            Gutter = 0U
        });

        // Title
        if (!string.IsNullOrEmpty(title))
        {
            var titlePara = new Paragraph(
                new ParagraphProperties(new ParagraphStyleId { Val = "Title" }),
                new Run(new Text(title)));
            body.Append(titlePara);
        }

        // Author subtitle
        if (!string.IsNullOrEmpty(author))
        {
            var authorPara = new Paragraph(
                new ParagraphProperties(new ParagraphStyleId { Val = "Subtitle" }),
                new Run(new Text(author)));
            body.Append(authorPara);
        }

        // TOC placeholder
        if (tocPlaceholder)
        {
            body.Append(new Paragraph(
                new ParagraphProperties(new ParagraphStyleId { Val = "TOCHeading" }),
                new Run(new Text("Table of Contents"))));

            // Insert TOC field
            var tocPara = new Paragraph();
            tocPara.Append(new Run(new FieldChar { FieldCharType = FieldCharValues.Begin }));
            tocPara.Append(new Run(new FieldCode(" TOC \\o \"1-3\" \\h \\z \\u ") { Space = SpaceProcessingModeValues.Preserve }));
            tocPara.Append(new Run(new FieldChar { FieldCharType = FieldCharValues.Separate }));
            tocPara.Append(new Run(new Text("Update this field to generate table of contents.")));
            tocPara.Append(new Run(new FieldChar { FieldCharType = FieldCharValues.End }));
            body.Append(tocPara);

            // Page break after TOC
            body.Append(new Paragraph(new Run(new Break { Type = BreakValues.Page })));
        }

        // Content from JSON (if provided)
        if (!string.IsNullOrEmpty(contentJson))
        {
            if (File.Exists(contentJson))
            {
                var jsonContent = File.ReadAllText(contentJson);
                AddContentFromJson(body, jsonContent, fontConfig);
            }
            else
            {
                // Say so rather than silently producing a document without the content.
                Console.Error.WriteLine($"Warning: content JSON file not found: {contentJson}");
            }
        }

        // Ensure body has at least one paragraph
        if (!body.Elements<Paragraph>().Any())
        {
            body.Append(new Paragraph());
        }

        // sectPr must be the last child of body
        body.Append(sectPr);

        mainPart.Document.Save();
        Console.WriteLine($"Created {docType} document: {output}");
    });

    return cmd;
}
|
||||
|
||||
/// <summary>
/// Maps a document type name (case-insensitive) to its font preset.
/// Any name other than "letter", "memo" or "academic" yields the report preset.
/// </summary>
private static FontConfig GetFontConfig(string docType)
{
    switch (docType.ToLowerInvariant())
    {
        case "letter":
            return FontDefaults.Letter;
        case "memo":
            return FontDefaults.Memo;
        case "academic":
            return FontDefaults.Academic;
        default:
            return FontDefaults.Report;
    }
}
|
||||
|
||||
/// <summary>
/// Maps a page size name (case-insensitive) to its page size preset.
/// Any name other than "a4", "legal" or "a3" yields the Letter preset.
/// </summary>
private static Typography.PageSize GetPageSizeConfig(string name)
{
    switch (name.ToLowerInvariant())
    {
        case "a4":
            return PageSizes.A4;
        case "legal":
            return PageSizes.Legal;
        case "a3":
            return PageSizes.A3;
        default:
            return PageSizes.Letter;
    }
}
|
||||
|
||||
/// <summary>
/// Maps a margin preset name (case-insensitive) to its margin configuration.
/// Any name other than "narrow" or "wide" yields the standard margins.
/// </summary>
private static MarginConfig GetMargins(string name)
{
    switch (name.ToLowerInvariant())
    {
        case "narrow":
            return PageSizes.NarrowMargins;
        case "wide":
            return PageSizes.WideMargins;
        default:
            return PageSizes.StandardMargins;
    }
}
|
||||
|
||||
/// <summary>
/// Adds a styles part to <paramref name="mainPart"/> containing the paragraph styles
/// Normal (default), Heading1–Heading6, Title and Subtitle, with fonts and sizes taken
/// from <paramref name="fontConfig"/>.
/// </summary>
/// <remarks>
/// Child elements are created in WordprocessingML schema order: within paragraph
/// properties <c>spacing</c> precedes <c>jc</c>, and within run properties <c>b</c>
/// precedes <c>sz</c>/<c>szCs</c>.
/// </remarks>
/// <param name="mainPart">Main document part that receives the new styles part.</param>
/// <param name="fontConfig">Body/heading font names and sizes to apply.</param>
private static void AddDefaultStyles(MainDocumentPart mainPart, FontConfig fontConfig)
{
    var stylesPart = mainPart.AddNewPart<StyleDefinitionsPart>();
    var styles = new Styles();

    // Run properties of the Normal style: body font and size.
    var defaultRPr = new StyleRunProperties(
        new RunFonts { Ascii = fontConfig.BodyFont, HighAnsi = fontConfig.BodyFont },
        new FontSize { Val = UnitConverter.FontSizeToSz(fontConfig.BodySize) },
        new FontSizeComplexScript { Val = UnitConverter.FontSizeToSz(fontConfig.BodySize) });

    // Normal style
    styles.Append(new Style(
        new StyleName { Val = "Normal" },
        new PrimaryStyle(),
        defaultRPr)
    { Type = StyleValues.Paragraph, StyleId = "Normal", Default = true });

    // Heading styles 1-6
    double[] headingSizes = [fontConfig.Heading1Size, fontConfig.Heading2Size, fontConfig.Heading3Size,
        fontConfig.Heading4Size, fontConfig.Heading5Size, fontConfig.Heading6Size];
    for (int i = 0; i < 6; i++)
    {
        var level = i + 1;
        var headingStyle = new Style(
            new StyleName { Val = $"heading {level}" },
            new BasedOn { Val = "Normal" },
            new NextParagraphStyle { Val = "Normal" },
            new PrimaryStyle(),
            new StyleParagraphProperties(
                new KeepNext(),
                new KeepLines(),
                new SpacingBetweenLines { Before = "240", After = "120" },
                // Outline level is zero-based: Heading1 gets level 0.
                new OutlineLevel { Val = i }),
            new StyleRunProperties(
                new RunFonts { Ascii = fontConfig.HeadingFont, HighAnsi = fontConfig.HeadingFont },
                new Bold(),
                new FontSize { Val = UnitConverter.FontSizeToSz(headingSizes[i]) },
                new FontSizeComplexScript { Val = UnitConverter.FontSizeToSz(headingSizes[i]) }))
        { Type = StyleValues.Paragraph, StyleId = $"Heading{level}" };
        styles.Append(headingStyle);
    }

    // Title style: centered, heading font, 6 units larger than Heading 1.
    styles.Append(new Style(
        new StyleName { Val = "Title" },
        new BasedOn { Val = "Normal" },
        new NextParagraphStyle { Val = "Normal" },
        new PrimaryStyle(),
        new StyleParagraphProperties(
            new SpacingBetweenLines { After = "300" },
            new Justification { Val = JustificationValues.Center }),
        new StyleRunProperties(
            new RunFonts { Ascii = fontConfig.HeadingFont, HighAnsi = fontConfig.HeadingFont },
            new FontSize { Val = UnitConverter.FontSizeToSz(fontConfig.Heading1Size + 6) },
            new FontSizeComplexScript { Val = UnitConverter.FontSizeToSz(fontConfig.Heading1Size + 6) }))
    { Type = StyleValues.Paragraph, StyleId = "Title" });

    // Subtitle style: centered, grey, 2 units larger than body text.
    styles.Append(new Style(
        new StyleName { Val = "Subtitle" },
        new BasedOn { Val = "Normal" },
        new NextParagraphStyle { Val = "Normal" },
        new StyleParagraphProperties(
            new SpacingBetweenLines { After = "200" },
            new Justification { Val = JustificationValues.Center }),
        new StyleRunProperties(
            new Color { Val = "5A5A5A" },
            new FontSize { Val = UnitConverter.FontSizeToSz(fontConfig.BodySize + 2) }))
    { Type = StyleValues.Paragraph, StyleId = "Subtitle" });

    stylesPart.Styles = styles;
    stylesPart.Styles.Save();
}
|
||||
|
||||
/// <summary>
/// Appends content described by a JSON array to <paramref name="body"/>.
/// Each item is an object <c>{type, text?, level?}</c>, for example
/// <c>[{"type":"heading","text":"Introduction","level":1},{"type":"paragraph","text":"..."},{"type":"pagebreak"}]</c>.
/// </summary>
/// <remarks>
/// Malformed input never throws: a parse error, a non-array root, a non-object item or an
/// unknown type produces a warning on stderr and is skipped. A missing or non-string
/// <c>type</c> is treated as "paragraph"; a missing or non-string <c>text</c> as empty; a
/// missing or non-integer <c>level</c> as 1. Heading levels are clamped to 1–6.
/// </remarks>
/// <param name="body">Document body that receives the generated paragraphs.</param>
/// <param name="jsonContent">JSON text to parse.</param>
/// <param name="fontConfig">Not used by this method.</param>
private static void AddContentFromJson(Body body, string jsonContent, FontConfig fontConfig)
{
    // Reads a string property, returning null when it is absent or not a JSON string.
    static string? ReadString(System.Text.Json.JsonElement item, string name) =>
        item.TryGetProperty(name, out var value) && value.ValueKind == System.Text.Json.JsonValueKind.String
            ? value.GetString()
            : null;

    try
    {
        using var jsonDoc = System.Text.Json.JsonDocument.Parse(jsonContent);
        var root = jsonDoc.RootElement;
        if (root.ValueKind != System.Text.Json.JsonValueKind.Array)
        {
            Console.Error.WriteLine("Warning: content JSON must be an array of content items; no content added.");
            return;
        }

        foreach (var element in root.EnumerateArray())
        {
            if (element.ValueKind != System.Text.Json.JsonValueKind.Object)
            {
                Console.Error.WriteLine("Warning: skipped content item that is not a JSON object.");
                continue;
            }

            // "text" is optional so that items such as {"type":"pagebreak"} are accepted.
            var type = ReadString(element, "type") ?? "paragraph";
            var text = ReadString(element, "text") ?? "";

            switch (type)
            {
                case "heading":
                    var level = element.TryGetProperty("level", out var lvl)
                                && lvl.ValueKind == System.Text.Json.JsonValueKind.Number
                                && lvl.TryGetInt32(out var parsedLevel)
                        ? parsedLevel
                        : 1;
                    level = Math.Clamp(level, 1, 6);
                    body.Append(new Paragraph(
                        new ParagraphProperties(new ParagraphStyleId { Val = $"Heading{level}" }),
                        new Run(new Text(text) { Space = SpaceProcessingModeValues.Preserve })));
                    break;

                case "paragraph":
                    // xml:space="preserve" keeps leading/trailing spaces of the text.
                    body.Append(new Paragraph(new Run(new Text(text) { Space = SpaceProcessingModeValues.Preserve })));
                    break;

                case "pagebreak":
                    body.Append(new Paragraph(new Run(new Break { Type = BreakValues.Page })));
                    break;

                default:
                    Console.Error.WriteLine($"Warning: skipped content item with unknown type '{type}'.");
                    break;
            }
        }
    }
    catch (System.Text.Json.JsonException ex)
    {
        Console.Error.WriteLine($"Warning: could not parse content JSON: {ex.Message}");
    }
}
|
||||
}
|
||||
@@ -0,0 +1,155 @@
|
||||
using System.CommandLine;
|
||||
using System.IO.Compression;
|
||||
using System.Text.Json;
|
||||
using System.Xml.Linq;
|
||||
|
||||
namespace MiniMaxAIDocx.Core.Commands;
|
||||
|
||||
/// <summary>
/// CLI command that compares two DOCX files and reports paragraph-text, style-id and
/// structural (section / table / drawing count) differences.
/// </summary>
public static class DiffCommand
{
    private static readonly XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    /// <summary>
    /// Builds the <c>diff</c> command with its <c>--before</c>, <c>--after</c> and <c>--json</c> options.
    /// </summary>
    /// <returns>The configured command.</returns>
    public static Command Create()
    {
        var beforeOption = new Option<string>("--before") { Description = "Original DOCX", Required = true };
        var afterOption = new Option<string>("--after") { Description = "Modified DOCX", Required = true };
        var jsonOption = new Option<bool>("--json") { Description = "Output as JSON" };

        var cmd = new Command("diff", "Compare two DOCX files")
        {
            beforeOption, afterOption, jsonOption
        };

        cmd.SetAction(parseResult =>
        {
            var before = parseResult.GetValue(beforeOption)!;
            var after = parseResult.GetValue(afterOption)!;
            var asJson = parseResult.GetValue(jsonOption);

            // Both inputs must exist; the first missing one is reported.
            foreach (var path in new[] { before, after })
            {
                if (!File.Exists(path))
                {
                    Console.Error.WriteLine($"File not found: {path}");
                    return;
                }
            }

            var oldParagraphs = ExtractParagraphs(before);
            var newParagraphs = ExtractParagraphs(after);
            var oldStyles = ExtractStyleIds(before);
            var newStyles = ExtractStyleIds(after);
            var oldStructure = ExtractStructure(before);
            var newStructure = ExtractStructure(after);

            // Text diff: paragraphs are compared position by position.
            var textChanges = new List<object>();
            int longest = Math.Max(oldParagraphs.Count, newParagraphs.Count);
            for (int index = 0; index < longest; index++)
            {
                string? oldText = index < oldParagraphs.Count ? oldParagraphs[index] : null;
                string? newText = index < newParagraphs.Count ? newParagraphs[index] : null;
                if (string.Equals(oldText, newText))
                {
                    continue;
                }

                textChanges.Add(new
                {
                    paragraph = index + 1,
                    before = oldText ?? "(absent)",
                    after = newText ?? "(absent)"
                });
            }
            int changedParas = textChanges.Count;

            // Style diff: ids present on only one side.
            var addedStyles = newStyles.Where(id => !oldStyles.Contains(id)).ToList();
            var removedStyles = oldStyles.Where(id => !newStyles.Contains(id)).ToList();

            // Structure diff: one line per count that differs.
            var structureChanges = new List<string>();
            var (oldSections, oldTables, oldImages) = oldStructure;
            var (newSections, newTables, newImages) = newStructure;
            if (oldSections != newSections)
            {
                structureChanges.Add($"Sections: {oldSections} -> {newSections}");
            }
            if (oldTables != newTables)
            {
                structureChanges.Add($"Tables: {oldTables} -> {newTables}");
            }
            if (oldImages != newImages)
            {
                structureChanges.Add($"Images: {oldImages} -> {newImages}");
            }

            var summaryLine = $"{changedParas} paragraphs changed, {addedStyles.Count + removedStyles.Count} styles modified, {structureChanges.Count} structural changes";
            var result = new
            {
                textChanges,
                styleChanges = new { added = addedStyles, removed = removedStyles },
                structureChanges,
                summary = summaryLine
            };

            if (asJson)
            {
                var jsonOptions = new JsonSerializerOptions { WriteIndented = true };
                Console.WriteLine(JsonSerializer.Serialize(result, jsonOptions));
                return;
            }

            Console.WriteLine(result.summary);
            Console.WriteLine();

            if (textChanges.Count > 0)
            {
                Console.WriteLine($"Text changes ({textChanges.Count}):");

                // Only the first 20 changes are listed; the remainder is summarised.
                int shown = Math.Min(20, textChanges.Count);
                for (int k = 0; k < shown; k++)
                {
                    Console.WriteLine($" {textChanges[k]}");
                }
                if (textChanges.Count > 20)
                {
                    Console.WriteLine($" ... and {textChanges.Count - 20} more");
                }
            }

            if (addedStyles.Count > 0)
            {
                Console.WriteLine($"Added styles: {string.Join(", ", addedStyles)}");
            }
            if (removedStyles.Count > 0)
            {
                Console.WriteLine($"Removed styles: {string.Join(", ", removedStyles)}");
            }

            foreach (var change in structureChanges)
            {
                Console.WriteLine($"Structure: {change}");
            }
        });

        return cmd;
    }

    /// <summary>Loads one XML part from a DOCX package, or returns null when the part is absent.</summary>
    private static XDocument? LoadPart(string docxPath, string partName)
    {
        using var zip = ZipFile.OpenRead(docxPath);
        var entry = zip.GetEntry(partName);
        if (entry == null)
        {
            return null;
        }

        using var stream = entry.Open();
        return XDocument.Load(stream);
    }

    /// <summary>
    /// Returns the concatenated <c>w:t</c> text of every <c>w:p</c> in <c>word/document.xml</c>,
    /// in document order; empty when the part is missing.
    /// </summary>
    private static List<string> ExtractParagraphs(string docxPath)
    {
        var paragraphs = new List<string>();
        var doc = LoadPart(docxPath, "word/document.xml");
        if (doc == null)
        {
            return paragraphs;
        }

        foreach (var p in doc.Descendants(W + "p"))
        {
            var pieces = from t in p.Descendants(W + "t") select t.Value;
            paragraphs.Add(string.Concat(pieces));
        }
        return paragraphs;
    }

    /// <summary>
    /// Returns the set of <c>w:styleId</c> values declared in <c>word/styles.xml</c>;
    /// empty when the part is missing.
    /// </summary>
    private static HashSet<string> ExtractStyleIds(string docxPath)
    {
        var ids = new HashSet<string>();
        var doc = LoadPart(docxPath, "word/styles.xml");
        if (doc == null)
        {
            return ids;
        }

        foreach (var style in doc.Descendants(W + "style"))
        {
            var id = style.Attribute(W + "styleId")?.Value;
            if (id != null)
            {
                ids.Add(id);
            }
        }
        return ids;
    }

    /// <summary>Counts of <c>w:sectPr</c>, <c>w:tbl</c> and <c>w:drawing</c> elements in a document.</summary>
    private record StructureInfo(int Sections, int Tables, int Images);

    /// <summary>
    /// Counts section-property, table and drawing elements in <c>word/document.xml</c>;
    /// all zeros when the part is missing.
    /// </summary>
    private static StructureInfo ExtractStructure(string docxPath)
    {
        var doc = LoadPart(docxPath, "word/document.xml");
        if (doc == null)
        {
            return new StructureInfo(0, 0, 0);
        }

        int sections = doc.Descendants(W + "sectPr").Count();
        int tables = doc.Descendants(W + "tbl").Count();
        int images = doc.Descendants(W + "drawing").Count();
        return new StructureInfo(sections, tables, images);
    }
}
|
||||
@@ -0,0 +1,487 @@
|
||||
using System.CommandLine;
|
||||
using System.Text.RegularExpressions;
|
||||
using DocumentFormat.OpenXml;
|
||||
using DocumentFormat.OpenXml.Packaging;
|
||||
using DocumentFormat.OpenXml.Wordprocessing;
|
||||
using MiniMaxAIDocx.Core.OpenXml;
|
||||
|
||||
namespace MiniMaxAIDocx.Core.Commands;
|
||||
|
||||
/// <summary>
|
||||
/// Scenario B: Surgical content editing operations on existing DOCX files.
|
||||
/// Preserves all existing formatting and minimizes XML changes.
|
||||
/// </summary>
|
||||
public static class EditContentCommand
|
||||
{
|
||||
/// <summary>
/// Builds the <c>edit</c> parent command and registers every content-editing subcommand on it.
/// </summary>
/// <returns>The <c>edit</c> command with its six subcommands attached.</returns>
public static Command Create()
{
    // The collection initializer calls Add for each subcommand, in this order.
    return new Command("edit", "Edit existing DOCX content")
    {
        CreateReplaceTextCommand(),
        CreateFillTableCommand(),
        CreateInsertParagraphCommand(),
        CreateUpdateFieldCommand(),
        CreateListPlaceholdersCommand(),
        CreateFillPlaceholdersCommand()
    };
}
|
||||
|
||||
/// <summary>
/// Builds the <c>replace-text</c> subcommand, which replaces literal or regex matches in every
/// paragraph of the document body and reports how many occurrences were replaced.
/// </summary>
/// <remarks>
/// All arguments are validated before any file is written: the input must exist, a literal search
/// string must not be empty, and a regex pattern must compile. When <c>--output</c> is omitted the
/// input file is modified in place.
/// </remarks>
private static Command CreateReplaceTextCommand()
{
    var inputOpt = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOpt = new Option<string>("--output") { Description = "Output file path (defaults to overwriting input)" };
    var searchOpt = new Option<string>("--search") { Description = "Text to search for", Required = true };
    var replaceOpt = new Option<string>("--replace") { Description = "Replacement text", Required = true };
    var regexOpt = new Option<bool>("--regex") { Description = "Treat search as a regex pattern" };

    var cmd = new Command("replace-text", "Replace text while preserving formatting")
    {
        inputOpt, outputOpt, searchOpt, replaceOpt, regexOpt
    };

    cmd.SetAction((parseResult) =>
    {
        var input = parseResult.GetValue(inputOpt)!;
        var output = parseResult.GetValue(outputOpt) ?? input;
        var search = parseResult.GetValue(searchOpt)!;
        var replace = parseResult.GetValue(replaceOpt)!;
        var useRegex = parseResult.GetValue(regexOpt);

        if (!File.Exists(input)) { Console.Error.WriteLine($"File not found: {input}"); return; }

        // An empty literal cannot be searched for (string.Replace rejects it).
        if (!useRegex && search.Length == 0)
        {
            Console.Error.WriteLine("Search text must not be empty.");
            return;
        }

        // Compile the pattern once up front so a typo is reported before the output file is touched.
        if (useRegex)
        {
            try
            {
                _ = new Regex(search);
            }
            catch (ArgumentException ex)
            {
                Console.Error.WriteLine($"Invalid regex pattern: {ex.Message}");
                return;
            }
        }

        if (output != input) File.Copy(input, output, overwrite: true);

        using var doc = WordprocessingDocument.Open(output, true);
        var body = doc.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }

        int count = 0;
        // Snapshot the paragraph list: replacement edits the tree while we walk it.
        foreach (var paragraph in body.Descendants<Paragraph>().ToList())
        {
            count += ReplaceInParagraph(paragraph, search, replace, useRegex);
        }

        doc.MainDocumentPart!.Document.Save();
        Console.WriteLine($"Replaced {count} occurrence(s) in {output}");
    });

    return cmd;
}
|
||||
|
||||
/// <summary>
/// Builds the <c>fill-table</c> subcommand, which fills a top-level body table with rows read from a CSV file.
/// </summary>
/// <remarks>
/// The first CSV line is treated as a header and skipped. Unless <c>--append</c> is given, every
/// existing table row except the first is removed before the new rows are added. Row properties
/// are cloned from the table's second row (or its only row). The input path, CSV path and table
/// index sign are validated before the output file is created; blank CSV lines are skipped.
/// </remarks>
private static Command CreateFillTableCommand()
{
    var inputOpt = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOpt = new Option<string>("--output") { Description = "Output file path" };
    var tableIndexOpt = new Option<int>("--table-index") { Description = "Zero-based index of the table to fill" };
    tableIndexOpt.DefaultValueFactory = _ => 0;
    var csvOpt = new Option<string>("--csv") { Description = "CSV file with data to fill", Required = true };
    var appendOpt = new Option<bool>("--append") { Description = "Append rows instead of replacing existing data rows" };

    var cmd = new Command("fill-table", "Fill a table with data from CSV")
    {
        inputOpt, outputOpt, tableIndexOpt, csvOpt, appendOpt
    };

    cmd.SetAction((parseResult) =>
    {
        var input = parseResult.GetValue(inputOpt)!;
        var output = parseResult.GetValue(outputOpt) ?? input;
        var tableIndex = parseResult.GetValue(tableIndexOpt);
        var csvPath = parseResult.GetValue(csvOpt)!;
        var append = parseResult.GetValue(appendOpt);

        // Validate everything that can be checked cheaply before creating the output file.
        if (!File.Exists(input)) { Console.Error.WriteLine($"File not found: {input}"); return; }
        if (!File.Exists(csvPath)) { Console.Error.WriteLine($"CSV file not found: {csvPath}"); return; }
        if (tableIndex < 0)
        {
            Console.Error.WriteLine($"Table index {tableIndex} out of range (must be 0 or greater).");
            return;
        }

        if (output != input) File.Copy(input, output, overwrite: true);

        using var doc = WordprocessingDocument.Open(output, true);
        var body = doc.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }

        var tables = body.Elements<Table>().ToList();
        if (tableIndex >= tables.Count)
        {
            Console.Error.WriteLine($"Table index {tableIndex} out of range (found {tables.Count} tables).");
            return;
        }

        var table = tables[tableIndex];
        var csvLines = File.ReadAllLines(csvPath);
        if (csvLines.Length == 0) { Console.WriteLine("CSV is empty, nothing to fill."); return; }

        // Get template row properties from the first data row (second row, after header).
        var existingRows = table.Elements<TableRow>().ToList();
        TableRow? templateRow = existingRows.Count > 1 ? existingRows[1] : existingRows.FirstOrDefault();
        var templateTrPr = templateRow?.TableRowProperties?.CloneNode(true) as TableRowProperties;

        if (!append)
        {
            // Remove all rows except the header row.
            for (int i = existingRows.Count - 1; i >= 1; i--)
            {
                existingRows[i].Remove();
            }
        }

        int rowsAdded = 0;
        // Skip header line in CSV (index 0).
        for (int i = 1; i < csvLines.Length; i++)
        {
            // A blank line carries no record; without this it would become a one-cell empty row.
            if (csvLines[i].Length == 0)
            {
                continue;
            }

            var values = ParseCsvLine(csvLines[i]);
            var newRow = new TableRow();
            if (templateTrPr != null)
            {
                newRow.Append(templateTrPr.CloneNode(true));
            }

            foreach (var val in values)
            {
                var cellText = new Text(val);
                // Without xml:space="preserve" leading/trailing whitespace is dropped by consumers.
                if (val.Length > 0 && (char.IsWhiteSpace(val[0]) || char.IsWhiteSpace(val[^1])))
                {
                    cellText.Space = SpaceProcessingModeValues.Preserve;
                }

                newRow.Append(new TableCell(new Paragraph(new Run(cellText))));
            }

            table.Append(newRow);
            rowsAdded++;
        }

        doc.MainDocumentPart!.Document.Save();
        Console.WriteLine($"Added {rowsAdded} rows to table {tableIndex} in {output}");
    });

    return cmd;
}
|
||||
|
||||
/// <summary>
/// Builds the <c>insert-paragraph</c> subcommand, which adds one paragraph (optionally styled)
/// to the document body.
/// </summary>
/// <remarks>
/// With <c>--after-paragraph N</c> the paragraph is inserted after the N-th (0-based) direct child
/// paragraph of the body. With the default of -1, or an index that is out of range, it is placed
/// before the body's section properties if present, otherwise appended; an out-of-range index
/// additionally produces a warning on stderr.
/// </remarks>
private static Command CreateInsertParagraphCommand()
{
    var inputOpt = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOpt = new Option<string>("--output") { Description = "Output file path" };
    var textOpt = new Option<string>("--text") { Description = "Paragraph text", Required = true };
    var styleOpt = new Option<string>("--style") { Description = "Paragraph style (e.g. Heading1, Normal)" };
    var afterOpt = new Option<int>("--after-paragraph") { Description = "Insert after this paragraph index (0-based)" };
    afterOpt.DefaultValueFactory = _ => -1; // -1 = append at end

    var cmd = new Command("insert-paragraph", "Insert a new paragraph")
    {
        inputOpt, outputOpt, textOpt, styleOpt, afterOpt
    };

    cmd.SetAction((parseResult) =>
    {
        var input = parseResult.GetValue(inputOpt)!;
        var output = parseResult.GetValue(outputOpt) ?? input;
        var text = parseResult.GetValue(textOpt)!;
        var style = parseResult.GetValue(styleOpt);
        var afterIndex = parseResult.GetValue(afterOpt);

        if (!File.Exists(input)) { Console.Error.WriteLine($"File not found: {input}"); return; }

        if (output != input) File.Copy(input, output, overwrite: true);

        using var doc = WordprocessingDocument.Open(output, true);
        var body = doc.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }

        var newPara = new Paragraph();
        if (!string.IsNullOrEmpty(style))
        {
            newPara.Append(new ParagraphProperties(new ParagraphStyleId { Val = style }));
        }

        var paraText = new Text(text);
        // Without xml:space="preserve" leading/trailing whitespace is dropped by consumers.
        if (text.Length > 0 && (char.IsWhiteSpace(text[0]) || char.IsWhiteSpace(text[^1])))
        {
            paraText.Space = SpaceProcessingModeValues.Preserve;
        }
        newPara.Append(new Run(paraText));

        var paragraphs = body.Elements<Paragraph>().ToList();
        if (afterIndex >= 0 && afterIndex < paragraphs.Count)
        {
            paragraphs[afterIndex].InsertAfterSelf(newPara);
        }
        else
        {
            if (afterIndex >= paragraphs.Count)
            {
                // Still append (previous behaviour), but do not hide the mismatch from the user.
                Console.Error.WriteLine(
                    $"Warning: paragraph index {afterIndex} out of range (found {paragraphs.Count} paragraphs); appending at end.");
            }

            // Insert before sectPr if present, otherwise append.
            var sectPr = body.Elements<SectionProperties>().FirstOrDefault();
            if (sectPr != null)
            {
                sectPr.InsertBeforeSelf(newPara);
            }
            else
            {
                body.Append(newPara);
            }
        }

        doc.MainDocumentPart!.Document.Save();
        Console.WriteLine($"Inserted paragraph in {output}");
    });

    return cmd;
}
|
||||
|
||||
/// <summary>
/// Builds the <c>update-field</c> subcommand, which sets one package (core) property of the document.
/// </summary>
/// <remarks>
/// Supported field names (case-insensitive): TITLE, AUTHOR, SUBJECT, KEYWORDS, DESCRIPTION, CATEGORY.
/// The field name and input path are validated before the output file is created, so a rejected
/// request leaves no output behind.
/// </remarks>
private static Command CreateUpdateFieldCommand()
{
    var inputOpt = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOpt = new Option<string>("--output") { Description = "Output file path" };
    var fieldNameOpt = new Option<string>("--field") { Description = "Document property field name (e.g. TITLE, AUTHOR)", Required = true };
    var valueOpt = new Option<string>("--value") { Description = "New field value", Required = true };

    var cmd = new Command("update-field", "Update a document property field value")
    {
        inputOpt, outputOpt, fieldNameOpt, valueOpt
    };

    cmd.SetAction((parseResult) =>
    {
        var input = parseResult.GetValue(inputOpt)!;
        var output = parseResult.GetValue(outputOpt) ?? input;
        var fieldName = parseResult.GetValue(fieldNameOpt)!;
        var value = parseResult.GetValue(valueOpt)!;

        // Reject unknown fields before copying/opening anything.
        var normalizedField = fieldName.ToUpperInvariant();
        if (normalizedField is not ("TITLE" or "AUTHOR" or "SUBJECT" or "KEYWORDS" or "DESCRIPTION" or "CATEGORY"))
        {
            Console.Error.WriteLine($"Unknown field: {fieldName}. Supported: TITLE, AUTHOR, SUBJECT, KEYWORDS, DESCRIPTION, CATEGORY");
            return;
        }

        if (!File.Exists(input)) { Console.Error.WriteLine($"File not found: {input}"); return; }

        if (output != input) File.Copy(input, output, overwrite: true);

        using var doc = WordprocessingDocument.Open(output, true);

        // Update core properties.
        var props = doc.PackageProperties;
        switch (normalizedField)
        {
            case "TITLE": props.Title = value; break;
            case "AUTHOR": props.Creator = value; break;
            case "SUBJECT": props.Subject = value; break;
            case "KEYWORDS": props.Keywords = value; break;
            case "DESCRIPTION": props.Description = value; break;
            case "CATEGORY": props.Category = value; break;
        }

        Console.WriteLine($"Updated {fieldName} to \"{value}\" in {output}");
    });

    return cmd;
}
|
||||
|
||||
/// <summary>
/// Builds the <c>list-placeholders</c> subcommand, which prints every distinct placeholder
/// (full match text) found in the body paragraphs, sorted.
/// </summary>
/// <remarks>
/// Each paragraph's text is the concatenation of all its descendant text elements, so a placeholder
/// split across runs is still found. The default pattern matches <c>{{NAME}}</c>. A missing input
/// file or an invalid pattern is reported on stderr instead of throwing.
/// </remarks>
private static Command CreateListPlaceholdersCommand()
{
    var inputOpt = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var patternOpt = new Option<string>("--pattern") { Description = "Placeholder pattern (regex)" };
    patternOpt.DefaultValueFactory = _ => @"\{\{(\w+)\}\}"; // {{PLACEHOLDER}}

    var cmd = new Command("list-placeholders", "List all placeholders found in the document")
    {
        inputOpt, patternOpt
    };

    cmd.SetAction((parseResult) =>
    {
        var input = parseResult.GetValue(inputOpt)!;
        var pattern = parseResult.GetValue(patternOpt)!;

        if (!File.Exists(input)) { Console.Error.WriteLine($"File not found: {input}"); return; }

        Regex regex;
        try
        {
            regex = new Regex(pattern);
        }
        catch (ArgumentException ex)
        {
            Console.Error.WriteLine($"Invalid placeholder pattern: {ex.Message}");
            return;
        }

        using var doc = WordprocessingDocument.Open(input, false);
        var body = doc.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }

        var placeholders = new HashSet<string>();

        foreach (var paragraph in body.Descendants<Paragraph>())
        {
            var fullText = string.Concat(paragraph.Descendants<Text>().Select(t => t.Text));
            foreach (Match match in regex.Matches(fullText))
            {
                placeholders.Add(match.Value);
            }
        }

        if (placeholders.Count == 0)
        {
            Console.WriteLine("No placeholders found.");
            return;
        }

        Console.WriteLine($"Found {placeholders.Count} unique placeholder(s):");
        foreach (var p in placeholders.OrderBy(x => x))
        {
            Console.WriteLine($" {p}");
        }
    });

    return cmd;
}
|
||||
|
||||
/// <summary>
/// Builds the <c>fill-placeholders</c> subcommand, which replaces placeholders in body paragraphs
/// with values from a JSON mapping file (<c>{"NAME":"value", ...}</c>).
/// </summary>
/// <remarks>
/// The placeholder name is the pattern's first capture group when it has one, otherwise the whole
/// match. Placeholders without a mapping entry are left untouched. Each distinct placeholder text
/// is substituted once per paragraph (one call replaces all of its occurrences), so a value that
/// itself contains the placeholder is not expanded a second time. Input path, mapping file and
/// pattern are validated before the output file is created.
/// </remarks>
private static Command CreateFillPlaceholdersCommand()
{
    var inputOpt = new Option<string>("--input") { Description = "Input DOCX file", Required = true };
    var outputOpt = new Option<string>("--output") { Description = "Output file path" };
    var mappingOpt = new Option<string>("--mapping") { Description = "JSON file mapping placeholder names to values", Required = true };
    var patternOpt = new Option<string>("--pattern") { Description = "Placeholder pattern with capture group for the name" };
    patternOpt.DefaultValueFactory = _ => @"\{\{(\w+)\}\}";

    var cmd = new Command("fill-placeholders", "Replace placeholders with values from a mapping file")
    {
        inputOpt, outputOpt, mappingOpt, patternOpt
    };

    cmd.SetAction((parseResult) =>
    {
        var input = parseResult.GetValue(inputOpt)!;
        var output = parseResult.GetValue(outputOpt) ?? input;
        var mappingPath = parseResult.GetValue(mappingOpt)!;
        var pattern = parseResult.GetValue(patternOpt)!;

        if (!File.Exists(input)) { Console.Error.WriteLine($"File not found: {input}"); return; }
        if (!File.Exists(mappingPath)) { Console.Error.WriteLine($"Mapping file not found: {mappingPath}"); return; }

        Regex regex;
        try
        {
            regex = new Regex(pattern);
        }
        catch (ArgumentException ex)
        {
            Console.Error.WriteLine($"Invalid placeholder pattern: {ex.Message}");
            return;
        }

        var mappingJson = File.ReadAllText(mappingPath);
        Dictionary<string, string> mapping;
        try
        {
            mapping = System.Text.Json.JsonSerializer.Deserialize<Dictionary<string, string>>(mappingJson) ?? [];
        }
        catch (System.Text.Json.JsonException ex)
        {
            Console.Error.WriteLine($"Invalid mapping JSON: {ex.Message}");
            return;
        }

        if (output != input) File.Copy(input, output, overwrite: true);

        using var doc = WordprocessingDocument.Open(output, true);
        var body = doc.MainDocumentPart?.Document.Body;
        if (body == null) { Console.Error.WriteLine("No document body found."); return; }

        int totalReplacements = 0;

        // Snapshot the paragraph list: replacement edits the tree while we walk it.
        foreach (var paragraph in body.Descendants<Paragraph>().ToList())
        {
            var fullText = string.Concat(paragraph.Descendants<Text>().Select(t => t.Text));
            var matches = regex.Matches(fullText);
            if (matches.Count == 0) continue;

            // One ReplaceInParagraph call already handles every occurrence of a given placeholder
            // text, so each distinct match value is processed only once per paragraph.
            var handled = new HashSet<string>(StringComparer.Ordinal);
            foreach (Match match in matches)
            {
                if (match.Value.Length == 0 || !handled.Add(match.Value)) continue;

                var placeholderName = match.Groups.Count > 1 ? match.Groups[1].Value : match.Value;
                if (mapping.TryGetValue(placeholderName, out var replacement))
                {
                    totalReplacements += ReplaceInParagraph(paragraph, match.Value, replacement, false);
                }
            }
        }

        doc.MainDocumentPart!.Document.Save();
        Console.WriteLine($"Filled {totalReplacements} placeholder(s) in {output}");
    });

    return cmd;
}
|
||||
|
||||
/// <summary>
/// Replaces text within a paragraph while preserving run formatting, including matches that
/// span several runs.
/// </summary>
/// <remarks>
/// The text of the paragraph's direct child runs is concatenated and searched once. Each match is
/// then written back into the text elements it came from: the replacement is placed in the text
/// element where the match starts, and the matched characters are removed from any following
/// text elements. No run is removed or merged, so run properties and non-text run content are
/// left as they were. Runs nested in other containers (for example hyperlinks) are not searched.
/// </remarks>
/// <param name="paragraph">Paragraph to edit in place.</param>
/// <param name="search">Literal text (ordinal comparison) or, with <paramref name="useRegex"/>, a regex pattern.</param>
/// <param name="replace">Replacement text; for regex searches it may contain substitutions such as <c>$1</c>.</param>
/// <param name="useRegex">True to treat <paramref name="search"/> as a regular expression.</param>
/// <returns>The number of occurrences replaced; 0 for an empty literal search.</returns>
private static int ReplaceInParagraph(Paragraph paragraph, string search, string replace, bool useRegex)
{
    // Writes a value and marks it whitespace-preserving when it starts or ends with whitespace.
    static void SetText(Text element, string value)
    {
        element.Text = value;
        if (value.Length > 0 && (char.IsWhiteSpace(value[0]) || char.IsWhiteSpace(value[^1])))
        {
            element.Space = SpaceProcessingModeValues.Preserve;
        }
    }

    var texts = paragraph.Elements<Run>().SelectMany(r => r.Elements<Text>()).ToList();
    if (texts.Count == 0) return 0;

    // Build the full paragraph text plus, per text element, its start offset and original length.
    var starts = new int[texts.Count];
    var lengths = new int[texts.Count];
    var builder = new System.Text.StringBuilder();
    for (int i = 0; i < texts.Count; i++)
    {
        var value = texts[i].Text ?? string.Empty;
        starts[i] = builder.Length;
        lengths[i] = value.Length;
        builder.Append(value);
    }

    var fullText = builder.ToString();
    if (fullText.Length == 0) return 0;

    // Collect matches in ascending order together with their expanded replacement.
    var hits = new List<(int Start, int Length, string Replacement)>();
    if (useRegex)
    {
        foreach (Match m in Regex.Matches(fullText, search))
        {
            hits.Add((m.Index, m.Length, m.Result(replace)));
        }
    }
    else
    {
        // An empty literal matches nowhere meaningful (and string.Replace would throw on it).
        if (search.Length == 0) return 0;

        int at = fullText.IndexOf(search, StringComparison.Ordinal);
        while (at >= 0)
        {
            hits.Add((at, search.Length, replace));
            at = fullText.IndexOf(search, at + search.Length, StringComparison.Ordinal);
        }
    }

    if (hits.Count == 0) return 0;

    // Apply right-to-left: an edit only changes characters at or after its own start, so the
    // original offsets stay valid for every match that is still to be applied further left.
    for (int h = hits.Count - 1; h >= 0; h--)
    {
        var (start, length, replacement) = hits[h];
        int end = start + length;

        // First text element that contains the match start (the last element for a match at the very end).
        int first = 0;
        while (first < texts.Count - 1 && start >= starts[first] + lengths[first])
        {
            first++;
        }

        int localStart = start - starts[first];
        int removeHere = Math.Min(end, starts[first] + lengths[first]) - start;
        var current = texts[first].Text ?? string.Empty;
        SetText(texts[first], current.Remove(localStart, removeHere).Insert(localStart, replacement));

        // Strip the remainder of the match from the following text elements.
        for (int i = first + 1; i < texts.Count && starts[i] < end; i++)
        {
            int cut = Math.Min(end, starts[i] + lengths[i]) - starts[i];
            if (cut > 0)
            {
                SetText(texts[i], (texts[i].Text ?? string.Empty).Remove(0, cut));
            }
        }
    }

    return hits.Count;
}
|
||||
|
||||
/// <summary>
/// Replaces the text content of existing runs with new text,
/// preserving the formatting of the first run.
/// </summary>
/// <remarks>
/// The whole of <paramref name="newText"/> is stored in a single text element of the first run
/// (one is created if the run has none, and surplus text elements in that run are removed so the
/// old text is not duplicated). Text elements are removed from the remaining runs; a remaining
/// run is deleted only when nothing but run properties is left in it, so breaks, tabs, drawings
/// and similar non-text content are kept.
/// </remarks>
/// <param name="paragraph">Paragraph that owns the runs; not read by this method.</param>
/// <param name="runs">Runs whose combined text is being replaced; the first one receives the text.</param>
/// <param name="newText">Replacement text for the combined runs.</param>
private static void RebuildRunsWithText(Paragraph paragraph, List<Run> runs, string newText)
{
    if (runs.Count == 0) return;

    // Keep the first run's formatting, set its text to the full new text.
    var firstRun = runs[0];
    var firstRunTexts = firstRun.Elements<Text>().ToList();
    Text target;
    if (firstRunTexts.Count > 0)
    {
        target = firstRunTexts[0];
        for (int i = 1; i < firstRunTexts.Count; i++)
        {
            firstRunTexts[i].Remove();
        }
    }
    else
    {
        // Without a text element the new text would be silently lost.
        target = firstRun.AppendChild(new Text());
    }

    target.Text = newText;
    if (newText.StartsWith(' ') || newText.EndsWith(' '))
    {
        target.Space = SpaceProcessingModeValues.Preserve;
    }

    // Drop the text of all other runs; remove a run only if it is otherwise empty.
    for (int i = 1; i < runs.Count; i++)
    {
        var run = runs[i];
        foreach (var text in run.Elements<Text>().ToList())
        {
            text.Remove();
        }

        bool hasOtherContent = run.ChildElements.Any(child => child is not RunProperties);
        if (!hasOtherContent)
        {
            run.Remove();
        }
    }
}
|
||||
|
||||
/// <summary>
/// Counts non-overlapping occurrences of <paramref name="search"/> in <paramref name="text"/>
/// using ordinal comparison, scanning left to right.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="search">Substring to look for.</param>
/// <returns>The number of occurrences; 0 when <paramref name="search"/> is empty.</returns>
private static int CountOccurrences(string text, string search)
{
    // An empty needle "matches" at every index without advancing, which would loop forever.
    if (string.IsNullOrEmpty(search))
    {
        return 0;
    }

    int count = 0;
    int index = 0;
    while ((index = text.IndexOf(search, index, StringComparison.Ordinal)) != -1)
    {
        count++;
        index += search.Length; // continue after this match so occurrences never overlap
    }
    return count;
}
|
||||
|
||||
/// <summary>
/// Splits one CSV line into fields. Commas inside double-quoted sections do not split,
/// the quote characters themselves are dropped, and a doubled quote inside a quoted
/// section yields a single literal quote.
/// </summary>
/// <param name="line">A single line of CSV text.</param>
/// <returns>The fields in order; always at least one element (an empty line gives one empty field).</returns>
private static string[] ParseCsvLine(string line)
{
    var fields = new List<string>();
    var field = new System.Text.StringBuilder();
    bool quoted = false;
    int pos = 0;

    while (pos < line.Length)
    {
        char ch = line[pos++];
        switch (ch)
        {
            // "" inside a quoted section is an escaped quote; consume both characters.
            case '"' when quoted && pos < line.Length && line[pos] == '"':
                field.Append('"');
                pos++;
                break;

            // Any other quote opens or closes a quoted section.
            case '"':
                quoted = !quoted;
                break;

            // An unquoted comma ends the current field.
            case ',' when !quoted:
                fields.Add(field.ToString());
                field.Clear();
                break;

            default:
                field.Append(ch);
                break;
        }
    }

    // Whatever was collected after the last separator is the final field.
    fields.Add(field.ToString());
    return fields.ToArray();
}
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
using System.CommandLine;
|
||||
using System.IO.Compression;
|
||||
using System.Xml.Linq;
|
||||
|
||||
namespace MiniMaxAIDocx.Core.Commands;
|
||||
|
||||
/// <summary>
/// CLI command that reorders the children of WordprocessingML property containers
/// (<c>pPr</c>, <c>rPr</c>, <c>tblPr</c>, <c>tcPr</c>, <c>sectPr</c>) in <c>word/document.xml</c>
/// into schema order.
/// </summary>
public static class FixOrderCommand
{
    private static readonly XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    // Canonical element ordering within common parent elements per ISO 29500.
    // The lists follow the schema sequences (CT_PPr, CT_ParaRPr/CT_RPr, CT_TblPr, CT_TcPr, CT_SectPr);
    // an incomplete list would make the sort displace elements that were already correctly placed.
    private static readonly Dictionary<string, List<string>> ElementOrder = new()
    {
        ["pPr"] = new()
        {
            "pStyle", "keepNext", "keepLines", "pageBreakBefore", "framePr", "widowControl", "numPr",
            "suppressLineNumbers", "pBdr", "shd", "tabs", "suppressAutoHyphens", "kinsoku", "wordWrap",
            "overflowPunct", "topLinePunct", "autoSpaceDE", "autoSpaceDN", "bidi", "adjustRightInd",
            "snapToGrid", "spacing", "ind", "contextualSpacing", "mirrorIndents", "suppressOverlap", "jc",
            "textDirection", "textAlignment", "textboxTightWrap", "outlineLvl", "divId", "cnfStyle",
            "rPr", "sectPr", "pPrChange"
        },
        ["rPr"] = new()
        {
            "ins", "del", "moveFrom", "moveTo",
            "rStyle", "rFonts", "b", "bCs", "i", "iCs", "caps", "smallCaps", "strike", "dstrike",
            "outline", "shadow", "emboss", "imprint", "noProof", "snapToGrid", "vanish", "webHidden",
            "color", "spacing", "w", "kern", "position", "sz", "szCs", "highlight", "u", "effect",
            "bdr", "shd", "fitText", "vertAlign", "rtl", "cs", "em", "lang", "eastAsianLayout",
            "specVanish", "oMath", "rPrChange"
        },
        ["tblPr"] = new()
        {
            "tblStyle", "tblpPr", "tblOverlap", "bidiVisual", "tblStyleRowBandSize", "tblStyleColBandSize",
            "tblW", "jc", "tblCellSpacing", "tblInd", "tblBorders", "shd", "tblLayout", "tblCellMar",
            "tblLook", "tblCaption", "tblDescription", "tblPrChange"
        },
        ["tcPr"] = new()
        {
            "cnfStyle", "tcW", "gridSpan", "hMerge", "vMerge", "tcBorders", "shd", "noWrap", "tcMar",
            "textDirection", "tcFitText", "vAlign", "hideMark", "headers", "cellIns", "cellDel",
            "cellMerge", "tcPrChange"
        },
        ["sectPr"] = new()
        {
            "headerReference", "footerReference", "footnotePr", "endnotePr", "type", "pgSz", "pgMar",
            "paperSrc", "pgBorders", "lnNumType", "pgNumType", "cols", "formProt", "vAlign", "noEndnote",
            "titlePg", "textDirection", "bidi", "rtlGutter", "docGrid", "printerSettings", "sectPrChange"
        },
    };

    /// <summary>
    /// Builds the <c>fix-order</c> command with its <c>--input</c>, <c>--output</c> and <c>--backup</c> options.
    /// </summary>
    /// <remarks>
    /// The document is rewritten through a temporary copy that is always deleted. When the input
    /// is overwritten and <c>--backup</c> is on (the default), a <c>.bak</c> copy is written first.
    /// </remarks>
    public static Command Create()
    {
        var inputOption = new Option<string>("--input") { Description = "DOCX file to fix", Required = true };
        var outputOption = new Option<string>("--output") { Description = "Output path (default: overwrite input)" };
        var backupOption = new Option<bool>("--backup") { Description = "Create .bak before modifying", DefaultValueFactory = (_) => true };

        var cmd = new Command("fix-order", "Fix OpenXML element ordering per ISO 29500")
        {
            inputOption, outputOption, backupOption
        };

        cmd.SetAction((parseResult) =>
        {
            var input = parseResult.GetValue(inputOption)!;
            var output = parseResult.GetValue(outputOption) ?? input;
            var backup = parseResult.GetValue(backupOption);

            if (!File.Exists(input))
            {
                Console.Error.WriteLine($"File not found: {input}");
                return;
            }

            int reorderedCount = 0;
            var tempPath = Path.GetTempFileName();
            try
            {
                File.Copy(input, tempPath, true);

                using (var zip = ZipFile.Open(tempPath, ZipArchiveMode.Update))
                {
                    var entry = zip.GetEntry("word/document.xml");
                    if (entry == null)
                    {
                        Console.Error.WriteLine("Not a valid DOCX");
                        return;
                    }

                    // Preserve whitespace on load and disable formatting on save so that the only
                    // difference written back is the element order.
                    XDocument doc;
                    using (var stream = entry.Open())
                    {
                        doc = XDocument.Load(stream, LoadOptions.PreserveWhitespace);
                    }

                    foreach (var (parentName, order) in ElementOrder)
                    {
                        // Snapshot: children are replaced while the tree is being walked.
                        foreach (var parent in doc.Descendants(W + parentName).ToList())
                        {
                            if (ReorderChildren(parent, order))
                            {
                                reorderedCount++;
                            }
                        }
                    }

                    entry.Delete();
                    var newEntry = zip.CreateEntry("word/document.xml", CompressionLevel.Optimal);
                    using (var stream = newEntry.Open())
                    {
                        doc.Save(stream, SaveOptions.DisableFormatting);
                    }
                }

                // The archive is closed here, so the temp file is complete and unlocked.
                if (backup && output == input)
                {
                    File.Copy(input, input + ".bak", true);
                }

                File.Copy(tempPath, output, true);
            }
            finally
            {
                File.Delete(tempPath);
            }

            Console.WriteLine($"Reordered {reorderedCount} element group(s)");
            Console.WriteLine($"Written to: {output}");
        });

        return cmd;
    }

    /// <summary>
    /// Stable-sorts the child elements of <paramref name="parent"/> by their position in
    /// <paramref name="order"/>. Elements that are not listed (or are not in the main
    /// WordprocessingML namespace) keep their place directly after the listed element they followed.
    /// </summary>
    /// <returns>True when the child order was changed.</returns>
    private static bool ReorderChildren(XElement parent, List<string> order)
    {
        var children = parent.Elements().ToList();
        if (children.Count < 2)
        {
            return false;
        }

        // Rank of each child; an unlisted child inherits the rank of the preceding listed sibling
        // (-1 when there is none) so it travels with that sibling instead of jumping to the end.
        var ranks = new int[children.Count];
        int carried = -1;
        for (int i = 0; i < children.Count; i++)
        {
            var name = children[i].Name;
            int rank = name.Namespace == W ? order.IndexOf(name.LocalName) : -1;
            if (rank >= 0)
            {
                carried = rank;
            }
            ranks[i] = carried;
        }

        // OrderBy is stable, so equal ranks keep their original relative order.
        var sortedIndexes = Enumerable.Range(0, children.Count).OrderBy(i => ranks[i]).ToList();

        bool changed = false;
        for (int i = 0; i < sortedIndexes.Count; i++)
        {
            if (sortedIndexes[i] != i)
            {
                changed = true;
                break;
            }
        }

        if (!changed)
        {
            return false;
        }

        parent.ReplaceNodes(sortedIndexes.Select(i => children[i]).ToList());
        return true;
    }
}
|
||||
@@ -0,0 +1,122 @@
|
||||
using System.CommandLine;
|
||||
using System.IO.Compression;
|
||||
using System.Xml.Linq;
|
||||
|
||||
namespace MiniMaxAIDocx.Core.Commands;
|
||||
|
||||
/// <summary>
/// CLI command that merges adjacent text-only runs with identical run properties in
/// <c>word/document.xml</c> and reports the run count before and after.
/// </summary>
public static class MergeRunsCommand
{
    private static readonly XNamespace W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

    /// <summary>
    /// Builds the <c>merge-runs</c> command with its <c>--input</c>, <c>--output</c> and <c>--dry-run</c> options.
    /// </summary>
    /// <remarks>
    /// With <c>--dry-run</c> the document is only read and the statistics are printed. Otherwise the
    /// package is rewritten through a temporary copy that is always deleted.
    /// </remarks>
    public static Command Create()
    {
        var inputOption = new Option<string>("--input") { Description = "DOCX file to optimize", Required = true };
        var outputOption = new Option<string>("--output") { Description = "Output path (default: overwrite input)" };
        var dryRunOption = new Option<bool>("--dry-run") { Description = "Report without modifying" };

        var cmd = new Command("merge-runs", "Merge adjacent runs with identical formatting")
        {
            inputOption, outputOption, dryRunOption
        };

        cmd.SetAction((parseResult) =>
        {
            var input = parseResult.GetValue(inputOption)!;
            var output = parseResult.GetValue(outputOption) ?? input;
            var dryRun = parseResult.GetValue(dryRunOption);

            if (!File.Exists(input))
            {
                Console.Error.WriteLine($"File not found: {input}");
                return;
            }

            // Read the main part straight from the input; no temp copy is needed just to analyse.
            XDocument doc;
            using (var source = ZipFile.OpenRead(input))
            {
                var entry = source.GetEntry("word/document.xml");
                if (entry == null)
                {
                    Console.Error.WriteLine("Not a valid DOCX: missing word/document.xml");
                    return;
                }

                // Preserve whitespace so the rewrite differs from the source only by the merges.
                using var stream = entry.Open();
                doc = XDocument.Load(stream, LoadOptions.PreserveWhitespace);
            }

            var (originalCount, mergedCount) = MergeAdjacentRuns(doc);

            if (!dryRun)
            {
                var tempPath = Path.GetTempFileName();
                try
                {
                    File.Copy(input, tempPath, true);
                    using (var zip = ZipFile.Open(tempPath, ZipArchiveMode.Update))
                    {
                        zip.GetEntry("word/document.xml")?.Delete();
                        var newEntry = zip.CreateEntry("word/document.xml", CompressionLevel.Optimal);
                        using var stream = newEntry.Open();
                        doc.Save(stream, SaveOptions.DisableFormatting);
                    }

                    // The archive is closed here, so the temp file is complete and unlocked.
                    File.Copy(tempPath, output, true);
                }
                finally
                {
                    File.Delete(tempPath);
                }
            }

            Console.WriteLine($"Original runs: {originalCount}");
            Console.WriteLine($"After merge: {mergedCount}");
            Console.WriteLine($"Reduction: {(originalCount > 0 ? (originalCount - mergedCount) * 100.0 / originalCount : 0):F1}%");
            if (!dryRun)
            {
                Console.WriteLine($"Written to: {output}");
            }
        });

        return cmd;
    }

    /// <summary>
    /// Merges, in every paragraph, each run into the run immediately before it when the two are
    /// adjacent sibling elements, have identical <c>rPr</c> markup, and contain nothing but text
    /// elements besides <c>rPr</c>.
    /// </summary>
    /// <returns>The total number of direct child runs before and after merging.</returns>
    private static (int Original, int Merged) MergeAdjacentRuns(XDocument doc)
    {
        int originalCount = 0;
        int mergedCount = 0;

        foreach (var p in doc.Descendants(W + "p").ToList())
        {
            var runs = p.Elements(W + "r").ToList();
            originalCount += runs.Count;

            // Walk backwards so removing runs[i] never shifts an index still to be visited.
            for (int i = runs.Count - 1; i > 0; i--)
            {
                var current = runs[i];
                var previous = runs[i - 1];

                // Neighbours in the run list may be separated by another element (bookmark,
                // hyperlink, proofing mark, ...); merging across it would move text past it.
                if (current.ElementsBeforeSelf().LastOrDefault() != previous)
                {
                    continue;
                }

                var curProps = current.Element(W + "rPr")?.ToString() ?? "";
                var prevProps = previous.Element(W + "rPr")?.ToString() ?? "";
                if (curProps != prevProps)
                {
                    continue;
                }

                // Only merge if both contain only text elements.
                bool curTextOnly = current.Elements().Where(e => e.Name != W + "rPr").All(e => e.Name == W + "t");
                bool prevTextOnly = previous.Elements().Where(e => e.Name != W + "rPr").All(e => e.Name == W + "t");
                if (!curTextOnly || !prevTextOnly)
                {
                    continue;
                }

                var prevText = previous.Elements(W + "t").LastOrDefault();
                var curText = current.Elements(W + "t").FirstOrDefault();
                if (prevText == null || curText == null)
                {
                    continue;
                }

                prevText.Value += curText.Value;
                // The joined text may now contain significant inner/edge whitespace.
                prevText.SetAttributeValue(XNamespace.Xml + "space", "preserve");

                foreach (var extra in current.Elements(W + "t").Skip(1))
                {
                    previous.Add(new XElement(extra));
                }

                current.Remove();
                runs.RemoveAt(i);
            }

            mergedCount += runs.Count;
        }

        return (originalCount, mergedCount);
    }
}
|
||||
@@ -0,0 +1,107 @@
|
||||
using System.CommandLine;
|
||||
using System.Text.Json;
|
||||
using MiniMaxAIDocx.Core.Validation;
|
||||
|
||||
namespace MiniMaxAIDocx.Core.Commands;
|
||||
|
||||
/// <summary>
/// Factory for the <c>validate</c> CLI command, which runs the requested
/// validators against a DOCX file and reports the combined outcome.
/// </summary>
public static class ValidateCommand
{
    // Reused for every JSON report instead of allocating new options per invocation.
    private static readonly JsonSerializerOptions IndentedJson = new() { WriteIndented = true };

    /// <summary>
    /// Builds the <c>validate</c> command with its options and action.
    /// </summary>
    /// <remarks>
    /// Options:
    /// <list type="bullet">
    /// <item><description><c>--input</c> (required): DOCX file to validate.</description></item>
    /// <item><description><c>--xsd</c>: XSD schema path; when given, <see cref="XsdValidator"/> is run.</description></item>
    /// <item><description><c>--business</c>: when set, <see cref="BusinessRuleValidator"/> is run.</description></item>
    /// <item><description><c>--gate-check</c>: template DOCX; when given, <see cref="GateCheckValidator"/> is run.</description></item>
    /// <item><description><c>--json</c>: emit the report as indented JSON instead of plain text.</description></item>
    /// </list>
    /// The action sets <see cref="Environment.ExitCode"/> to 1 when the input file is
    /// missing, when the combined result is not valid, or when the gate check did not pass.
    /// </remarks>
    /// <returns>The configured <see cref="Command"/>.</returns>
    public static Command Create()
    {
        var inputOption = new Option<string>("--input") { Description = "DOCX file to validate", Required = true };
        var xsdOption = new Option<string>("--xsd") { Description = "XSD schema path for XML validation" };
        var businessOption = new Option<bool>("--business") { Description = "Run business rule validation" };
        var gateCheckOption = new Option<string>("--gate-check") { Description = "Template DOCX for gate-check validation" };
        var jsonOption = new Option<bool>("--json") { Description = "Output results as JSON" };

        var cmd = new Command("validate", "Validate DOCX structure and content")
        {
            inputOption, xsdOption, businessOption, gateCheckOption, jsonOption
        };

        cmd.SetAction((parseResult) =>
        {
            var input = parseResult.GetValue(inputOption)!;
            var xsd = parseResult.GetValue(xsdOption);
            var business = parseResult.GetValue(businessOption);
            var gateCheck = parseResult.GetValue(gateCheckOption);
            var asJson = parseResult.GetValue(jsonOption);

            if (!File.Exists(input))
            {
                Console.Error.WriteLine($"File not found: {input}");
                // A missing input must not look like a successful validation to
                // scripts that gate on the exit code.
                Environment.ExitCode = 1;
                return;
            }

            var combinedResult = new ValidationResult();
            GateCheckResult? gateResult = null;

            if (xsd != null)
            {
                var xsdValidator = new XsdValidator();
                combinedResult.Merge(xsdValidator.Validate(input, xsd));
            }

            if (business)
            {
                var bizValidator = new BusinessRuleValidator();
                combinedResult.Merge(bizValidator.Validate(input));
            }

            if (gateCheck != null)
            {
                var gateValidator = new GateCheckValidator();
                gateResult = gateValidator.Validate(input, gateCheck);
            }

            // Single source of truth for the verdict: the merged result must be valid
            // and, if a gate check ran, it must have passed.
            var passed = combinedResult.IsValid && (gateResult?.Passed ?? true);

            if (asJson)
            {
                WriteJsonReport(combinedResult, gateResult, passed);
            }
            else
            {
                WriteTextReport(combinedResult, gateResult, passed);
            }

            if (!passed)
            {
                Environment.ExitCode = 1;
            }
        });

        return cmd;
    }

    /// <summary>
    /// Writes the report to standard output as indented JSON with the properties
    /// <c>isValid</c>, <c>errors</c>, <c>warnings</c> and <c>gateCheck</c>
    /// (<c>null</c> when no gate check ran).
    /// </summary>
    private static void WriteJsonReport(ValidationResult combinedResult, GateCheckResult? gateResult, bool passed)
    {
        var output = new
        {
            isValid = passed,
            errors = combinedResult.Errors,
            warnings = combinedResult.Warnings,
            gateCheck = gateResult == null ? null : new
            {
                passed = gateResult.Passed,
                violations = gateResult.Violations
            }
        };

        Console.WriteLine(JsonSerializer.Serialize(output, IndentedJson));
    }

    /// <summary>
    /// Writes the report to standard output as plain text: errors, warnings,
    /// the gate-check outcome (if one ran) and a final PASSED/FAILED line.
    /// </summary>
    private static void WriteTextReport(ValidationResult combinedResult, GateCheckResult? gateResult, bool passed)
    {
        if (combinedResult.Errors.Count > 0)
        {
            Console.WriteLine($"ERRORS ({combinedResult.Errors.Count}):");
            foreach (var e in combinedResult.Errors)
            {
                // The source position is appended only when a positive line number is present.
                Console.WriteLine($"  [{e.Severity}] {e.Message}" + (e.LineNumber > 0 ? $" (line {e.LineNumber}:{e.LinePosition})" : ""));
            }
        }

        if (combinedResult.Warnings.Count > 0)
        {
            Console.WriteLine($"WARNINGS ({combinedResult.Warnings.Count}):");
            foreach (var w in combinedResult.Warnings)
            {
                Console.WriteLine($"  [{w.Severity}] {w.Message}");
            }
        }

        if (gateResult != null)
        {
            Console.WriteLine(gateResult.Passed ? "GATE CHECK: PASSED" : "GATE CHECK: FAILED");
            foreach (var v in gateResult.Violations)
            {
                Console.WriteLine($"  - {v}");
            }
        }

        Console.WriteLine(passed ? "Validation: PASSED" : "Validation: FAILED");
    }
}
|
||||
Reference in New Issue
Block a user