DocumentVisitor.VisitParagraphStart Method
Namespace: Aspose.Words
/// <summary>
/// Loads a sample document, traverses it with a <see cref="DocStructurePrinter"/>
/// and writes the outline the visitor produced to the console.
/// </summary>
public void DocStructureToText()
{
    // Load the document whose node structure we want to print.
    Document document = new Document(MyDir + "DocumentVisitor-compatible features.docx");

    // The printer derives from DocumentVisitor and collects its output internally.
    DocStructurePrinter printer = new DocStructurePrinter();

    // Hand the visitor to the document; the visitor's callbacks are invoked
    // for the document and the nodes beneath it.
    document.Accept(printer);

    // The text accumulated during the traversal is read back from the visitor.
    Console.WriteLine(printer.GetText());
}

/// <summary>
/// A visitor that builds an indented, line-per-node outline of the documents,
/// sections, bodies, paragraphs, runs and sub-documents it is called for.
/// </summary>
public class DocStructurePrinter : DocumentVisitor
{
    // Current nesting level; determines how many "| " prefixes a line receives.
    private int mDocTraversalDepth;

    // Accumulates the outline text, one line per reported node.
    private readonly StringBuilder mBuilder;

    /// <summary>
    /// Creates a printer with an empty output buffer.
    /// </summary>
    public DocStructurePrinter()
    {
        mBuilder = new StringBuilder();
    }

    /// <summary>
    /// Returns everything the visitor has written so far.
    /// </summary>
    public string GetText() => mBuilder.ToString();

    /// <summary>
    /// Reports the start of a Document together with the number of nodes
    /// returned by <c>GetChildNodes(NodeType.Any, true)</c>.
    /// </summary>
    public override VisitorAction VisitDocumentStart(Document doc)
    {
        int nodeCount = doc.GetChildNodes(NodeType.Any, true).Count;

        return OpenScope($"[Document start] Child nodes: {nodeCount}");
    }

    /// <summary>
    /// Reports the end of a Document.
    /// </summary>
    public override VisitorAction VisitDocumentEnd(Document doc)
    {
        return CloseScope("[Document end]");
    }

    /// <summary>
    /// Reports the start of a Section together with its index among the
    /// Section nodes returned by the owning document.
    /// </summary>
    public override VisitorAction VisitSectionStart(Section section)
    {
        int index = section.Document
            .GetChildNodes(NodeType.Section, false)
            .IndexOf(section);

        return OpenScope($"[Section start] Section index: {index}");
    }

    /// <summary>
    /// Reports the end of a Section.
    /// </summary>
    public override VisitorAction VisitSectionEnd(Section section)
    {
        return CloseScope("[Section end]");
    }

    /// <summary>
    /// Reports the start of a Body together with its paragraph count.
    /// </summary>
    public override VisitorAction VisitBodyStart(Body body)
    {
        return OpenScope($"[Body start] Paragraphs: {body.Paragraphs.Count}");
    }

    /// <summary>
    /// Reports the end of a Body.
    /// </summary>
    public override VisitorAction VisitBodyEnd(Body body)
    {
        return CloseScope("[Body end]");
    }

    /// <summary>
    /// Reports the start of a Paragraph.
    /// </summary>
    public override VisitorAction VisitParagraphStart(Paragraph paragraph)
    {
        return OpenScope("[Paragraph start]");
    }

    /// <summary>
    /// Reports the end of a Paragraph.
    /// </summary>
    public override VisitorAction VisitParagraphEnd(Paragraph paragraph)
    {
        return CloseScope("[Paragraph end]");
    }

    /// <summary>
    /// Reports a Run, quoting the text returned by its GetText method.
    /// </summary>
    public override VisitorAction VisitRun(Run run)
    {
        IndentAndAppendLine($"[Run] \"{run.GetText()}\"");
        return VisitorAction.Continue;
    }

    /// <summary>
    /// Reports a SubDocument node.
    /// </summary>
    public override VisitorAction VisitSubDocument(SubDocument subDocument)
    {
        IndentAndAppendLine("[SubDocument]");
        return VisitorAction.Continue;
    }

    /// <summary>
    /// Writes <paramref name="header"/> at the current depth, then goes one level deeper
    /// so that subsequent lines are indented beneath it.
    /// </summary>
    /// <param name="header">The line announcing the node that is being entered.</param>
    /// <returns>Always <see cref="VisitorAction.Continue"/>.</returns>
    private VisitorAction OpenScope(string header)
    {
        IndentAndAppendLine(header);
        mDocTraversalDepth++;
        return VisitorAction.Continue;
    }

    /// <summary>
    /// Returns to the previous depth, then writes <paramref name="footer"/> so that it
    /// lines up with the matching start line.
    /// </summary>
    /// <param name="footer">The line announcing the node that is being left.</param>
    /// <returns>Always <see cref="VisitorAction.Continue"/>.</returns>
    private VisitorAction CloseScope(string footer)
    {
        mDocTraversalDepth--;
        IndentAndAppendLine(footer);
        return VisitorAction.Continue;
    }

    /// <summary>
    /// Appends one line to the output, prefixed with a "| " marker for every level of depth.
    /// </summary>
    /// <param name="text">The line content to append after the indentation.</param>
    private void IndentAndAppendLine(string text)
    {
        for (int level = mDocTraversalDepth; level > 0; level--)
        {
            mBuilder.Append("| ");
        }

        mBuilder.AppendLine(text);
    }
}
/// <summary>
/// Loads a sample document, runs a <see cref="RemoveHiddenContentVisitor"/> over the whole
/// document, then over a single paragraph and a single table, and saves the result.
/// </summary>
public void RemoveHiddenContentFromDocument()
{
    // Load the document that hidden content should be stripped from.
    Document document = new Document(MyDir + "Hidden content.docx");

    // The remover derives from DocumentVisitor; nodes call back into it while being visited.
    RemoveHiddenContentVisitor remover = new RemoveHiddenContentVisitor();

    // A visitor can be applied to the entire document...
    document.Accept(remover);

    // ...to one specific node, here the paragraph at index 4...
    Paragraph fifthParagraph = (Paragraph) document.GetChild(NodeType.Paragraph, 4, true);
    fifthParagraph.Accept(remover);

    // ...or to a node of another type, here the table at index 0.
    Table firstTable = (Table) document.GetChild(NodeType.Table, 0, true);
    firstTable.Accept(remover);

    document.Save(ArtifactsDir + "Font.RemoveHiddenContentFromDocument.doc");
}

/// <summary>
/// A visitor that removes every visited node whose font is flagged as hidden,
/// and cleans up tables, rows and cells left without child nodes.
/// </summary>
public class RemoveHiddenContentVisitor : DocumentVisitor
{
    /// <summary>
    /// Removes a hidden FieldStart node.
    /// </summary>
    public override VisitorAction VisitFieldStart(FieldStart fieldStart) => RemoveIfHidden(fieldStart);

    /// <summary>
    /// Removes a hidden FieldEnd node.
    /// </summary>
    public override VisitorAction VisitFieldEnd(FieldEnd fieldEnd) => RemoveIfHidden(fieldEnd);

    /// <summary>
    /// Removes a hidden FieldSeparator node.
    /// </summary>
    public override VisitorAction VisitFieldSeparator(FieldSeparator fieldSeparator) => RemoveIfHidden(fieldSeparator);

    /// <summary>
    /// Removes a hidden Run node.
    /// </summary>
    public override VisitorAction VisitRun(Run run) => RemoveIfHidden(run);

    /// <summary>
    /// Removes a Paragraph whose paragraph break font is hidden.
    /// </summary>
    public override VisitorAction VisitParagraphStart(Paragraph paragraph) => RemoveIfHidden(paragraph);

    /// <summary>
    /// Removes a hidden FormField node.
    /// </summary>
    public override VisitorAction VisitFormField(FormField formField) => RemoveIfHidden(formField);

    /// <summary>
    /// Removes a hidden GroupShape node.
    /// </summary>
    public override VisitorAction VisitGroupShapeStart(GroupShape groupShape) => RemoveIfHidden(groupShape);

    /// <summary>
    /// Removes a hidden Shape node.
    /// </summary>
    public override VisitorAction VisitShapeStart(Shape shape) => RemoveIfHidden(shape);

    /// <summary>
    /// Removes a hidden Comment node.
    /// </summary>
    public override VisitorAction VisitCommentStart(Comment comment) => RemoveIfHidden(comment);

    /// <summary>
    /// Removes a hidden Footnote node.
    /// </summary>
    public override VisitorAction VisitFootnoteStart(Footnote footnote) => RemoveIfHidden(footnote);

    /// <summary>
    /// Removes a hidden SpecialChar node.
    /// </summary>
    public override VisitorAction VisitSpecialChar(SpecialChar specialChar) => RemoveIfHidden(specialChar);

    /// <summary>
    /// Removes a Table that has no child nodes left once its content has been visited.
    /// </summary>
    /// <remarks>
    /// A Table/Row/Cell cannot be queried for a hidden flag directly. The approach taken here
    /// relies on the earlier visits having removed the hidden inline content, which leaves the
    /// container empty; an empty container is then removed. A visible table that merely has no
    /// content is not affected, because a properly formed cell still holds at least an empty
    /// paragraph and is therefore not empty.
    /// </remarks>
    public override VisitorAction VisitTableEnd(Table table)
    {
        if (table.HasChildNodes)
        {
            return VisitorAction.Continue;
        }

        table.Remove();
        return VisitorAction.Continue;
    }

    /// <summary>
    /// Removes a Cell that has no child nodes and is still attached to a parent.
    /// </summary>
    public override VisitorAction VisitCellEnd(Cell cell)
    {
        if (cell.HasChildNodes || cell.ParentNode == null)
        {
            return VisitorAction.Continue;
        }

        cell.Remove();
        return VisitorAction.Continue;
    }

    /// <summary>
    /// Removes a Row that has no child nodes and is still attached to a parent.
    /// </summary>
    public override VisitorAction VisitRowEnd(Row row)
    {
        if (row.HasChildNodes || row.ParentNode == null)
        {
            return VisitorAction.Continue;
        }

        row.Remove();
        return VisitorAction.Continue;
    }

    /// <summary>
    /// Removes <paramref name="node"/> when <see cref="IsHidden"/> reports it as hidden.
    /// </summary>
    /// <param name="node">The node currently being visited.</param>
    /// <returns>Always <see cref="VisitorAction.Continue"/>.</returns>
    private static VisitorAction RemoveIfHidden(Node node)
    {
        if (IsHidden(node))
        {
            node.Remove();
        }

        return VisitorAction.Continue;
    }

    /// <summary>
    /// Determines whether the given node is flagged as hidden.
    /// </summary>
    /// <param name="node">The node to inspect.</param>
    /// <returns>
    /// The Hidden flag of the node's font for inline nodes, shapes, comments and footnotes;
    /// the Hidden flag of the paragraph break font for paragraphs; false for any other node.
    /// </returns>
    private static bool IsHidden(Node node)
    {
        // Inline nodes expose the flag through their Font.
        if (node is Inline inline)
        {
            return inline.Font.Hidden;
        }

        // Paragraphs carry the flag on the font of their paragraph break.
        if (node.NodeType == NodeType.Paragraph)
        {
            return ((Paragraph) node).ParagraphBreakFont.Hidden;
        }

        // Shapes and group shapes.
        if (node is ShapeBase shape)
        {
            return shape.Font.Hidden;
        }

        // Comments and footnotes.
        if (node is InlineStory story)
        {
            return story.Font.Hidden;
        }

        // Any other node type has no hidden flag to read, so it is treated as visible.
        return false;
    }
}