PageSplitter.java example

Explorer

Aspose_Words_Java-master
- Aspose.Words-for-Java-master
  - Examples
    - src
      - main
        java
        com
        aspose
        words
        examples
        Utils.java
        document_object_model
        AccessParentNode.java
        ChildNodes.java
        CreateAndAddParagraphNode.java
        GetNodeType.java
        OwnerDocument.java
        SiblingNodes.java
        TypedAccessToChildrenAndParent.java
        linq
        BubbleChart.java
        BulletedList.java
        ChartWithFilteringGroupingOrdering.java
        Client.java
        Common.java
        CommonList.java
        CommonMasterDetail.java
        Contract.java
        HelloWorld.java
        InParagraphList.java
        InTableAlternateContent.java
        InTableList.java
        InTableMasterDetail.java
        InTableRow.java
        InTableWithFilteringGroupingSorting.java
        Manager.java
        MulticoloredNumberedList.java
        NumberedList.java
        PieChart.java
        ScatterChart.java
        Sender.java
        SingleRow.java
        loading_saving
        AccessAndVerifySignature.java
        CheckFormat.java
        CheckFormatCompatibility.java
        ConvertADocumentToMHTMLAndEmail.java
        ConvertDocumentToByte.java
        ConvertDocumentToEPUB.java
        ConvertDocumentToEPUBUysingDefaultSaveOptions.java
        ConvertDocumentToHtmlWithRoundtrip.java
        ConvertImageToPdf.java
        CreateDocument.java
        DetectDocumentSignatures.java
        DetectFileFormat.java
        DigitallySignedPdf.java
        Doc2PDF.java
        ExportFontsAsBase64.java
        ExportResourcesUsingHtmlSaveOptions.java
        GetListOfFilesInFolder.java
        ImageToPdf.java
        LargeSizeImageToPdf.java
        LoadAndSave.java
        LoadAndSaveToStream.java
        LoadDocFromDatabase.java
        LoadEncryptedDoc.java
        LoadTxt.java
        OpenDocUsingStream.java
        OpenDocument.java
        OpenEncryptedDocument.java
        PageSplitter.java
        SaveDocToDatabase.java
        SendToClientBrowser.java
        SpecifySaveOption.java
        SplitIntoHtmlPages.java
        Word2Help.java
        mail_merge
        AdvancedMailMergeFeatures.java
        ApplyCustomFormattingDuringMailMerge.java
        ApplyCustomLogicToEmptyRegions.java
        ExecuteMailMergeWithRegions.java
        ExecuteSimpleMailMerge.java
        InsertCheckBoxesOrHTMLDuringMailMerge.java
        InsertImagesFromADatabase.java
        MailMergeFormFields.java
        MailMergeFromXMLUsingIMailMergeDataSource.java
        MailMergeUsingMustacheTemplateSyntax.java
        MultipleDocsInMailMerge.java
        MustacheTemplateSyntax.java
        NestedMailMergeRegions.java
        ProduceMultipleDocumentsDuringMailMerge.java
        RemoveEmptyRegions.java
        RemoveRowsFromTable.java
        RemoveUnmergedRegions.java
        XMLMailMerge.java
        XmlMailMergeDataSet.java
        XmlMailMergeDataTable.java
        programming_documents
        HeadersAndFooters
        CreateHeadersFootersUsingDocumentBuilder.java
        RemoveFootersButLeaveHeadersIntact.java
        Hyperlink
        ReplaceHyperlinks.java
        Ranges
        RangesDeleteText.java
        RangesGetText.java
        Theme
        GetThemeProperties.java
        SetThemeProperties.java
        bookmarks
        AccessBookmarks.java
        BookmarkNameAndText.java
        BookmarkTable.java
        CopyBookmarkedText.java
        CreateBookmark.java
        GetAndSetBookmarkNameAndText.java
        InsertBookmarksWithWhiteSpaces.java
        ObtainBookmarkByIndexAndName.java
        UntangleRowBookmarks.java
        charts
        ChartAppearance.java
        OOXMLCharts.java
        WorkWithChartDataLabelOfASingleChartSeries.java
        WorkWithChartSeriesCollectionOfChart.java
        WorkWithSingleChartDataPointOfAChartSeries.java
        comments
        AddComments.java
        AnchorComment.java
        CreateSimpleDocumentUsingDocumentBuilder.java
        ExtractCommentsByAuthor.java
        ProcessComments.java
        RemoveCommentRegionText.java
        RemoveComments.java
        RemoveCommentsByAuthor.java
        document
        AccessStyles.java
        AddGroupShape.java
        CheckBoxTypeContentControl.java
        CloneDocument.java
        ComboBoxContentControl.java
        CompareTwoWordDocuments.java
        ConvertBetweenMeasurementUnits.java
        DocumentBuilderApplyBordersAndShadingToParagraph.java
        DocumentBuilderApplyParagraphStyle.java
        DocumentBuilderBuildTable.java
        DocumentBuilderCursorPosition.java
        DocumentBuilderHeadersAndFooters.java
        DocumentBuilderInsertBookmark.java
        DocumentBuilderInsertBreak.java
        DocumentBuilderInsertCheckBoxFormField.java
        DocumentBuilderInsertComboBoxFormField.java
        DocumentBuilderInsertField.java
        DocumentBuilderInsertFloatingImage.java
        DocumentBuilderInsertHtml.java
        DocumentBuilderInsertHyperlink.java
        DocumentBuilderInsertInlineImage.java
        DocumentBuilderInsertOleObject.java
        DocumentBuilderInsertParagraph.java
        DocumentBuilderInsertTableOfContents.java
        DocumentBuilderInsertTextInputFormField.java
        DocumentBuilderMoveToBookmark.java
        DocumentBuilderMoveToBookmarkEnd.java
        DocumentBuilderMoveToDocumentStartEnd.java
        DocumentBuilderMoveToMergeField.java
        DocumentBuilderMoveToNode.java
        DocumentBuilderMoveToParagraph.java
        DocumentBuilderMoveToSection.java
        DocumentBuilderMoveToTableCell.java
        DocumentBuilderSetFontFormatting.java
        DocumentBuilderSetMultilevelListFormatting.java
        DocumentBuilderSetPageSetupAndSectionFormatting.java
        DocumentBuilderSetParagraphFormatting.java
        DocumentBuilderSetTableCellFormatting.java
        DocumentBuilderSetTableRowFormatting.java
        DocumentInDB.java
        ExtractContentBetweenBlockLevelNodes.java
        ExtractContentBetweenBookmarks.java
        ExtractContentBetweenCommentRange.java
        ExtractContentBetweenParagraphStyles.java
        ExtractContentBetweenParagraphs.java
        ExtractContentBetweenRuns.java
        ExtractContentUsingDocumentVisitor.java
        ExtractContentUsingField.java
        ExtractTextOnly.java
        GenerateACustomBarCodeImage.java
        GetDocumentVariables.java
        InsertDocumentIntoAnotherDocument.java
        ModifyContentControls.java
        ProtectDocument.java
        RemovePageAndSectionBreaks.java
        RichTextBoxContentControl.java
        SetCurrentStateOfCheckBox.java
        SetViewOptions.java
        TrackChanges.java
        UseControlCharacters.java
        WriteAndFont.java
        properties
        AccessingDocumentProperties.java
        AddOrRemoveDocumentProperties.java
        fields
        ChangeFieldUpdateCultureSource.java
        ConvertFieldsInBody.java
        ConvertFieldsInDocument.java
        ConvertFieldsInParagraph.java
        FieldHelper.java
        FormFieldsGetByName.java
        FormFieldsGetFormFieldsCollection.java
        FormFieldsWorkWithProperties.java
        GetFieldNames.java
        InsertAuthorField.java
        InsertField.java
        InsertFormFields.java
        InsertMailMergeAddressBlockFieldUsingDOM.java
        InsertMergeFieldUsingDOM.java
        InsertNestedFields.java
        RemoveField.java
        RenameMergeFields.java
        SpecifylocaleAtFieldlevel.java
        UpdateDocFields.java
        UpdateFields.java
        UseOfficeMathProperties.java
        find_replace
        FindAndHighlightText.java
        ReplaceTextWithField.java
        ReplaceWithEvaluator.java
        ReplaceWithRegex.java
        ReplaceWithString.java
        images
        AddWatermarkToADocument.java
        CompressImages.java
        ExtractImagesToFiles.java
        InsertBarcodeImage.java
        RemoveWatermark.java
        joining_appending
        AppendDocumentManually.java
        BaseDocument.java
        ConvertNumPageFields.java
        ConvertNumPageFieldsToPageRef.java
        ConvertNumPageFieldsWithPageRef.java
        DifferentPageSetup.java
        GetRemoveField.java
        JoinContinuous.java
        JoinNewPage.java
        KeepSourceFormatting.java
        KeepSourceTogether.java
        LinkHeadersFooters.java
        ListKeepSourceFormatting.java
        ListUseDestinationStyles.java
        RemoveSourceHeadersFooters.java
        RestartPageNumbering.java
        SimpleAppendDocument.java
        UnlinkHeadersFooters.java
        UpdatePageLayout.java
        UseDestinationStyles.java
        sections
        AddSection.java
        AppendSectionContent.java
        CloneSection.java
        CopySection.java
        DeleteAllSections.java
        DeleteHeaderFooterContent.java
        DeleteSection.java
        DeleteSectionContent.java
        RemoveAllSections.java
        SectionsAccessByIndex.java
        styles
        ExtractContentBasedOnStyles.java
        tableofcontents
        FindAndInsertATCField.java
        InsertATableOfContentsField.java
        InsertATableOfContentsUsingHeadingStyles.java
        InsertTCField.java
        ModifyATableOfContents.java
        RemoveATableOfContents.java
        tables
        ApplyFormatting
        ApplyBordersAndShading.java
        ApplyFormattingOnTheCellLevel.java
        ApplyFormattingOnTheRowLevel.java
        ApplyFormattingOnTheTableLevel.java
        FindPreferredWidthTypeAndValueOfATableOrCell.java
        SpecifyAPreferredWidthOnATable.java
        SpecifyPreferredWidthOnACell.java
        SpecifyRowHeights.java
        TableStyles.java
        ColumnsAndRows
        ApplyAutoFitSettingsToATable.java
        CheckCellsMerged.java
        Column.java
        FindIndexOfTableElements.java
        JoinAndSplitTables.java
        KeepTablesAndRowsFromBreakingAcrossPages.java
        MergeCellsInARange.java
        MergeCellsInATable.java
        SpecifyRowsToRepeatOnSubsequentPagesAsHeaderRows.java
        WorkingWithColumns.java
        ExtractOrReplaceText
        ExtractPlainTextFromATable.java
        ReplaceText.java
        creation
        BuildTableFromDataTable.java
        FormattedTable.java
        InsertCloneOfExistingTable.java
        InsertTableDirectlyIntoDOM.java
        InsertTableFromHtml.java
        NestedTable.java
        SimpleTable.java
        quickstart
        AppendDocuments.java
        ApplyLicense.java
        FindAndReplace.java
        HelloWorld.java
        LoadAndSaveToDisk.java
        LoadAndSaveToStream.java
        SimpleMailMerge.java
        UpdateFields.java
        WorkingWithNodes.java
        rendering_printing
        ControlEmbeddingOfCoreAndSystemFonts.java
        DocumentPreviewAndPrint.java
        EmbedFontsInAdobePDF.java
        HandleDocumentWarnings.java
        HyphenateWords.java
        MultipagePrintDocument.java
        MultiplePagesOnSheet.java
        PrintPreviewDialog.java
        ReceiveNotificationOfMissingFontsAndFontSubstitution.java
        RenderShapes.java
        SaveAsMultipageTiff.java
        SpecifyDefaultFontToUseWhenRendering.java
        SpecifyTrueTypeFontsLocation.java
        viewers_visualizers
        document_explorer
        About.java
        AboutForm.java
        Dialogs.java
        DocumentExplorer.java
        DocumentItems.java
        ErrorDialog.java
        ErrorDialogForm.java
        Globals.java
        Item.java
        Main.java
        MainForm.java
        OpenFileFilter.java
        SaveDialogChangeListener.java
        SaveFileFilter.java
        Utils.java
  - Plugins
    - Aspose-Words-Java-for-NetBeans(Maven)
      - src
        com
        aspose
        words
        maven
        AsposeMavenBasicPanelVisual.java
        AsposeMavenBasicWizardPanel.java
        AsposeMavenProjectWizardIterator.java
        MavenSettings.java
        artifacts
        Metadata.java
        ObjectFactory.java
        examples
        AsposeExamplePanel.java
        AsposeExampleWizardIterator.java
        AsposeExampleWizardPanel.java
        CustomMutableTreeNode.java
        utils
        AbstractTask.java
        AsposeConstants.java
        AsposeJavaAPI.java
        AsposeMavenProjectManager.java
        AsposeWordsJavaAPI.java
        FormatExamples.java
        GitHelper.java
        TasksExecutor.java
    - Aspose.Words Java for dotCMS
      - AsposeDotCMSExportToWord.servlet
        src
        main
        java
        com
        aspose
        dotcms
        words
        Activator.java
        Constants.java
        ExportToWordServlet.java
        TestFilter.java
    - Aspose_Words_Java_for_Docx4j
      - src
        main
        java
        com
        aspose
        words
        examples
        Utils.java
        asposefeatures
        documents
        clonedocuments
        AsposeCloneDoc.java
        movingcursorindocs
        AsposeMovingCursor.java
        protectdocuments
        AsposeProtectDoc.java
        setpageborders
        AsposePageBorders.java
        trackchanges
        AsposeTrackChanges.java
        usingcontrolcharacters
        AsposeUseControlCharacters.java
        loadingsavingandconverting
        checkformatcompatibility
        AsposeCheckFormatCompatibility.java
        digitalsignature
        AsposeDigitalSignatures.java
        loadtxtfile
        AsposeLoadTxtFile.java
        mailmerge
        mailmergefromxmldatasource
        XMLMailMerge.java
        XmlMailMergeDataSet.java
        XmlMailMergeDataTable.java
        renderingandprinting
        specifydefaultfonts
        AsposeSpecifyDefaultFontswhileRendering.java
        tables
        autofitsettingstotable
        AsposeTableAutoFitSettings.java
        joiningtables
        AsposeJoiningTables.java
        repeatheaderrow
        AsposeRepeatHeaderRow.java
        splittables
        AsposeSplittingTables.java
        featurescomparison
        bookmarks
        addbookmark
        AsposeBookmarks.java
        Docx4jBookmarkAdd.java
        deletebookmark
        AsposeBookmarksDelete.java
        Docx4jBookmarksDeleter.java
        documents
        accessdocproperties
        AsposeWorkingWithDocProps.java
        Docx4jWorkingWithDocProps.java
        addimage
        AsposeInsertImage.java
        Docx4jImageAdd.java
        addwatermark
        AsposeAddWatermark.java
        Docx4jAddWatermark.java
        comments
        AsposeComments.java
        Docx4jCommentsSample.java
        converttoformats
        AsposeConvertToFormats.java
        Docx4jConvertOutHtml.java
        SampleDocument.java
        converttopdf
        AsposeConvertToFormats.java
        Docx4jConvertToPDF.java
        SampleDocument.java
        createnewdoc
        AsposeNewDocument.java
        Docx4jNewDocument.java
        inserthyperlinks
        AsposeAddHyperlink.java
        Docx4jHyperlinks.java
        inserttableofcontents
        AsposeTableOfContents.java
        Docx4jTableOfContents.java
        mergedocs
        AsposeAppendDocs.java
        Docx4jMergeDocx.java
        headerfooter
        addfooter
        AsposeFooters.java
        Docx4jFooterCreate.java
        addheader
        AsposeHeaders.java
        Docx4jHeaderCreate.java
        removeheaderfooter
        AsposeHeaderFooterRemove.java
        Docx4jHeaderFooterRemove.java
    - Aspose_Words_Java_for_Eclipse_Maven
      - AsposeWordsEclipsePlugin
        src
        com
        aspose
        words
        Activator.java
        MavenSettings.java
        maven
        AsposeMavenProjectSupport.java
        AsposeMavenProjectWizard.java
        AsposeMavenProjectWizardPage.java
        artifacts
        Metadata.java
        ObjectFactory.java
        examples
        AsposeExampleSupport.java
        AsposeExampleWizard.java
        AsposeExampleWizardPage.java
        utils
        AsposeConstants.java
        AsposeJavaAPI.java
        AsposeMavenProjectManager.java
        AsposeWordsJavaAPI.java
        FormatExamples.java
        GitHelper.java
        MavenSettings.java
        org
        eclipse
        wb
        swt
        SWTResourceManager.java
    - Aspose_Words_Java_for_IntelliJ(Maven)
      - src
        com
        aspose
        examples
        AsposeExampleAction.java
        AsposeExampleCallback.java
        AsposeExampleDialog.java
        AsposeExamplePanel.java
        CustomMutableTreeNode.java
        maven
        apis
        artifacts
        Metadata.java
        ObjectFactory.java
        utils
        AsposeConstants.java
        AsposeJavaAPI.java
        AsposeMavenProjectManager.java
        AsposeMavenUtil.java
        AsposeWordsJavaAPI.java
        FormatExamples.java
        GitHelper.java
        execution
        CallBackHandler.java
        ModalTaskImpl.java
        RunnableHelper.java
        wizards
        maven
        AsposeIntroWizardStep.java
        AsposeMavenModuleBuilder.java
        AsposeMavenModuleBuilderHelper.java
        AsposeMavenModuleWizardStep.java
        CreateMavenProjectCallback.java
        DownloadExamplesCallback.java
        MavenId.java
        icons
        AsposeIcons.java
    - Aspose_Words_for_Apache_POI
      - src
        main
        java
        com
        aspose
        words
        examples
        Utils.java
        asposefeatures
        mailmerge
        mailmergefromxmldatasource
        XMLMailMerge.java
        XmlMailMergeDataSet.java
        XmlMailMergeDataTable.java
        workingwithbarcode
        insertbarcodeoneachpage
        AsposeInsertBarcodeOnEachPage.java
        workingwithdocument
        addwatermark
        AsposeWatermarks.java
        appenddoc
        AsposeAppendDocs.java
        checkformatcompatibility
        AsposeCheckFormatCompatibility.java
        clonedoc
        AsposeCloneDoc.java
        insertpicture
        AsposeInsertImage.java
        loadtxtfile
        AsposeLoadTxtFile.java
        movingcursor
        AsposeMovingCursor.java
        protectdoc
        AsposeProtectDoc.java
        setpageborders
        AsposePageBorders.java
        trackchanges
        AsposeTrackChanges.java
        workingwithbookmarks
        AsposeBookmarks.java
        workingwithdigitalsignature
        AsposeDigitalSignatures.java
        workingwithfields
        insertfieldsindoc
        AsposeInsertFields.java
        removefields
        AsposeRemoveFields.java
        workingwithtables
        autofitsettingstotable
        AsposeTableAutoFitSettings.java
        joiningtables
        AsposeJoiningTables.java
        splittables
        AsposeSplittingTables.java
        workingwithtext
        extractcomments
        AsposeExtractComments.java
        findnreplacetxt
        AsposeFindnReplace.java
        insertcomments
        AsposeInsertComments.java
        removecomments
        AsposeRemoveComments.java
        specifydefaultfonts
        AsposeSpecifyDefaultFontswhileRendering.java
        usingcontrolcharacters
        AsposeUseControlCharacters.java
        featurescomparison
        converter
        ApacheConvertToFormats.java
        AsposeConvertToFormats.java
        document
        ApacheDocumentProperties.java
        ApacheFormattedText.java
        ApacheNewDocument.java
        ApacheOpenExistingDoc.java
        ApacheSaveDocument.java
        AsposeDocumentProperties.java
        AsposeFormattedText.java
        AsposeNewDocument.java
        AsposeOpenExistingDoc.java
        AsposeSaveDocument.java
        headerfooter
        ApacheFooters.java
        ApacheHeaders.java
        AsposeFooters.java
        AsposeHeaders.java
        images
        ApacheExtractImages.java
        ApacheInsertImage.java
        AsposeExtractImages.java
        AsposeInsertImage.java
        ranges
        ApacheDeleteRange.java
        ApacheInsert.java
        ApacheRanges.java
        AsposeDeleteRange.java
        AsposeInsert.java
        AsposeRanges.java
        tables
        ApacheCreateTable.java
        ApacheFormattedTable.java
        AsposeCreateTable.java
        AsposeFormatedTable.java
    - Aspose_Words_for_Struts
      - src
        main
        java
        com
        books
        AsposeAPIHelper.java
        BookActions.java
        BookForm.java
        Books.java
        ShowBooks.java

package com.aspose.words.examples.loading_saving;

import com.aspose.words.*;
import com.aspose.words.examples.Utils;
import com.sun.media.jfxmediaimpl.MediaUtils;
import javafx.scene.shape.Path;

import java.io.File;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Hashtable;
import java.util.Stack;


/**
 * Example that splits every document found in the data directory into one document per page.
 * The per-page documents are written to an "Out" sub-folder located next to the source document.
 */
public class PageSplitter
{
    /** Name of the sub-folder (relative to the source document) that receives the per-page files. */
    private static final String OUT_FOLDER_NAME = "Out";

    /**
     * Entry point: splits all documents in the example data directory.
     *
     * @param args not used.
     * @throws Exception if a document cannot be loaded, laid out or saved.
     */
    public static void main(String[] args) throws Exception
    {
        // The path to the documents directory.
        String dataDir = Utils.getDataDir(PageSplitter.class);

        SplitAllDocumentsToPages(dataDir);

        // Let File join the path so the separator is correct on every platform.
        String outDir = new File(dataDir, OUT_FOLDER_NAME).getPath();
        System.out.println("\nDocument split to pages successfully.\nFile saved at " + outDir);
    }

    /**
     * Splits a single document into pages and saves each page as a separate document in the
     * "Out" folder beside the source file. The output folder is created when it does not exist.
     *
     * @param docName the document file to split.
     * @throws Exception if the output folder cannot be created or the document cannot be
     *                   loaded, laid out or saved.
     */
    public static void SplitDocumentToPages(File docName) throws Exception
    {
        String fileName = docName.getName();

        // A name without a dot has no extension; substring(-1) would throw, so fall back to "".
        int dotIndex = fileName.lastIndexOf('.');
        String extensionName = dotIndex >= 0 ? fileName.substring(dotIndex) : "";

        File outFolder = new File(docName.getParent(), OUT_FOLDER_NAME).getAbsoluteFile();
        System.out.println("Processing document: " + fileName );

        Document doc = new Document(docName.getAbsolutePath());

        // Create and attach collector to the document before page layout is built.
        LayoutCollector layoutCollector = new LayoutCollector(doc);
        try
        {
            // This will build layout model and collect necessary information.
            doc.updatePageLayout();

            // Split nodes in the document into separate pages.
            DocumentPageSplitter splitter = new DocumentPageSplitter(layoutCollector);

            // mkdirs() returns false when the folder already exists, hence the second check.
            if (!outFolder.mkdirs() && !outFolder.isDirectory())
                throw new java.io.IOException("Cannot create output folder: " + outFolder.getPath());

            // Save each page to the disk as a separate document.
            for (int page = 1; page <= doc.getPageCount(); page++)
            {
                Document pageDoc = splitter.GetDocumentOfPage(page);

                // Pass the page number as text: MessageFormat would otherwise apply locale
                // specific digit grouping (e.g. "1,000") to a numeric argument.
                String pageFileName = MessageFormat.format("{0} - page{1} Out{2}", fileName, String.valueOf(page), extensionName);
                pageDoc.save(new File(outFolder, pageFileName).getAbsolutePath());
            }
        }
        finally
        {
            // Detach the collector from the document, even when splitting or saving failed.
            layoutCollector.setDocument(null);
        }
    }

    /**
     * Splits every regular file found directly inside the given folder. Sub-folders are skipped.
     *
     * @param folderName path of the folder that contains the documents.
     * @throws IllegalArgumentException if the path is not a directory or cannot be listed.
     * @throws Exception if any of the documents cannot be processed.
     */
    public static void SplitAllDocumentsToPages(String folderName) throws Exception
    {
        File folder = new File(folderName);

        // listFiles() yields null (not an empty array) when the path is not a readable directory.
        File[] files = folder.listFiles();
        if (files == null)
            throw new IllegalArgumentException("Not a readable directory: " + folder.getAbsolutePath());

        for (File file : files) {
            if (file.isFile()) {
                SplitDocumentToPages(file);
            }
        }
    }
}

class DocumentPageSplitter {
    // Finder used to look up the nodes of each page; it also gives access to the source document.
    private PageNumberFinder mPageNumberFinder;

    /**
     * Creates a splitter for the document attached to the given collector. Construction immediately
     * asks the page number finder to split nodes that cross page boundaries, so the source document
     * is modified here; it is recommended not to modify the document afterwards.
     */
    public DocumentPageSplitter(LayoutCollector collector) throws Exception {
        this.mPageNumberFinder = new PageNumberFinder(collector);
        this.mPageNumberFinder.SplitNodesAcrossPages();
    }

    /**
     * Returns a new document holding the content of a single page.
     */
    public Document GetDocumentOfPage(int pageIndex) throws Exception {
        // A single page is simply a range that starts and ends on the same page.
        return GetDocumentOfPageRange(pageIndex, pageIndex);
    }

    /**
     * Returns a new document holding the content of the pages from startIndex to endIndex.
     */
    public Document GetDocumentOfPageRange(int startIndex, int endIndex) throws Exception {
        // Start from a childless copy of the source document.
        Document source = mPageNumberFinder.getDocument();
        Document pageRangeDoc = (Document) source.deepClone(false);

        // Import every section found on the requested pages into the copy, in the order returned.
        ArrayList sectionsOnPages = mPageNumberFinder.RetrieveAllNodesOnPages(startIndex, endIndex, NodeType.SECTION);
        for (Object item : sectionsOnPages) {
            Section sourceSection = (Section) item;
            Node importedSection = pageRangeDoc.importNode(sourceSection, true);
            pageRangeDoc.appendChild(importedSection);
        }

        return pageRangeDoc;
    }
}

/**
 * Resolves the page(s) a document node is found on and provides the reverse lookup from a page
 * number to the nodes found on that page. Page numbers are taken from the layout collector unless
 * they were overridden through {@link #AddPageNumbersForNode(Node, int, int)}.
 */
class PageNumberFinder {

    // Maps node to a start/end page numbers. This is used to override baseline page numbers provided by collector when document is split.
    private final Hashtable<Node, Integer> mNodeStartPageLookup = new Hashtable<Node, Integer>();
    private final Hashtable<Node, Integer> mNodeEndPageLookup = new Hashtable<Node, Integer>();
    // Maps page number to a list of nodes found on that page. Built on first use and reused afterwards.
    private Hashtable<Integer, ArrayList<Node>> mReversePageLookup;
    private final LayoutCollector mCollector;

    /**
     * Initializes new instance of this class.
     *
     * @param collector the layout collector that supplies the baseline page numbers and the document.
     */
    public PageNumberFinder(LayoutCollector collector) {
        mCollector = collector;
    }

    /**
     * Retrieves 1-based index of a page that the node begins on.
     *
     * @param node the node to look up.
     * @return the overridden start page when one was registered, otherwise the collector's value.
     */
    public int GetPage(Node node) throws Exception {
        Integer overriddenPage = mNodeStartPageLookup.get(node);
        if (overriddenPage != null)
            return overriddenPage;

        return mCollector.getStartPageIndex(node);
    }

    /**
     * Retrieves 1-based index of a page that the node ends on.
     *
     * @param node the node to look up.
     * @return the overridden end page when one was registered, otherwise the collector's value.
     */
    public int GetPageEnd(Node node) throws Exception {
        Integer overriddenPage = mNodeEndPageLookup.get(node);
        if (overriddenPage != null)
            return overriddenPage;

        return mCollector.getEndPageIndex(node);
    }

    /**
     * Returns how many pages the specified node spans over. Returns 1 if the node is contained within one page.
     */
    public int PageSpan(Node node) throws Exception {
        return GetPageEnd(node) - GetPage(node) + 1;
    }

    /**
     * Returns a list of nodes that are contained anywhere on the specified page or pages which match the specified node type.
     * Each node is listed once, in the order it is first encountered, even when it spans several of the requested pages.
     * Nodes that no longer have a parent are left out.
     *
     * @param startPage 1-based index of the first page, must not exceed the document's page count.
     * @param endPage   1-based index of the last page, must lie between startPage and the page count.
     * @param nodeType  a {@code NodeType} constant to filter by, or {@code NodeType.ANY} for no filtering.
     * @return the matching nodes; empty when the pages hold no matching node.
     * @throws IllegalArgumentException if startPage or endPage is out of range.
     */
    public ArrayList RetrieveAllNodesOnPages(int startPage, int endPage, int nodeType) throws Exception {
        int pageCount = getDocument().getPageCount();

        if (startPage < 1 || startPage > pageCount)
            throw new IllegalArgumentException("startPage must be between 1 and " + pageCount + " but was " + startPage);

        // startPage is known to be >= 1 here, so "endPage < startPage" also rejects endPage < 1.
        if (endPage < startPage || endPage > pageCount)
            throw new IllegalArgumentException("endPage must be between startPage (" + startPage + ") and " + pageCount + " but was " + endPage);

        CheckPageListsPopulated();

        // A node spanning several pages is listed under each of them. The linked set drops the
        // repeats without rescanning the result and keeps the first-seen order.
        java.util.LinkedHashSet<Node> uniqueNodes = new java.util.LinkedHashSet<Node>();

        for (int page = startPage; page <= endPage; page++) {
            ArrayList<Node> nodesOnPage = mReversePageLookup.get(page);

            // Some pages can be empty.
            if (nodesOnPage == null)
                continue;

            for (Node node : nodesOnPage) {
                boolean attached = node.getParentNode() != null;
                boolean typeMatches = (nodeType == NodeType.ANY) || (nodeType == node.getNodeType());

                if (attached && typeMatches)
                    uniqueNodes.add(node);
            }
        }

        return new ArrayList<Node>(uniqueNodes);
    }

    /**
     * Splits nodes which appear over two or more pages into separate nodes so that they still appear in the same way
     * but no longer appear across a page.
     */
    public void SplitNodesAcrossPages() throws Exception {
        // Visit any composites which are possibly split across pages and split them into separate nodes.
        getDocument().accept(new SectionSplitter(this));
    }

    /**
     * Gets the document this instance works with.
     */
    public Document getDocument() {
        return mCollector.getDocument();
    }

    /**
     * This is called by {@link SectionSplitter} to update page numbers of split nodes.
     * A page value of zero or less leaves the corresponding entry untouched.
     */
    void AddPageNumbersForNode(Node node, int startPage, int endPage) {
        if (startPage > 0)
            mNodeStartPageLookup.put(node, startPage);

        if (endPage > 0)
            mNodeEndPageLookup.put(node, endPage);
    }

    /**
     * Builds the page-to-nodes lookup on first use; later calls return immediately.
     */
    private void CheckPageListsPopulated() throws Exception {
        if (mReversePageLookup != null)
            return;

        // Fill a local table and publish it only when complete, so that a failure half way
        // through does not leave a partial lookup that later calls would treat as finished.
        Hashtable<Integer, ArrayList<Node>> lookup = new Hashtable<Integer, ArrayList<Node>>();

        // Add each node to a list which represent the nodes found on each page.
        for (Node node : (Iterable<Node>) getDocument().getChildNodes(NodeType.ANY, true)) {
            // Headers/Footers follow sections. They are not split by themselves.
            if (IsHeaderFooterType(node))
                continue;

            int startPage = GetPage(node);
            int endPage = GetPageEnd(node);

            for (int page = startPage; page <= endPage; page++) {
                ArrayList<Node> nodesOnPage = lookup.get(page);
                if (nodesOnPage == null) {
                    nodesOnPage = new ArrayList<Node>();
                    lookup.put(page, nodesOnPage);
                }

                nodesOnPage.add(node);
            }
        }

        mReversePageLookup = lookup;
    }

    /**
     * Tells whether the node is a header/footer or is located inside one.
     */
    private static boolean IsHeaderFooterType(Node node) {
        return node.getNodeType() == NodeType.HEADER_FOOTER || node.getAncestor(NodeType.HEADER_FOOTER) != null;
    }
}

class SectionSplitter extends DocumentVisitor {
    /**
     * Creates a splitter that reads and updates page numbers through the given finder.
     */
    public SectionSplitter(PageNumberFinder pageNumberFinder) {
        this.mPageNumberFinder = pageNumberFinder;
    }


    /**
     * Called when enumeration of a paragraph begins. Keeps list numbering continuous when a list
     * continues on a different page, removes an empty paragraph that ends a section, and adjusts
     * paragraph spacing when the paragraph starts on a different page than the one before it ends on.
     *
     * @param paragraph the paragraph being visited.
     * @return {@code VisitorAction.CONTINUE} so that enumeration carries on.
     * @throws Exception if any document or page-number call fails.
     */
    public int visitParagraphStart(Paragraph paragraph) throws Exception {
        if (paragraph.isListItem()) {
            List paraList = paragraph.getListFormat().getList();
            ListLevel currentLevel = paragraph.getListFormat().getListLevel();

            // Since we have encountered a list item we need to check if this will reset
            // any subsequent list levels and if so then update the numbering of the level.
            int currentListLevelNumber = paragraph.getListFormat().getListLevelNumber();
            for (int i = currentListLevelNumber + 1; i < paraList.getListLevels().getCount(); i++) {
                ListLevel paraLevel = paraList.getListLevels().get(i);

                if (paraLevel.getRestartAfterLevel() >= currentListLevelNumber) {
                    // This list level needs to be reset after the current list number.
                    mListLevelToListNumberLookup.put(paraLevel, paraLevel.getStartAt());
                }
            }

            // A list which was used on a previous page is present on a different page, the list
            // needs to be copied so list numbering is retained when extracting individual pages.
            if (ContainsListLevelAndPageChanged(paragraph)) {
                List copyList = paragraph.getDocument().getLists().addCopy(paraList);
                mListLevelToListNumberLookup.put(currentLevel, paragraph.getListLabel().getLabelValue());

                // Set the numbering of each list level to start at the numbering of the level on the previous page.
                for (int i = 0; i < paraList.getListLevels().getCount(); i++) {
                    ListLevel paraLevel = paraList.getListLevels().get(i);

                    if (mListLevelToListNumberLookup.containsKey(paraLevel))
                        copyList.getListLevels().get(i).setStartAt((Integer) mListLevelToListNumberLookup.get(paraLevel));
                }

                mListToReplacementListLookup.put(paraList, copyList);
            }

            if (mListToReplacementListLookup.containsKey(paraList)) {
                // This paragraph belongs to a list from a previous page. Apply the replacement list.
                paragraph.getListFormat().setList((List) mListToReplacementListLookup.get(paraList));
                // This is a trick to get the spacing of the list level to set correctly.
                paragraph.getListFormat().setListLevelNumber(paragraph.getListFormat().getListLevelNumber() + 0);
            }

            // Remember where this level was last seen and which number it carried.
            mListLevelToPageLookup.put(currentLevel, mPageNumberFinder.GetPage(paragraph));
            mListLevelToListNumberLookup.put(currentLevel, paragraph.getListLabel().getLabelValue());
        }

        Section prevSection = (Section) paragraph.getParentSection().getPreviousSibling();
        Paragraph prevBodyPara = null;

        if (paragraph.getPreviousSibling() != null && paragraph.getPreviousSibling().getNodeType() == NodeType.PARAGRAPH)
            prevBodyPara = (Paragraph) paragraph.getPreviousSibling();

        // Only the first child of a section body falls back to the last paragraph of the previous section.
        Paragraph prevSectionPara = prevSection != null && paragraph == paragraph.getParentSection().getBody().getFirstChild() ? prevSection.getBody().getLastParagraph() : null;
        Paragraph prevParagraph = prevBodyPara != null ? prevBodyPara : prevSectionPara;

        // An empty paragraph that ends the section is dropped from the document.
        if (paragraph.isEndOfSection() && !paragraph.hasChildNodes())
            paragraph.remove();

        // Paragraphs across pages can merge or remove spacing depending upon the previous paragraph.
        if (prevParagraph != null) {
            if (mPageNumberFinder.GetPage(paragraph) != mPageNumberFinder.GetPageEnd(prevParagraph)) {
                if (paragraph.isListItem() && prevParagraph.isListItem() && !prevParagraph.isEndOfSection())
                    prevParagraph.getParagraphFormat().setSpaceAfter(0);
                else if (HaveSameStyleName(prevParagraph, paragraph) && paragraph.getParagraphFormat().getNoSpaceBetweenParagraphsOfSameStyle())
                    paragraph.getParagraphFormat().setSpaceBefore(0);
                // prevSection is null inside the first section; after the removal above the previous
                // sibling can become the end of that section, so guard before dereferencing it.
                else if (paragraph.getParagraphFormat().getPageBreakBefore() || (prevParagraph.isEndOfSection() && prevSection != null && prevSection.getPageSetup().getSectionStart() != SectionStart.NEW_COLUMN))
                    paragraph.getParagraphFormat().setSpaceBefore(Math.max(paragraph.getParagraphFormat().getSpaceBefore() - prevParagraph.getParagraphFormat().getSpaceAfter(), 0));
                else
                    paragraph.getParagraphFormat().setSpaceBefore(0);
            }
        }

        return VisitorAction.CONTINUE;
    }

    /**
     * Tells whether two paragraphs report the same style name. The names are compared by value
     * (two null names count as equal) rather than by object identity.
     */
    private static boolean HaveSameStyleName(Paragraph first, Paragraph second) {
        String firstStyleName = first.getParagraphFormat().getStyleName();
        String secondStyleName = second.getParagraphFormat().getStyleName();

        return firstStyleName == null ? secondStyleName == null : firstStyleName.equals(secondStyleName);
    }

    /**
     * Called when enumeration of a section begins. Makes the section independent of the section
     * before it (explicit page numbering and copies of any header/footer types it lacks), then
     * writes and locks SECTION / SECTIONPAGES field results in its headers and footers and locks
     * every field in its body.
     */
    public int visitSectionStart(Section section) throws Exception {
        mSectionCount++;
        Section precedingSection = (Section) section.getPreviousSibling();

        // Without the preceding section, linked headers/footers and continued page numbers would be
        // lost in an extracted document, so they are made explicit here.
        if (precedingSection != null) {
            boolean alreadyRestarts = section.getPageSetup().getRestartPageNumbering();
            if (!alreadyRestarts) {
                section.getPageSetup().setRestartPageNumbering(true);
                section.getPageSetup().setPageStartingNumber(
                        precedingSection.getPageSetup().getPageStartingNumber() + mPageNumberFinder.PageSpan(precedingSection));
            }

            for (HeaderFooter inherited : precedingSection.getHeadersFooters()) {
                if (section.getHeadersFooters().getByHeaderFooterType(inherited.getHeaderFooterType()) != null) {
                    continue;
                }

                HeaderFooter copy = (HeaderFooter) precedingSection.getHeadersFooters()
                        .getByHeaderFooterType(inherited.getHeaderFooterType()).deepClone(true);
                section.getHeadersFooters().add(copy);
            }
        }

        // Manually set the result of these fields before sections are split.
        for (HeaderFooter headerFooter : section.getHeadersFooters()) {
            for (Field field : headerFooter.getRange().getFields()) {
                boolean isSectionField = field.getType() == FieldType.FIELD_SECTION;
                if (!isSectionField && field.getType() != FieldType.FIELD_SECTION_PAGES) {
                    continue;
                }

                String result;
                if (isSectionField) {
                    result = Integer.toString(mSectionCount);
                } else {
                    result = Integer.toString(mPageNumberFinder.PageSpan(section));
                }

                field.setResult(result);
                field.isLocked(true);
            }
        }

        // All fields in the body should stay the same, this also improves field update time.
        for (Field bodyField : section.getBody().getRange().getFields()) {
            bodyField.isLocked(true);
        }

        return VisitorAction.CONTINUE;
    }

    /**
     * Called when enumeration of the document ends. Updates the document's fields once, then locks
     * every field found in any header or footer.
     */
    public int visitDocumentEnd(Document doc) throws Exception {
        // All sections have separate headers and footers now, so a single update brings the fields
        // in them to the correct values, even when PAGE or IF fields are used.
        doc.updateFields();

        Iterable<HeaderFooter> allHeadersFooters = (Iterable<HeaderFooter>) doc.getChildNodes(NodeType.HEADER_FOOTER, true);
        for (HeaderFooter headerFooter : allHeadersFooters) {
            for (Field field : headerFooter.getRange().getFields()) {
                field.isLocked(true);
            }
        }

        return VisitorAction.CONTINUE;
    }

    /**
     * Called when enumeration of a smart tag ends; splits the tag if it covers more than one page.
     */
    public int visitSmartTagEnd(SmartTag smartTag) throws Exception {
        final boolean coversSeveralPages = IsCompositeAcrossPage(smartTag);
        if (coversSeveralPages) {
            SplitComposite(smartTag);
        }

        return VisitorAction.CONTINUE;
    }

//    public int visitCustomXmlMarkupEnd(CustomXmlMarkup customXmlMarkup) throws Exception {
//        if (IsCompositeAcrossPage(customXmlMarkup))
//            SplitComposite(customXmlMarkup);
//
//        return VisitorAction.CONTINUE;
//    }

    /**
     * Called when enumeration of a structured document tag ends; splits the tag if it covers more
     * than one page.
     */
    public int visitStructuredDocumentTagEnd(StructuredDocumentTag sdt) throws Exception {
        final boolean coversSeveralPages = IsCompositeAcrossPage(sdt);
        if (coversSeveralPages) {
            SplitComposite(sdt);
        }

        return VisitorAction.CONTINUE;
    }

    /**
     * Called when enumeration of a table cell ends; splits the cell if it covers more than one page.
     */
    public int visitCellEnd(Cell cell) throws Exception {
        final boolean coversSeveralPages = IsCompositeAcrossPage(cell);
        if (coversSeveralPages) {
            SplitComposite(cell);
        }

        return VisitorAction.CONTINUE;
    }

    /**
     * Called when enumeration of a table row ends; splits the row if it covers more than one page.
     */
    public int visitRowEnd(Row row) throws Exception {
        final boolean coversSeveralPages = IsCompositeAcrossPage(row);
        if (coversSeveralPages) {
            SplitComposite(row);
        }

        return VisitorAction.CONTINUE;
    }

    /**
     * Called when enumeration of a table ends. A table covering more than one page is split, and
     * every row flagged as a heading row is cloned to the top of each table produced by the split.
     *
     * @param table the table being visited.
     * @return {@code VisitorAction.CONTINUE} so that enumeration carries on.
     * @throws Exception if any document or page-number call fails.
     */
    public int visitTableEnd(Table table) throws Exception {
        if (IsCompositeAcrossPage(table)) {
            // Snapshot the rows before splitting, because the split moves rows out of this table.
            Row[] rows = table.getRows().toArray();

            for (Table cloneTable : (Iterable<Table>) SplitComposite(table)) {
                // Each clone is prepended, i.e. placed in front of the previous one. Walking the rows
                // backwards therefore keeps several heading rows in their original top-to-bottom order.
                for (int i = rows.length - 1; i >= 0; i--) {
                    Row row = rows[i];

                    if (row.getRowFormat().getHeadingFormat())
                        cloneTable.prependChild(row.deepClone(true));
                }
            }
        }

        return VisitorAction.CONTINUE;
    }

    /**
     * Called when enumeration of a paragraph ends. A paragraph covering more than one page is
     * split; each continuation loses its list number (keeping the indent) and the spacing at the
     * split is zeroed.
     */
    public int visitParagraphEnd(Paragraph paragraph) throws Exception {
        if (!IsCompositeAcrossPage(paragraph)) {
            return VisitorAction.CONTINUE;
        }

        ArrayList continuations = SplitComposite(paragraph);
        for (Object item : continuations) {
            Paragraph continuation = (Paragraph) item;

            // The continuation belongs to the preceding list item, so it keeps the text indent
            // of its list level but must not get a number of its own.
            if (paragraph.isListItem()) {
                double levelTextPosition = continuation.getListFormat().getListLevel().getTextPosition();
                continuation.getListFormat().removeNumbers();
                continuation.getParagraphFormat().setLeftIndent(levelTextPosition);
            }

            // Reset spacing of split paragraphs as additional spacing is removed.
            continuation.getParagraphFormat().setSpaceBefore(0);
            paragraph.getParagraphFormat().setSpaceAfter(0);
        }

        return VisitorAction.CONTINUE;
    }

    /**
     * Called when enumeration of a section ends. A section covering more than one page is split
     * into one section per page: direct run formatting inside TOC field results is cleared first,
     * each cloned section is set to start on a new page with its own page numbering, and page
     * breaks are stripped from the last paragraph of every resulting section.
     *
     * @param section the section being visited.
     * @return {@code VisitorAction.CONTINUE} so that enumeration carries on.
     * @throws Exception if any document or page-number call fails.
     */
    public int visitSectionEnd(Section section) throws Exception {
        if (IsCompositeAcrossPage(section)) {
            // If a TOC field spans across more than one page then the hyperlink formatting may show through.
            // Remove direct formatting to avoid this.
            for (FieldStart start : (Iterable<FieldStart>) section.getChildNodes(NodeType.FIELD_START, true)) {
                if (start.getFieldType() == FieldType.FIELD_TOC) {
                    Field field = start.getField();
                    Node fieldEnd = field.getEnd();
                    Node node = field.getSeparator();

                    // Walk from the separator towards the field end. Stop as well when there is no
                    // separator or when the walk leaves the section before reaching the field end,
                    // instead of dereferencing a null node.
                    while (node != null) {
                        node = node.nextPreOrder(section);
                        if (node == null || node == fieldEnd)
                            break;

                        if (node.getNodeType() == NodeType.RUN)
                            ((Run) node).getFont().clearFormatting();
                    }
                }
            }

            for (Section cloneSection : (Iterable<Section>) SplitComposite(section)) {
                cloneSection.getPageSetup().setSectionStart(SectionStart.NEW_PAGE);
                cloneSection.getPageSetup().setRestartPageNumbering(true);
                // The page number of a clone is the original start number plus its distance from the original section.
                cloneSection.getPageSetup().setPageStartingNumber(section.getPageSetup().getPageStartingNumber() + (section.getDocument().indexOf(cloneSection) - section.getDocument().indexOf(section)));
                cloneSection.getPageSetup().setDifferentFirstPageHeaderFooter(false);

                RemovePageBreaksFromParagraph(cloneSection.getBody().getLastParagraph());
            }

            RemovePageBreaksFromParagraph(section.getBody().getLastParagraph());

            // Add new page numbering for the body of the section as well.
            mPageNumberFinder.AddPageNumbersForNode(section.getBody(), mPageNumberFinder.GetPage(section), mPageNumberFinder.GetPageEnd(section));
        }

        return VisitorAction.CONTINUE;
    }

    /**
     * Tells whether the page span reported for the composite is greater than one.
     */
    private boolean IsCompositeAcrossPage(CompositeNode composite) throws Exception {
        final int pagesCovered = mPageNumberFinder.PageSpan(composite);
        return pagesCovered > 1;
    }

    /**
     * Tells whether the paragraph's list level has been recorded before and the page recorded for
     * it differs from the page this paragraph is on.
     */
    private boolean ContainsListLevelAndPageChanged(Paragraph para) throws Exception {
        Object levelKey = para.getListFormat().getListLevel();
        if (!mListLevelToPageLookup.containsKey(levelKey)) {
            return false;
        }

        int recordedPage = (Integer) mListLevelToPageLookup.get(levelKey);
        return recordedPage != mPageNumberFinder.GetPage(para);
    }

    /**
     * Strips page break characters from the text of every run of the paragraph. A null paragraph
     * is ignored.
     */
    private void RemovePageBreaksFromParagraph(Paragraph para) throws Exception {
        if (para == null) {
            return;
        }

        for (Run run : para.getRuns()) {
            String textWithoutBreaks = run.getText().replace(ControlChar.PAGE_BREAK, "");
            run.setText(textWithoutBreaks);
        }
    }

    /**
     * Splits the composite at every split position found among its children and returns the
     * nodes created by the splits, in the order the splits were performed.
     */
    private ArrayList SplitComposite(CompositeNode composite) throws Exception {
        ArrayList createdNodes = new ArrayList();

        ArrayList splitPositions = FindChildSplitPositions(composite);
        for (Object position : splitPositions) {
            Node splitAt = (Node) position;
            createdNodes.add(SplitCompositeAtNode(composite, splitAt));
        }

        return createdNodes;
    }

    /**
     * Collects the children at which the given node must be split: each child whose page is
     * greater than the page seen so far. For a section the children of its body are examined.
     * The result is returned in reverse document order.
     */
    private ArrayList FindChildSplitPositions(CompositeNode node) throws Exception {
        // A node may cover several pages, hence a list; each entry is the first node on a following page.
        ArrayList splitPositions = new ArrayList();

        int lastSeenPage = mPageNumberFinder.GetPage(node);

        Node[] children;
        if (node.getNodeType() == NodeType.SECTION) {
            children = ((Section) node).getBody().getChildNodes().toArray();
        } else {
            children = node.getChildNodes().toArray();
        }

        for (int index = 0; index < children.length; index++) {
            Node child = children[index];
            int childPage = mPageNumberFinder.GetPage(child);

            // A child on a later page than any seen so far marks a split position.
            if (childPage > lastSeenPage) {
                splitPositions.add(child);
                lastSeenPage = childPage;
            }

            // A child still reported as covering several pages is pinned to its start page.
            if (mPageNumberFinder.PageSpan(child) > 1) {
                mPageNumberFinder.AddPageNumbersForNode(child, childPage, childPage);
            }
        }

        // Split composites backward so the cloned nodes are inserted in the right order.
        Collections.reverse(splitPositions);

        return splitPositions;
    }

    /**
     * Splits {@code baseNode} at {@code targetNode}: a clone of the base node is created, the
     * target node and the content after it are moved into the clone, the clone is inserted
     * directly after the base node, and the page numbers recorded for both are updated.
     *
     * @param baseNode   the composite being split; it keeps the content before the split position.
     * @param targetNode the child at which to split (for a section, presumably a child of its
     *                   body, as produced by FindChildSplitPositions - confirm against callers).
     * @return the clone that was inserted after {@code baseNode}.
     * @throws Exception if any document or page-number call fails.
     */
    private CompositeNode SplitCompositeAtNode(CompositeNode baseNode, Node targetNode) throws Exception {
        // Shallow clone: same node without its children.
        CompositeNode cloneNode = (CompositeNode) baseNode.deepClone(false);

        Node node = targetNode;
        int currentPageNum = mPageNumberFinder.GetPage(baseNode);

        // Move all nodes found on the next page into the copied node. Handle row nodes separately.
        if (baseNode.getNodeType() != NodeType.ROW) {
            CompositeNode composite = cloneNode;

            if (baseNode.getNodeType() == NodeType.SECTION) {
                // A section is cloned with all its children and only its body is emptied, so
                // everything else in the clone (e.g. headers/footers) is retained; the moved
                // nodes go into the body of the clone.
                cloneNode = (CompositeNode) baseNode.deepClone(true);
                Section section = (Section) cloneNode;
                section.getBody().removeAllChildren();

                composite = section.getBody();
            }

            // Move the target node and every following sibling. The next sibling is read before
            // the move because appending re-parents the node.
            while (node != null) {
                Node nextNode = node.getNextSibling();
                composite.appendChild(node);
                node = nextNode;
            }
        } else {
            // If we are dealing with a row then we need to add in dummy cells for the cloned row.
            int targetPageNum = mPageNumberFinder.GetPage(targetNode);
            Node[] childNodes = baseNode.getChildNodes().toArray();

            for (Node childNode : childNodes) {
                int pageNum = mPageNumberFinder.GetPage(childNode);

                if (pageNum == targetPageNum) {
                    // A child on the target page replaces the last child added to the clone so far
                    // (presumably the dummy created for the child before it - confirm).
                    cloneNode.getLastChild().remove();
                    cloneNode.appendChild(childNode);
                } else if (pageNum == currentPageNum) {
                    // A child on the base node's page stays where it is; the clone gets an empty copy of it.
                    cloneNode.appendChild(childNode.deepClone(false));
                    // NOTE(review): the empty copy gets a shallow clone of the child's first child
                    // only when it is NOT a cell - confirm this condition is intended.
                    if (cloneNode.getLastChild().getNodeType() != NodeType.CELL)
                        ((CompositeNode) cloneNode.getLastChild()).appendChild(((CompositeNode) childNode).getFirstChild().deepClone(false));
                }
            }
        }

        // Insert the split node after the original.
        baseNode.getParentNode().insertAfter(cloneNode, baseNode);

        // Update the new page numbers of the base node and the clone node including its descendants.
        // This will only be a single page as the cloned composite is split to be on one page.
        int currentEndPageNum = mPageNumberFinder.GetPageEnd(baseNode);
        mPageNumberFinder.AddPageNumbersForNode(baseNode, currentPageNum, currentEndPageNum - 1);
        mPageNumberFinder.AddPageNumbersForNode(cloneNode, currentEndPageNum, currentEndPageNum);

        for (Node childNode : (Iterable<Node>) cloneNode.getChildNodes(NodeType.ANY, true))
            mPageNumberFinder.AddPageNumbersForNode(childNode, currentEndPageNum, currentEndPageNum);

        return cloneNode;
    }

    // Maps a list level to a number: its start-at value when the level is reset, otherwise the
    // label value of the list paragraph most recently visited at that level.
    private Hashtable mListLevelToListNumberLookup = new Hashtable();
    // Maps an original list to the copy that replaces it once the list continues on another page.
    private Hashtable mListToReplacementListLookup = new Hashtable();
    // Maps a list level to the page of the list paragraph most recently visited at that level.
    private Hashtable mListLevelToPageLookup = new Hashtable();
    // Source of page numbers for nodes; also receives the updated numbers of split nodes.
    private PageNumberFinder mPageNumberFinder;
    // Number of sections visited so far; incremented in visitSectionStart.
    private int mSectionCount;
}