DocumentSearchControllerImpl.java example

Explorer

icepdf-master
- core
  - core-awt
    - src
      - main
        java
        org
        icepdf
        core
        Memento.java
        SecurityCallback.java
        events
        PageImageEvent.java
        PageInitializingEvent.java
        PageLoadingAdapter.java
        PageLoadingEvent.java
        PageLoadingListener.java
        PagePaintingEvent.java
        PaintPageEvent.java
        PaintPageListener.java
        exceptions
        PDFException.java
        PDFSecurityException.java
        io
        BitStream.java
        BufferedMarkedInputStream.java
        ByteDoubleArrayInputStream.java
        ConservativeSizingByteArrayOutputStream.java
        CountingOutputStream.java
        RandomAccessFileInputStream.java
        SeekableByteArrayInputStream.java
        SeekableInput.java
        SeekableInputConstrainedWrapper.java
        SequenceInputStream.java
        SizeInputStream.java
        ZeroPaddedInputStream.java
        pobjects
        Catalog.java
        CrossReference.java
        Destination.java
        Dictionary.java
        Document.java
        EmbeddedFileStream.java
        FileSpecification.java
        Form.java
        HexStringObject.java
        ImageStream.java
        ImageUtility.java
        LiteralStringObject.java
        Name.java
        NameNode.java
        NameTree.java
        NamedDestinations.java
        Names.java
        ObjectStream.java
        OptionalContent.java
        OptionalContentGroup.java
        OptionalContentMembership.java
        OptionalContents.java
        OutlineItem.java
        Outlines.java
        PDate.java
        PDimension.java
        PInfo.java
        PObject.java
        PRectangle.java
        PTrailer.java
        Page.java
        PageTree.java
        Permissions.java
        Reference.java
        Resources.java
        StateManager.java
        Stream.java
        StringObject.java
        Thumbnail.java
        ViewerPreferences.java
        acroform
        AdditionalActionsDictionary.java
        ButtonFieldDictionary.java
        CertSeedValueDictionary.java
        ChoiceFieldDictionary.java
        DocMDPTransferParam.java
        FieldDictionary.java
        FieldDictionaryFactory.java
        FieldMDPTransferParam.java
        InteractiveForm.java
        LockDictionary.java
        SeedValueDictionary.java
        SignatureDictionary.java
        SignatureFieldDictionary.java
        SignatureHandler.java
        SignatureReferenceDictionary.java
        TextFieldDictionary.java
        TransformParams.java
        UR3TransferParam.java
        VariableTextFieldDictionary.java
        signature
        AbstractPkcsValidator.java
        AlgorithmIdentifier.java
        DigitalSignatureFactory.java
        Pkcs1Validator.java
        Pkcs7Validator.java
        SignatureSigner.java
        SignatureValidator.java
        certificates
        CRLVerifier.java
        CertificateVerifier.java
        exceptions
        CertificateVerificationException.java
        RevocationVerificationException.java
        SelfSignedVerificationException.java
        SignatureIntegrityException.java
        actions
        Action.java
        ActionFactory.java
        FormAction.java
        GoToAction.java
        GoToRAction.java
        JavaScriptAction.java
        LaunchAction.java
        NamedAction.java
        ResetFormAction.java
        SubmitFormAction.java
        URIAction.java
        annotations
        AbstractWidgetAnnotation.java
        Annotation.java
        AnnotationFactory.java
        Appearance.java
        AppearanceState.java
        BorderEffect.java
        BorderStyle.java
        ButtonWidgetAnnotation.java
        ChoiceWidgetAnnotation.java
        CircleAnnotation.java
        FreeTextAnnotation.java
        GenericAnnotation.java
        InkAnnotation.java
        LineAnnotation.java
        LinkAnnotation.java
        MarkupAnnotation.java
        PopupAnnotation.java
        SignatureWidgetAnnotation.java
        SquareAnnotation.java
        TextAnnotation.java
        TextMarkupAnnotation.java
        TextWidgetAnnotation.java
        WidgetAnnotation.java
        filters
        ASCII85Decode.java
        ASCIIHexDecode.java
        CCITTFax.java
        CCITTFaxDecoder.java
        ChunkingInputStream.java
        FlateDecode.java
        G4State.java
        LZWDecode.java
        PredictorDecode.java
        RunLengthDecode.java
        fonts
        AFM.java
        CMap.java
        Encoding.java
        Font.java
        FontDescriptor.java
        FontFactory.java
        FontFile.java
        FontManager.java
        ofont
        CMap.java
        Encoding.java
        Font.java
        OFont.java
        functions
        Function.java
        Function_0.java
        Function_2.java
        Function_3.java
        Function_4.java
        postscript
        Expression.java
        Lexer.java
        LexerText.java
        Operator.java
        OperatorFactory.java
        OperatorNames.java
        Procedure.java
        graphics
        BlendComposite.java
        BlurredImageReference.java
        CachedImageReference.java
        CalGray.java
        CalRGB.java
        ColorSpaceCMYK.java
        DeviceCMYK.java
        DeviceGray.java
        DeviceN.java
        DeviceRGB.java
        ExtGState.java
        FloydSteinbergImageReference.java
        GlyphOutlineClip.java
        GraphicsState.java
        ICCBased.java
        ImagePool.java
        ImageReference.java
        ImageReferenceFactory.java
        ImageStreamReference.java
        Indexed.java
        InlineImageStreamReference.java
        Lab.java
        MipMappedImageReference.java
        OptionalContentState.java
        PColorSpace.java
        PaintTimer.java
        Pattern.java
        PatternColor.java
        RasterOps
        CMYKRasterOp.java
        DecodeRasterOp.java
        GrayRasterOp.java
        IccCmykRasterOp.java
        PColorSpaceRasterOp.java
        YCCKRasterOp.java
        YCbCrARasterOp.java
        YCbCrRasterOp.java
        ScaledImageReference.java
        Separation.java
        ShadingMeshPattern.java
        ShadingPattern.java
        ShadingType1Pattern.java
        ShadingType2Pattern.java
        ShadingType3Pattern.java
        ShadingType4Pattern.java
        ShadingType5Pattern.java
        ShadingType6Pattern.java
        ShadingType7Pattern.java
        Shapes.java
        SmoothScaledImageReference.java
        SoftMask.java
        TextSprite.java
        TextState.java
        TilingPattern.java
        WatermarkCallback.java
        batik
        ext
        awt
        LinearGradientPaint.java
        LinearGradientPaintContext.java
        MultipleGradientPaint.java
        MultipleGradientPaintContext.java
        RadialGradientPaint.java
        RadialGradientPaintContext.java
        image
        GraphicsUtil.java
        commands
        AbstractDrawCmd.java
        AlphaDrawCmd.java
        BlendCompositeDrawCmd.java
        ClipDrawCmd.java
        ColorDrawCmd.java
        DrawCmd.java
        DrawDrawCmd.java
        FillDrawCmd.java
        FormDrawCmd.java
        GlyphOutlineDrawCmd.java
        GraphicsStateCmd.java
        ImageDrawCmd.java
        NoClipDrawCmd.java
        OCGEndDrawCmd.java
        OCGStartDrawCmd.java
        PaintDrawCmd.java
        PostScriptEncoder.java
        ShapeDrawCmd.java
        ShapesDrawCmd.java
        StrokeDrawCmd.java
        TextSpriteDrawCmd.java
        TextTransformDrawCmd.java
        TilingPatternDrawCmd.java
        TransformDrawCmd.java
        text
        AbstractText.java
        GlyphText.java
        LinePositionComparator.java
        LineText.java
        PageText.java
        Text.java
        TextPositionComparator.java
        TextSelect.java
        WordPositionComparator.java
        WordText.java
        security
        CryptFilter.java
        CryptFilterEntry.java
        EncryptionDictionary.java
        Permissions.java
        PublicSecurityHandler.java
        SecurityHandler.java
        SecurityHandlerInterface.java
        SecurityManager.java
        StandardEncryption.java
        StandardSecurityHandler.java
        search
        DocumentSearchController.java
        SearchTerm.java
        util
        ColorUtil.java
        Defs.java
        FontUtil.java
        GraphicsRenderingHints.java
        HexDumper.java
        LazyObjectLoader.java
        Library.java
        Parser.java
        PdfOps.java
        PropertyConstants.java
        SoftLRUCache.java
        Utils.java
        content
        AbstractContentParser.java
        ContentParser.java
        ContentParserFactory.java
        OContentParser.java
        TextMetrics.java
        loggers
        BriefLogFormatter.java
        BriefestLogFormatter.java
        java-templates
        ProductInfo.java
        resources
        org
        icepdf
        core
        application
        ProductInfo.java
- examples
  - annotation
    - callback
      - src
        main
        java
        org
        icepdf
        os
        examples
        annotation
        callback
        MyAnnotationCallback.java
    - creation
      - src
        main
        java
        org
        icepdf
        os
        examples
        annotation
        creation
        NewAnnotationPostPageLoad.java
        NewAnnotationPrePageLoad.java
  - capture
    - listener
      - src
        main
        java
        org
        icepdf
        os
        examples
        capture
        DocumentCapture.java
    - png
      - src
        main
        java
        org
        icepdf
        os
        examples
        capture
        PageCapture.java
    - portfolio
      - src
        main
        java
        org
        icepdf
        os
        examples
        capture
        PortfolioCapture.java
    - svg
      - src
        main
        java
        org
        icepdf
        os
        examples
        svg
        SvgCapture.java
    - tiff
      - src
        main
        java
        org
        icepdf
        os
        capture
        MultiPageCapture.java
    - watermark
      - src
        main
        java
        org
        icepdf
        os
        examples
        watermark
        WatermarkPageCapture.java
  - component
    - src
      - main
        java
        org
        icepdf
        os
        examples
        component
        ViewerComponentExample.java
  - extraction
    - image
      - src
        main
        java
        org
        icepdf
        os
        examples
        extraction
        PageImageExtraction.java
    - metadata
      - src
        main
        java
        org
        icepdf
        os
        examples
        extraction
        PageMetaDataExtraction.java
    - text
      - src
        main
        java
        org
        icepdf
        os
        examples
        extraction
        PageTextExtraction.java
  - icefaces
    - src
      - main
        java
        org
        icepdf
        examples
        jsf
        viewer
        renderkit
        PdfResourceHandler.java
        servlet
        PdfRenderer.java
        util
        FacesUtils.java
        InputFileSessionCleaner.java
        view
        BeanNames.java
        DocumentCache.java
        DocumentManager.java
        DocumentState.java
        OutlineItemTreeNode.java
  - javafx
    - src
      - main
        java
        org
        icepdf
        os
        examples
        javafx
        PdfFXViewer.java
  - jws
    - src
      - org
        icepdf
        os
        examples
        WebStart.java
  - loadingEvents
    - src
      - main
        java
        org
        icepdf
        os
        loadingEvents
        PageLoadingEvents.java
  - printservices
    - src
      - main
        java
        org
        icepdf
        os
        examples
        print
        PrintServices.java
  - search
    - component
      - src
        main
        java
        org
        icepdf
        os
        search
        SearchController.java
    - headless
      - src
        main
        java
        org
        icepdf
        os
        search
        SearchControllerHeadless.java
  - signatures
    - src
      - main
        java
        org
        icepdf
        os
        examples
        signatures
        SignatureVerification.java
- viewer
  - viewer-awt
    - src
      - main
        java
        org
        icepdf
        ri
        common
        AboutDialog.java
        ComponentKeyBinding.java
        CurrentPageChanger.java
        DocumentInformationDialog.java
        EscapeJDialog.java
        FileExtensionUtils.java
        FloatTextFieldInputVerifier.java
        FloatTextFieldKeyListener.java
        KeyEventConstants.java
        KeyListenerPageChanger.java
        KeyListenerPageColumnChanger.java
        MouseWheelCurrentPageListener.java
        MouseWheelListenerPageChanger.java
        MyAnnotationCallback.java
        MyGUISecurityCallback.java
        PageNumberTextFieldInputVerifier.java
        PageNumberTextFieldKeyListener.java
        PageThumbnailComponent.java
        PermissionsDialog.java
        PrintHelper.java
        PrintJobWatcher.java
        PrinterTask.java
        SwingController.java
        SwingViewBuilder.java
        SwingWorker.java
        TextExtractionGlue.java
        ToolbarLayout.java
        UndoCaretaker.java
        ViewModel.java
        WindowManagementCallback.java
        fonts
        FindFontsTask.java
        FontDialog.java
        search
        DocumentSearchControllerImpl.java
        DocumentSearchModelImpl.java
        tools
        AnnotationSelectionHandler.java
        CircleAnnotationHandler.java
        CommonToolHandler.java
        DynamicZoomHandler.java
        FreeTextAnnotationHandler.java
        HighLightAnnotationHandler.java
        InkAnnotationHandler.java
        LineAnnotationHandler.java
        LineArrowAnnotationHandler.java
        LinkAnnotationHandler.java
        MouseWheelZoom.java
        PanningHandler.java
        SelectionBoxHandler.java
        SquareAnnotationHandler.java
        StrikeOutAnnotationHandler.java
        TextAnnotationHandler.java
        TextSelection.java
        TextSelectionPageHandler.java
        TextSelectionViewHandler.java
        ToolHandler.java
        UnderLineAnnotationHandler.java
        ZoomInPageHandler.java
        ZoomInViewHandler.java
        ZoomOutPageHandler.java
        utility
        annotation
        ActionsPanel.java
        AnnotationDialogAdapter.java
        AnnotationPanel.java
        AnnotationPanelAdapter.java
        AnnotationProperties.java
        BorderPanel.java
        CircleAnnotationPanel.java
        FlagsPanel.java
        FreeTextAnnotationPanel.java
        GoToActionDialog.java
        InkAnnotationPanel.java
        LineAnnotationPanel.java
        LinkAnnotationPanel.java
        NameJTree.java
        NameTreeDialog.java
        NameTreeNode.java
        SquareAnnotationPanel.java
        TextAnnotationPanel.java
        TextMarkupAnnotationPanel.java
        ValueLabelItem.java
        attachment
        AttachmentPanel.java
        FileTableModel.java
        layers
        LayersPanel.java
        LayersTree.java
        LayersTreeNode.java
        outline
        OutlineItemTreeNode.java
        OutlinesTree.java
        search
        SearchPanel.java
        signatures
        SigPropertyTreeNode.java
        SigVerificationTask.java
        SignatureCellRender.java
        SignatureCertTreeNode.java
        SignatureTreeNode.java
        SignatureUtilities.java
        SignaturesPanel.java
        SignaturesTree.java
        thumbs
        ThumbnailsPanel.java
        views
        AbstractDocumentView.java
        AbstractDocumentViewModel.java
        AbstractPageViewComponent.java
        AnnotationCallback.java
        AnnotationComponent.java
        CollectionDocumentView.java
        Controller.java
        DocumentView.java
        DocumentViewComponent.java
        DocumentViewController.java
        DocumentViewControllerImpl.java
        DocumentViewModel.java
        DocumentViewModelImpl.java
        ModifiedFlowLayout.java
        OneColumnPageView.java
        OnePageView.java
        PageViewComponent.java
        PageViewComponentImpl.java
        PageViewDecorator.java
        ResizableBorder.java
        TwoColumnPageView.java
        TwoPageView.java
        annotations
        AbstractAnnotationComponent.java
        AnnotationComponentFactory.java
        AnnotationState.java
        CircleAnnotationComponent.java
        FreeTextAnnotationComponent.java
        FreeTextArea.java
        InkAnnotationComponent.java
        LineAnnotationComponent.java
        LinkAnnotationComponent.java
        MarkupAnnotationComponent.java
        PolyLineAnnotationComponent.java
        PolygonAnnotationComponent.java
        PopupAnnotationComponent.java
        ScalableField.java
        ScalableJComboBox.java
        ScalableJList.java
        ScalableJScrollPane.java
        ScalablePasswordField.java
        ScalableTextArea.java
        ScalableTextField.java
        SignatureFieldComponent.java
        SquareAnnotationComponent.java
        TextAnnotationComponent.java
        TextMarkupAnnotationComponent.java
        WidgetAnnotationComponent.java
        signatures
        CertificatePropertiesDialog.java
        SignaturePropertiesDialog.java
        SignatureValidationDialog.java
        SignatureValidationPanel.java
        SignatureValidationStatus.java
        SignerInfoPanel.java
        SignerSummaryPanel.java
        listeners
        DefaultPageViewLoadingListener.java
        MetricsPageLoadingListener.java
        PageViewLoadingListener.java
        images
        Images.java
        util
        BareBonesBrowserLaunch.java
        FontPropertiesManager.java
        MacOSAdapter.java
        Parse.java
        PropertiesManager.java
        Resources.java
        SearchTextTask.java
        StringResource.java
        TextExtractionTask.java
        URLAccess.java
        jxlayer
        JXLayer.java
        plaf
        LayerUI.java
        viewer
        Launcher.java
        Main.java
        SplashWindow.java
        WindowManager.java

/*
 * Copyright 2006-2017 ICEsoft Technologies Canada Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the
 * License. You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.icepdf.ri.common.search;

import org.icepdf.core.pobjects.Document;
import org.icepdf.core.pobjects.graphics.text.LineText;
import org.icepdf.core.pobjects.graphics.text.PageText;
import org.icepdf.core.pobjects.graphics.text.WordText;
import org.icepdf.core.search.DocumentSearchController;
import org.icepdf.core.search.SearchTerm;
import org.icepdf.ri.common.SwingController;

import java.util.ArrayList;
import java.util.Collection;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Document search controller used to manage document searches.  This
 * class takes care of many of the performance issues of doing searches on
 * large documents and is also used by PageViewComponentImpl to highlight
 * search results.
 * <br>
 * This implementation uses a simple search algorithm that will work well for most
 * users. This class can be extended and the method {@link #searchHighlightPage(int)}
 * can be overridden for custom search implementations.
 * <br>
 * The DocumentSearchControllerImpl can be constructed to be used with the
 * Viewer RI source code via the constructor that takes a SwingController as
 * a parameter.  The second variation is intended for a headless environment
 * where Swing is not needed; the constructor for this instance takes a
 * Document as a parameter.
 *
 * @since 4.0
 */
public class DocumentSearchControllerImpl implements DocumentSearchController {

    private static final Logger logger =
            Logger.getLogger(DocumentSearchControllerImpl.class.toString());

    // search model contains caching and memory optimizations.
    protected DocumentSearchModelImpl searchModel;
    // parent controller used to get at RI controllers and models.
    protected SwingController viewerController;
    // assigned document for headless searching.
    protected Document document;

    /**
     * Create a new instance of the search controller. A search model is created
     * for this instance.
     *
     * @param viewerController parent controller/mediator.
     */
    public DocumentSearchControllerImpl(SwingController viewerController) {
        // every controller instance owns its own search model.
        this.searchModel = new DocumentSearchModelImpl();
        // keep the parent controller/mediator for later use.
        this.viewerController = viewerController;
    }

    /**
     * Create a new instance of the search controller intended to be used in a
     * headless environment.  A search model is created for this instance.
     *
     * @param document document to search.
     */
    public DocumentSearchControllerImpl(Document document) {
        // headless use: the document is assigned directly, no viewer controller.
        this.document = document;
        // every controller instance owns its own search model.
        this.searchModel = new DocumentSearchModelImpl();
    }

    /**
     * Searches the given page using the specified term and properties.  The
     * search model is updated to store the page's PageText as a weak reference
     * which can be queried using isSearchHighlightNeeded to efficiently make
     * sure that a page's text is highlighted even after a dispose/init cycle.
     * If the text state is no longer present then the search should be executed
     * again.
     * <br>
     * This method clears the search results for the page before it searches. If
     * you wish to have cumulative search results then search terms should
     * be added with {@link #addSearchTerm(String, boolean, boolean)} and the
     * method {@link #searchHighlightPage(int)} should be called after each term is
     * added or after all have been added.
     *
     * @param pageIndex     page to search
     * @param caseSensitive if true use case sensitive searches
     * @param wholeWord     if true use whole word searches
     * @param term          term to search for
     * @return number of hits for this page.
     */
    public int searchHighlightPage(int pageIndex, String term,
                                   boolean caseSensitive, boolean wholeWord) {
        // step 1: clear the previous search for this page.
        clearSearchHighlight(pageIndex);

        // step 2: register the new term together with its matching options.
        addSearchTerm(term, caseSensitive, wholeWord);

        // step 3: run the page search; its hit count is the result.
        final int pageHitCount = searchHighlightPage(pageIndex);
        return pageHitCount;
    }

    /**
     * Searches the page index given the search terms that have been added
     * with {@link #addSearchTerm(String, boolean, boolean)}.  If search
     * hits were detected then the Page's PageText is added to the cache.
     * <br>
     * This method represents the core search algorithm for this
     * DocumentSearchController implementation.  This method can be overridden
     * if a different search algorithm or functionality is needed.
     *
     * @param pageIndex page index to search
     * @return number of hits found for this page.
     */
    public int searchHighlightPage(int pageIndex) {

        // get search terms from model and search for each occurrence.
        Collection<SearchTerm> terms = searchModel.getSearchTerms();

        // get our page text reference
        PageText pageText = getPageText(pageIndex);

        // some pages just don't have any text.
        if (pageText == null) {
            return 0;
        }

        // search hit count, summed over every search term.
        int hitCount = 0;

        // we need to do the search for each term.
        for (SearchTerm term : terms) {

            // number of consecutive words that make up one complete hit.
            int phraseLength = term.getTerms().size();
            // a term without any words can never produce a hit; skipping it
            // also avoids indexing into an empty word list further down.
            if (phraseLength == 0) {
                continue;
            }

            // words matched so far for the hit in progress.  The list is kept
            // across line boundaries because a hit can span lines.
            ArrayList<WordText> candidateHits =
                    new ArrayList<WordText>(phraseLength);

            // start iteration over words.
            ArrayList<LineText> pageLines = pageText.getPageLines();
            if (pageLines == null) {
                continue;
            }
            for (LineText pageLine : pageLines) {
                java.util.List<WordText> lineWords = pageLine.getWords();
                for (WordText word : lineWords) {
                    // tentatively extend the hit in progress with this word.
                    candidateHits.add(word);
                    if (!matchesTermAt(term, word, candidateHits.size() - 1)) {
                        // the word breaks the hit in progress.  Rather than
                        // throwing everything away, fall back to the longest
                        // trailing run of words (ending with this word) that
                        // still matches the start of the phrase, so that a
                        // hit beginning inside the failed attempt is not
                        // missed, e.g. "a b" inside "a a b".
                        do {
                            candidateHits.remove(0);
                        } while (!candidateHits.isEmpty()
                                && !isPhrasePrefix(term, candidateHits));
                    }

                    // check if we have found what we're looking for
                    if (candidateHits.size() == phraseLength) {
                        // highlight every word that is part of the hit.
                        for (WordText wordHit : candidateHits) {
                            wordHit.setHighlighted(true);
                            wordHit.setHasHighlight(true);
                        }
                        // count the hit and start over again.
                        hitCount++;
                        candidateHits.clear();
                    }
                }
            }
        }

        // if we have a hit we'll add it to the model cache
        if (hitCount > 0) {
            searchModel.addPageSearchHit(pageIndex, pageText);
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("Found search hits on page " + pageIndex +
                        " hit count " + hitCount);
            }
        }

        return hitCount;
    }

    /**
     * Tests a single page word against one word of the search phrase using
     * the term's case sensitivity and whole word settings.
     *
     * @param term      search term that supplies the phrase words and options.
     * @param word      page word to test.
     * @param termIndex index of the phrase word to compare against.
     * @return true if the page word matches the phrase word at termIndex.
     */
    private static boolean matchesTermAt(SearchTerm term, WordText word,
                                         int termIndex) {
        // apply case sensitivity rule.  Only the page word is lower-cased
        // here; presumably the stored phrase words were already lower-cased
        // when the term was added -- confirm against addSearchTerm.
        String wordString = term.isCaseSensitive() ? word.toString() :
                word.toString().toLowerCase();
        if (term.isWholeWord()) {
            // whole word search, the word must equal the phrase word.
            return wordString.equals(term.getTerms().get(termIndex));
        }
        // otherwise the phrase word only has to occur inside the page word.
        return wordString.contains(term.getTerms().get(termIndex));
    }

    /**
     * Checks that the given words match, position by position, the first
     * words.size() words of the term's search phrase.
     *
     * @param term  search term that supplies the phrase words and options.
     * @param words candidate words, must not be longer than the phrase.
     * @return true if every candidate word matches the phrase word at the
     * same position.
     */
    private static boolean isPhrasePrefix(SearchTerm term,
                                          java.util.List<WordText> words) {
        for (int i = 0, max = words.size(); i < max; i++) {
            if (!matchesTermAt(term, words.get(i), i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Searches the page index given the search terms that have been added
     * with {@link #addSearchTerm(String, boolean, boolean)}.  If search
     * hits were detected then the Page's PageText is added to the cache.
     * <br>
     * This method differs from {@link #searchHighlightPage(int)} in that
     * it returns a list of LineText fragments for each hit but the LineText
     * is padded by pre and post words that surround the hit in the page
     * context.
     * <br>
     * This method represents the core search algorithm for this
     * DocumentSearchController implementation.  This method can be overridden
     * if a different search algorithm or functionality is needed.
     *
     * @param pageIndex   page index to search
     * @param wordPadding word padding on either side of hit to give context
     *                    to found words in the returned LineText
     * @return list of contextual hits for the given page.  If there are no
     * hits an empty list is returned.
     */
    public ArrayList<LineText> searchHighlightPage(int pageIndex, int wordPadding) {
        // get search terms from model and search for each occurrence.
        Collection<SearchTerm> terms = searchModel.getSearchTerms();

        // search hit list
        ArrayList<LineText> searchHits = new ArrayList<LineText>();

        // get our our page text reference
        PageText pageText = getPageText(pageIndex);

        // some pages just don't have any text.
        if (pageText == null) {
            return searchHits;
        }

        // we need to do the search for  each term.
        for (SearchTerm term : terms) {

            // found word index to keep track of when we have found a hit
            int searchPhraseHitCount = 0;
            int searchPhraseFoundCount = term.getTerms().size();
            // list of found words for highlighting, as hits can span
            // lines and pages
            ArrayList<WordText> searchPhraseHits =
                    new ArrayList<WordText>(searchPhraseFoundCount);

            // start iteration over words.
            ArrayList<LineText> pageLines = pageText.getPageLines();
            if (pageLines != null) {
                for (LineText pageLine : pageLines) {
                    java.util.List<WordText> lineWords = pageLine.getWords();
                    // compare words against search terms.
                    String wordString;
                    WordText word;
                    for (int i = 0, max = lineWords.size(); i < max; i++) {
                        word = lineWords.get(i);

                        // apply case sensitivity rule.
                        wordString = term.isCaseSensitive() ? word.toString() :
                                word.toString().toLowerCase();
                        // word matches, we have to match full word hits
                        if (term.isWholeWord()) {
                            if (wordString.equals(
                                    term.getTerms().get(searchPhraseHitCount))) {
                                // add word to potentials
                                searchPhraseHits.add(word);
                                searchPhraseHitCount++;
                            }
                            // reset the counters.
                            else {
                                searchPhraseHits.clear();
                                searchPhraseHitCount = 0;
                            }
                        }
                        // otherwise we look for an index of hits
                        else {
                            // found a potential hit, depends on the length
                            // of searchPhrase.
                            if (wordString.contains(term.getTerms().get(searchPhraseHitCount))) {
                                // add word to potentials
                                searchPhraseHits.add(word);
                                searchPhraseHitCount++;
                            }
                            // reset the counters.
                            else {
                                searchPhraseHits.clear();
                                searchPhraseHitCount = 0;
                            }

                        }
                        // check if we have found what we're looking for
                        if (searchPhraseHitCount == searchPhraseFoundCount) {

                            LineText lineText = new LineText();
                            int lineWordsSize = lineWords.size();
                            java.util.List<WordText> hitWords = lineText.getWords();
                            // add pre padding
                            int start = i - searchPhraseHitCount - wordPadding + 1;
                            start = start < 0 ? 0 : start;
                            int end = i - searchPhraseHitCount + 1;
                            end = end < 0 ? 0 : end;
                            for (int p = start; p < end; p++) {
                                hitWords.add(lineWords.get(p));
                            }

                            // iterate of found, highlighting words
                            for (WordText wordHit : searchPhraseHits) {
                                wordHit.setHighlighted(true);
                                wordHit.setHasHighlight(true);
                            }
                            hitWords.addAll(searchPhraseHits);

                            // add word padding to front of line
                            start = i + 1;
                            start = start > lineWordsSize ? lineWordsSize : start;
                            end = start + wordPadding;
                            end = end > lineWordsSize ? lineWordsSize : end;
                            for (int p = start; p < end; p++) {
                                hitWords.add(lineWords.get(p));
                            }

                            // add the hits to our list.
                            searchHits.add(lineText);

                            searchPhraseHits.clear();
                            searchPhraseHitCount = 0;
                        }

                    }
                }
            }
        }

        // if we have a hit we'll add it to the model cache
        if (searchHits.size() > 0) {
            searchModel.addPageSearchHit(pageIndex, pageText);
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("Found search hits on page " + pageIndex +
                        " hit count " + searchHits.size());
            }
        }

        return searchHits;
    }

    /**
     * Search page but only return words that are hits.  Highlighting is still
     * applied but this method can be used if other data needs to be extracted
     * from the found words.
     *
     * @param pageIndex page to search
     * @return list of words that match the term and search properties.
     */
    public ArrayList<WordText> searchPage(int pageIndex) {
        // run the highlighting pass first; its return value is the hit count
        // and is used both as the guard and as the initial list capacity.
        int hitCount = searchHighlightPage(pageIndex);
        if (hitCount <= 0) {
            // nothing was highlighted, callers get null rather than an empty list.
            return null;
        }

        // the model hands back the page text it recorded for this page.
        PageText hitText = searchModel.getPageTextHit(pageIndex);
        if (hitText == null) {
            return null;
        }

        ArrayList<WordText> highlighted = new ArrayList<WordText>(hitCount);
        ArrayList<LineText> lines = hitText.getPageLines();
        if (lines == null) {
            // page text without lines: return the (empty) result list.
            return highlighted;
        }

        // walk every line and keep only the words flagged as highlighted.
        for (LineText line : lines) {
            java.util.List<WordText> candidates = line.getWords();
            if (candidates == null) {
                continue;
            }
            for (WordText candidate : candidates) {
                if (candidate.isHighlighted()) {
                    highlighted.add(candidate);
                }
            }
        }
        return highlighted;
    }

    /**
     * Add the search term to the list of search terms.  The term is split
     * into words based on white space and punctuation. No checks are done
     * for duplication.
     * <br>
     * A new search needs to be executed for this change to take place.
     *
     * @param term          single word or phrase to search for.
     * @param caseSensitive is search case sensitive.
     * @param wholeWord     is search whole word sensitive.
     * @return searchTerm newly created search term.
     */
    public SearchTerm addSearchTerm(String term, boolean caseSensitive,
                                    boolean wholeWord) {
        // remember the phrase as supplied by the caller; this is what the
        // SearchTerm is constructed with as its first argument.
        String suppliedTerm = String.valueOf(term);

        // a case-insensitive term is lower-cased before it is split into words;
        // a case-sensitive term is parsed untouched.
        String normalizedTerm = caseSensitive ? term : term.toLowerCase();

        // break the phrase into the individual tokens that get matched
        // against WordText instances during a search.
        ArrayList<String> phraseWords = searchPhraseParser(normalizedTerm);

        // register the new term with the model and hand it back so the caller
        // can manage (e.g. later remove) it.
        SearchTerm created =
                new SearchTerm(suppliedTerm, phraseWords, caseSensitive, wholeWord);
        searchModel.addSearchTerm(created);
        return created;
    }

    /**
     * Removes the specified search term from the search. A new search needs
     * to be executed for this change to take place.
     *
     * @param searchTerm search term to remove.
     */
    public void removeSearchTerm(SearchTerm searchTerm) {
        // the term is only removed from the search model; this call does not
        // re-run a search or touch any highlight state itself.
        searchModel.removeSearchTerm(searchTerm);
    }

    /**
     * Clear all searched items for specified page.
     *
     * @param pageIndex page index to clear
     */
    public void clearSearchHighlight(int pageIndex) {
        // delegate to the model's per-page clearSearchResults; exactly what the
        // model discards for the page is decided there and not visible here.
        searchModel.clearSearchResults(pageIndex);
    }

    /**
     * Clears all highlighted text states for this document.  This is optimized
     * to use the SearchHighlightModel to only clear pages that still have
     * selected states.
     */
    public void clearAllSearchHighlight() {
        // no-argument variant of the model's clearSearchResults, i.e. the call
        // is not scoped to a single page index.
        searchModel.clearSearchResults();
    }

    /**
     * Test to see if a search highlight is needed.  This is done by first
     * checking if there is a hit for this page and if the PageText object is the
     * same as the one specified as a param.  If they are not the same PageText
     * object then we need to do a refresh as the page was disposed and
     * reinitialized with new content.
     *
     * @param pageIndex page index to test for results.
     * @param pageText  current pageText object associated with the pageIndex.
     * @return true if refresh is needed, false otherwise.
     */
    public boolean isSearchHighlightRefreshNeeded(int pageIndex, PageText pageText) {

        // the decision is delegated entirely to the model, which presumably
        // checks whether it still holds a reference to this page's PageText.
        // NOTE(review): the model method is named isPageTextMatch while this
        // method reports "refresh needed"; confirm the model returns true on a
        // mismatch, otherwise the result here is inverted.
        return searchModel.isPageTextMatch(pageIndex, pageText);
    }

    /**
     * Disposes controller clearing resources.
     */
    public void dispose() {
        // same model call as clearAllSearchHighlight(); no other state is
        // released by this method.
        searchModel.clearSearchResults();
    }

    /**
     * Gets the page text for the given page index.
     *
     * @param pageIndex page index of page to extract text.
     * @return page's page text,  can be null.
     */
    protected PageText getPageText(int pageIndex) {
        try {
            // prefer the document currently open in the viewer controller.
            if (viewerController != null) {
                return viewerController.getDocument().getPageViewText(pageIndex);
            }
            // otherwise fall back to the document held directly by this controller.
            if (document != null) {
                return document.getPageViewText(pageIndex);
            }
        } catch (InterruptedException interrupted) {
            // restore the interrupt flag so code further up the stack can react,
            // then report the aborted extraction and fall through to null.
            Thread.currentThread().interrupt();
            logger.fine("PageText extraction thread was interrupted.");
        }
        // no document available, or the extraction was interrupted.
        return null;
    }

    /**
     * Utility for breaking the pattern up into searchable words.  Breaks are
     * done on white spaces and punctuation.
     *
     * @param phrase pattern to search words for.
     * @return list of words that make up phrase, words, spaces, punctuation.
     */
    protected ArrayList<String> searchPhraseParser(String phrase) {
        // surrounding white space carries no search value, drop it up front.
        String text = phrase.trim();
        int length = text.length();

        // tokens in phrase order: words plus each separator as its own entry.
        ArrayList<String> tokens = new ArrayList<String>();

        // index where the word currently being scanned begins.
        int tokenStart = 0;
        // previously visited character; lets punctuation that directly follows
        // a digit (e.g. the '.' in "3.5") stay inside the word.
        char previous = 0;

        for (int index = 0; index < length; index++) {
            char current = text.charAt(index);
            boolean atEnd = index + 1 == length;

            if (WordText.isWhiteSpace(current)
                    || (WordText.isPunctuation(current) && !WordText.isDigit(previous))) {
                // flush the word collected so far, if there is one.
                if (tokenStart != index) {
                    tokens.add(text.substring(tokenStart, index));
                }
                // the separator itself is recorded as a one character token.
                tokens.add(text.substring(index, index + 1));
                // the next word begins right after the separator, unless the
                // separator was the final character of the phrase.
                if (!atEnd) {
                    tokenStart = index + 1;
                }
            } else if (atEnd) {
                // ran out of input inside a word: emit the trailing word.
                tokens.add(text.substring(tokenStart, length));
            }
            previous = current;
        }
        return tokens;
    }
}