Matcher.java example

Explorer
jhotdraw-master
/*
 * @(#)Matcher.java
 *
 * Copyright (c) 2007 The authors and contributors of JHotDraw.
 * You may not use, copy or modify this file, except in compliance with the 
 * accompanying license terms.
 */
package org.jhotdraw.samples.teddy.regex;

import javax.swing.text.*;

/**
 * Searches for an occurrence of a case (in)sensitive text in a document. This is
 * a rather slow implementation that does not use advanced techniques such as
 * Boyer-Moore.
 *
 * @author Werner Randelshofer
 * @version $Id$
 */
public class Matcher {

    /**
     * The document to be examined.
     */
    private final Document document;
    /**
     * The string to be matched.
     */
    private final String findString;
    /**
     * The start index for the next findNext/findPrevious operation.
     */
    private int startIndex;

    /**
     * The search characters in lower case. Refers to the same array as
     * {@link #matchUpperCase} when the search is case sensitive.
     */
    private final char[] matchLowerCase;

    /**
     * The search characters in upper case. Refers to the same array as
     * {@link #matchLowerCase} when the search is case sensitive.
     */
    private final char[] matchUpperCase;

    /**
     * True if the search is case sensitive.
     */
    private final boolean matchCase;

    /**
     * The match type.
     */
    private final MatchType matchType;

    /**
     * Creates a new instance of Matcher which performs a case sensitive
     * {@code CONTAINS} search.
     *
     * @param document The document to be examined
     * @param findString The string to be searched.
     */
    public Matcher(Document document, String findString) {
        this(document, findString, true, MatchType.CONTAINS);
    }

    /**
     * Creates a new instance of Matcher.
     *
     * @param document The document to be examined
     * @param findString The string to be searched.
     * @param matchCase Set to true for case sensitive search.
     * @param matchType Sets the match type.
     */
    public Matcher(Document document, String findString, boolean matchCase, MatchType matchType) {
        this.document = document;
        this.findString = findString;
        this.matchCase = matchCase;
        this.matchType = matchType;
        startIndex = 0;

        // Convert to chars for efficiency.
        if (matchCase) {
            char[] exact = findString.toCharArray();
            matchLowerCase = exact;
            matchUpperCase = exact;
        } else {
            // Fold character by character instead of using String.toUpperCase()
            // and String.toLowerCase(): the String variants may change the
            // length of the text (e.g. "ß" becomes "SS"), which would leave
            // the two arrays with different lengths.
            matchUpperCase = foldCase(findString, true);
            matchLowerCase = foldCase(findString, false);
        }
    }

    /**
     * Returns the string to be matched.
     */
    public String getFindString() {
        return findString;
    }

    /**
     * Returns true if this matcher performs a case sensitive search.
     */
    public boolean isMatchCase() {
        return matchCase;
    }

    /**
     * Returns the match type.
     */
    public MatchType getMatchType() {
        return matchType;
    }

    /**
     * Sets the start index for the findNext(), findPrevious() methods.
     */
    public void setStartIndex(int newValue) {
        startIndex = newValue;
    }

    /**
     * Sets the start index and then attempts to find the next occurrence of
     * the search string at or after that index.
     *
     * @param startIndex the index from which to start the search.
     * @return the index of the first occurrence of the search string, starting
     * at the specified offset, or -1 if no occurrence was found.
     * @throws IndexOutOfBoundsException if the document rejects the start
     * index.
     */
    public int findNext(int startIndex) {
        this.startIndex = startIndex;
        return findNext();
    }

    /**
     * Attempts to find the next occurrence of the search string, scanning
     * forward from the current start index.
     * <p>
     * The start index is not advanced by a successful match. To continue
     * after a match, set a new start index with {@link #setStartIndex(int)}
     * or call {@link #findNext(int)}.
     *
     * @return the index of the first occurrence of the search string at or
     * after the start index, or -1 if no occurrence was found.
     * @throws IndexOutOfBoundsException if the document rejects the start
     * index.
     */
    public int findNext() {
        int patternLength = matchLowerCase.length;
        int documentLength = document.getLength();

        // Don't match empty strings and don't match if there is not enough
        // text left between the start index and the end of the document.
        if (patternLength == 0 || documentLength - patternLength < startIndex) {
            return -1;
        }

        try {
            // Request the remaining text as one contiguous segment, so that a
            // candidate can be re-examined from every start position.
            Segment text = new Segment();
            text.setPartialReturn(false);
            document.getText(startIndex, documentLength - startIndex, text);

            int lastCandidate = text.count - patternLength;
            for (int candidate = 0; candidate <= lastCandidate; candidate++) {
                if (matchesAt(text, candidate)) {
                    int foundIndex = startIndex + candidate;
                    if (isAcceptedMatch(foundIndex)) {
                        return foundIndex;
                    }
                }
            }
            return -1;
        } catch (BadLocationException e) {
            throw outOfBounds(e);
        }
    }

    /**
     * Sets the start index and then attempts to find the previous occurrence
     * of the search string, scanning backward from that index.
     *
     * @param startIndex the index of the last character that may be part of
     * the match.
     * @return the index of the first character of the occurrence found, or -1
     * if no occurrence was found.
     * @throws IndexOutOfBoundsException if the document rejects the start
     * index.
     */
    public int findPrevious(int startIndex) {
        this.startIndex = startIndex;
        return findPrevious();
    }

    /**
     * Attempts to find the previous occurrence of the search string, scanning
     * backward from the current start index.
     * <p>
     * Only the text from the beginning of the document up to and including
     * the character at the start index is examined, so a match ends at or
     * before the start index. The start index is not changed by a successful
     * match. To continue before a match, set a new start index with
     * {@link #setStartIndex(int)} or call {@link #findPrevious(int)}.
     *
     * @return the index of the first character of the occurrence found, or -1
     * if no occurrence was found.
     * @throws IndexOutOfBoundsException if the document rejects the start
     * index.
     */
    public int findPrevious() {
        int patternLength = matchLowerCase.length;

        // Don't match empty strings and don't match if we are too close to
        // the beginning of the document.
        if (patternLength == 0 || startIndex < patternLength - 1) {
            return -1;
        }

        try {
            // For simplicity, we request all text up to and including the
            // start index in a single segment.
            Segment text = new Segment();
            text.setPartialReturn(false);
            document.getText(0, startIndex + 1, text);

            // The segment starts at document offset 0, so a candidate position
            // inside the segment is also the index inside the document.
            for (int candidate = text.count - patternLength; candidate >= 0; candidate--) {
                if (matchesAt(text, candidate) && isAcceptedMatch(candidate)) {
                    return candidate;
                }
            }
            return -1;
        } catch (BadLocationException e) {
            throw outOfBounds(e);
        }
    }

    /**
     * Resets the startIndex of the matcher to 0.
     */
    public void reset() {
        startIndex = 0;
    }

    /**
     * Returns a copy of the characters of the text in which each code point
     * is replaced by its upper case or lower case mapping. A mapping is only
     * applied if it occupies the same number of chars as the original code
     * point, so the result always has the same length as the text.
     */
    private static char[] foldCase(String text, boolean toUpper) {
        char[] folded = text.toCharArray();
        int i = 0;
        while (i < folded.length) {
            int codePoint = Character.codePointAt(folded, i);
            int width = Character.charCount(codePoint);
            int mapped = toUpper
                    ? Character.toUpperCase(codePoint)
                    : Character.toLowerCase(codePoint);
            if (Character.charCount(mapped) == width) {
                Character.toChars(mapped, folded, i);
            }
            i += width;
        }
        return folded;
    }

    /**
     * Returns true if all search characters match the segment characters
     * beginning at the given position relative to the start of the segment.
     * The caller must ensure that the segment holds enough characters.
     */
    private boolean matchesAt(Segment text, int position) {
        int base = text.offset + position;
        for (int i = 0; i < matchLowerCase.length; i++) {
            char current = text.array[base + i];
            if (current != matchUpperCase[i] && current != matchLowerCase[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if an occurrence of the search characters at the given
     * document index satisfies the word boundary rules of the match type.
     */
    private boolean isAcceptedMatch(int foundIndex) {
        if (matchType == MatchType.CONTAINS) {
            return true;
        } else if (matchType == MatchType.STARTS_WITH) {
            return !isWordChar(foundIndex - 1);
        } else if (matchType == MatchType.FULL_WORD) {
            return !isWordChar(foundIndex - 1)
                    && !isWordChar(foundIndex + matchLowerCase.length);
        }
        return false;
    }

    /**
     * Creates the exception which reports that the document rejected the
     * current start index, keeping the original exception as its cause.
     */
    private IndexOutOfBoundsException outOfBounds(BadLocationException cause) {
        IndexOutOfBoundsException e = new IndexOutOfBoundsException(
                "Illegal start index " + startIndex
                + " for document of length " + document.getLength());
        e.initCause(cause);
        return e;
    }

    /**
     * Returns true if the document has a letter or a digit at the given
     * index. Returns false if the document can not provide a character at
     * that index.
     */
    private boolean isWordChar(int index) {
        try {
            char ch = document.getText(index, 1).charAt(0);
            return Character.isLetterOrDigit(ch);
        } catch (BadLocationException e) {
            // An index outside of the document is a word boundary.
            return false;
        }
    }
}