/* * PatternSearchMatcher.java - Regular expression matcher * :noTabs=false: * * Copyright (C) 2006 Marcelo Vanzin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package org.gjt.sp.jedit.search; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.gjt.sp.util.ReverseCharSequence; /** * A regular expression string matcher using java.util.regex. * @see java.util.regex.Pattern * * @author Marcelo Vanzin * @version $Id: PatternSearchMatcher.java 19386 2011-02-24 11:06:57Z kpouer $ * @since jEdit 4.3pre5 */ public class PatternSearchMatcher extends SearchMatcher { //{{{ PatternSearchMatcher constructors /** * Creates a new regular expression string matcher. * @see java.util.regex.Pattern * @param search the search pattern * @param ignoreCase <code>true</code> if you want to ignore case * @since jEdit 4.3pre5 */ public PatternSearchMatcher(String search, boolean ignoreCase) { pattern = search; flags = getFlag(ignoreCase); } /** * Creates a new regular expression string matcher. * @see java.util.regex.Pattern * @param re the compiled regex * @param ignoreCase <code>true</code> if you want to ignore case * @param wholeWord <code>true</code> to search for whole word only * @since jEdit 4.5pre1 */ public PatternSearchMatcher(Pattern re, boolean ignoreCase, boolean wholeWord) { this(re.pattern(), ignoreCase); this.re = re; this.wholeWord = wholeWord; } /** * Creates a new regular expression already compiled. * @see java.util.regex.Pattern * @param re the compiled regex * @param ignoreCase <code>true</code> if you want to ignore case * @since jEdit 4.3pre13 */ public PatternSearchMatcher(Pattern re, boolean ignoreCase) { this(re, ignoreCase, false); } //}}} //{{{ nextMatch() method /** * {@inheritDoc} * <p>Reverse regex search is done by searching from the beginning to * just prior to the current match, so will be inefficient for large * buffers.</p> */ @Override public SearchMatcher.Match nextMatch(CharSequence text, boolean start, boolean end, boolean firstTime, boolean reverse) { // "For the mean time, there is no way to automatically generate a sexeger" // // http://japhy.perlmonk.org/sexeger/sexeger.html // // So ... for reverse regex searches we will search // the string in the forward direction and // return the last match. // Since we search the String in the forward direction, // (even for reverse searches) un-reverse the ReverseCharSequence. if (text instanceof ReverseCharSequence) text = ((ReverseCharSequence)text).baseSequence(); if (re == null) re = Pattern.compile(pattern, flags); // if the pattern begins with "^", avoid spurious match at the // start of input sequence which is not a start of line. int matchStart = 0; if (!start && re.pattern().charAt(0) == '^') { Matcher sol = Pattern.compile("^", flags).matcher(text); // Ignore the first match since it is not a start of line. sol.find(); // If the second match is not found, the real pattern also // can't match. if (!sol.find()) return null; // Skip the text to the second match, which can be the first // match for the real pattern. matchStart = sol.start(); } Matcher match = re.matcher(text); if (!match.find(matchStart)) return null; // Special care for zero width matches. Without this care, // the caller will fall into an infinite loop, for non-reverse // search. if (!reverse && !firstTime && match.start() == 0 && match.end() == 0) { if (!match.find()) return null; } Match previous = null; while (true) { // if we're not at the end of the buffer and we // match the end of the text, and the pattern ends with a "$", // ignore the match. // The match at the end the buffer which immediately follows // the final newline is also ignored because it is generally // not expected as an EOL. if ((!end || (text.charAt(text.length() - 1) == '\n')) && match.end() == text.length() && pattern.charAt(pattern.length() - 1) == '$') { if (previous != null) { returnValue.start = previous.start; returnValue.end = previous.end; returnValue.substitutions = previous.substitutions; break; } else { return null; } } returnValue.substitutions = new String[match.groupCount() + 1]; for(int i = 0; i < returnValue.substitutions.length; i++) { returnValue.substitutions[i] = match.group(i); } int _start = match.start(); int _end = match.end(); returnValue.start = _start; returnValue.end = _end; if (wholeWord && !isWholeWord(text, _start, _end)) { if (!match.find()) return null; continue; } // For non-reversed searches, we break immediately // to return the first match. For reversed searches, // we continue until no more matches are found if (!reverse || !match.find()) { // For reverse search, check for zero width match at // the end of text. if (reverse && !firstTime && returnValue.start == text.length() && returnValue.end == text.length()) { if (previous != null) { returnValue.start = previous.start; returnValue.end = previous.end; returnValue.substitutions = previous.substitutions; } else { return null; } } break; } // Save the result for reverse zero width match. if (previous == null) { previous = new Match(); } previous.start = returnValue.start; previous.end = returnValue.end; previous.substitutions = returnValue.substitutions; } if (reverse) { // The caller assumes we are searching a reversed // CharSegment, so we need to reverse the indices // before returning int len = returnValue.end - returnValue.start; returnValue.start = text.length() - returnValue.end; returnValue.end = returnValue.start + len; } return returnValue; } //}}} //{{{ toString() method @Override public String toString() { boolean ignoreCase = (flags & Pattern.CASE_INSENSITIVE) != 0; return "PatternSearchMatcher[" + pattern + ',' + ignoreCase + ']'; } //}}} static int getFlag(boolean ignoreCase) { int flags = Pattern.MULTILINE; if (ignoreCase) flags |= Pattern.CASE_INSENSITIVE; return flags; } //{{{ Private members private int flags; private Pattern re; private final String pattern; //}}} }