/*
 * PatternSearchMatcher.java - Regular expression matcher
 * :noTabs=false:
 *
 * Copyright (C) 2006 Marcelo Vanzin
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

package org.gjt.sp.jedit.search;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.gjt.sp.util.ReverseCharSequence;

/**
 * A regular expression string matcher using java.util.regex.
 * <p>The pattern is compiled lazily on the first call to
 * {@link #nextMatch(CharSequence, boolean, boolean, boolean, boolean)}
 * unless an already compiled {@link Pattern} was given to the
 * constructor.</p>
 * @see java.util.regex.Pattern
 *
 * @author Marcelo Vanzin
 * @version $Id$
 * @since jEdit 4.3pre5
 */
public class PatternSearchMatcher extends SearchMatcher
{
	//{{{ PatternSearchMatcher constructors
	/**
	 * Creates a new regular expression string matcher.
	 * The pattern is not compiled here; it is compiled with
	 * {@link Pattern#MULTILINE} (plus {@link Pattern#CASE_INSENSITIVE}
	 * if requested) the first time a match is attempted.
	 * @see java.util.regex.Pattern
	 * @param search the search pattern
	 * @param ignoreCase <code>true</code> if you want to ignore case
	 * @since jEdit 4.3pre5
	 */
	public PatternSearchMatcher(String search, boolean ignoreCase)
	{
		pattern = search;
		flags = getFlag(ignoreCase);
	}

	/**
	 * Creates a new regular expression string matcher.
	 * @see java.util.regex.Pattern
	 * @param re the compiled regex
	 * @param ignoreCase <code>true</code> if you want to ignore case
	 * @param wholeWord <code>true</code> to search for whole word only
	 * @since jEdit 4.5pre1
	 */
	public PatternSearchMatcher(Pattern re, boolean ignoreCase, boolean wholeWord)
	{
		this(re.pattern(), ignoreCase);
		this.re = re;
		this.wholeWord = wholeWord;
	}

	/**
	 * Creates a new regular expression already compiled.
	 * @see java.util.regex.Pattern
	 * @param re the compiled regex
	 * @param ignoreCase <code>true</code> if you want to ignore case
	 * @since jEdit 4.3pre13
	 */
	public PatternSearchMatcher(Pattern re, boolean ignoreCase)
	{
		this(re, ignoreCase, false);
	} //}}}

	//{{{ nextMatch() method
	/**
	 * {@inheritDoc}
	 * <p>Reverse regex search is done by searching from the beginning to
	 * just prior to the current match, so will be inefficient for large
	 * buffers.</p>
	 * <p>The returned object is the matcher's shared result instance; it
	 * is overwritten by the next call.</p>
	 */
	@Override
	public SearchMatcher.Match nextMatch(CharSequence text, boolean start,
		boolean end, boolean firstTime, boolean reverse)
	{
		// "For the mean time, there is no way to automatically generate a sexeger"
		//
		// http://japhy.perlmonk.org/sexeger/sexeger.html
		//
		// So ... for reverse regex searches we will search
		// the string in the forward direction and
		// return the last match.

		// Since we search the String in the forward direction,
		// (even for reverse searches) un-reverse the ReverseCharSequence.
		if (text instanceof ReverseCharSequence)
			text = ((ReverseCharSequence)text).baseSequence();

		if (re == null)
			re = Pattern.compile(pattern, flags);

		// if the pattern begins with "^", avoid spurious match at the
		// start of input sequence which is not a start of line.
		// startsWith() is used instead of charAt(0) so that an empty
		// pattern does not throw.
		int matchStart = 0;
		if (!start && re.pattern().startsWith("^"))
		{
			Matcher sol = Pattern.compile("^", flags).matcher(text);
			// Ignore the first match since it is not a start of line.
			sol.find();
			// If the second match is not found, the real pattern also
			// can't match.
			if (!sol.find())
				return null;
			// Skip the text to the second match, which can be the first
			// match for the real pattern.
			matchStart = sol.start();
		}

		Matcher match = re.matcher(text);
		if (!match.find(matchStart))
		{
			// Check for special constructs, groups starting with (? are either pure,
			// non-capturing groups that do not capture text and do not count towards
			// the group total, or named-capturing group. Either way, need to remove
			// them and try again since they won't match because the selection doesn't
			// necessarily include the non-capturing part.
			Pattern p = removeNonCapturingGroups(re, flags);
			match = p.matcher(text);
			if (!match.matches())
				return null;
		}

		// Special care for zero width matches. Without this care,
		// the caller will fall into an infinite loop, for non-reverse
		// search.
		if (!reverse && !firstTime && match.start() == 0 && match.end() == 0)
		{
			if (!match.find())
				return null;
		}

		// Loop invariants, computed once. Both are written so that an
		// empty text or an empty pattern cannot cause an index error.
		final int textLength = text.length();
		final boolean endsWithNewline = textLength > 0
			&& text.charAt(textLength - 1) == '\n';
		final boolean patternEndsWithDollar = pattern.endsWith("$");

		Match previous = null;
		while (true)
		{
			// if we're not at the end of the buffer and we
			// match the end of the text, and the pattern ends with a "$",
			// ignore the match.
			// The match at the end the buffer which immediately follows
			// the final newline is also ignored because it is generally
			// not expected as an EOL.
			if ((!end || endsWithNewline)
				&& match.end() == textLength
				&& patternEndsWithDollar)
			{
				if (!restorePrevious(previous))
					return null;
				break;
			}

			returnValue.substitutions = new String[match.groupCount() + 1];
			for (int i = 0; i < returnValue.substitutions.length; i++)
			{
				returnValue.substitutions[i] = match.group(i);
			}

			int _start = match.start();
			int _end = match.end();

			returnValue.start = _start;
			returnValue.end = _end;

			if (wholeWord && !isWholeWord(text, _start, _end))
			{
				if (match.find())
					continue;
				// No more candidates. For a reverse search an earlier
				// accepted match may have been saved; return it instead
				// of reporting failure. For a forward search "previous"
				// is always null here, so this returns null as before.
				if (!restorePrevious(previous))
					return null;
				break;
			}

			// For non-reversed searches, we break immediately
			// to return the first match.  For reversed searches,
			// we continue until no more matches are found
			if (!reverse || !match.find())
			{
				// For reverse search, check for zero width match at
				// the end of text.
				if (reverse && !firstTime
					&& returnValue.start == textLength
					&& returnValue.end == textLength)
				{
					if (!restorePrevious(previous))
						return null;
				}
				break;
			}

			// Save the result for reverse zero width match.
			if (previous == null)
			{
				previous = new Match();
			}
			previous.start = returnValue.start;
			previous.end = returnValue.end;
			previous.substitutions = returnValue.substitutions;
		}

		if (reverse)
		{
			// The caller assumes we are searching a reversed
			// CharSegment, so we need to reverse the indices
			// before returning
			int len = returnValue.end - returnValue.start;
			returnValue.start = textLength - returnValue.end;
			returnValue.end = returnValue.start + len;
		}

		return returnValue;
	} //}}}

	//{{{ removeNonCapturingGroups() method
	/**
	 * Returns a pattern equal to <code>re</code> with every group that
	 * starts with <code>(?</code> removed, together with any quantifier
	 * that directly follows the group (<code>{n,m}</code>, <code>?</code>,
	 * <code>+</code>, <code>*</code>, optionally followed by a reluctant
	 * <code>?</code> or possessive <code>+</code> modifier).
	 * <p>Groups are removed wherever they occur, including at the very
	 * end of the pattern. A parenthesis preceded by an odd number of
	 * backslashes is treated as a literal and left alone. Parentheses
	 * inside character classes or <code>\Q...\E</code> quoting are not
	 * recognised as literals.</p>
	 * @param re the pattern to strip
	 * @param flags the flags used to compile the stripped pattern
	 * @return <code>re</code> itself if nothing was removed, otherwise a
	 * newly compiled pattern
	 * @throws java.util.regex.PatternSyntaxException if the stripped
	 * expression is not a valid regular expression
	 */
	public static Pattern removeNonCapturingGroups(Pattern re, int flags)
	{
		String p = re.pattern();
		String ncgroups = "[(][?].+?[)]";
		Matcher nc_matcher = Pattern.compile(ncgroups, flags).matcher(p);

		int from = 0;
		while (from <= p.length() && nc_matcher.find(from))
		{
			int index = nc_matcher.start();
			if (isEscaped(p, index))
			{
				// "\(?" is a literal parenthesis made optional, not
				// a group; keep looking after it.
				from = index + 1;
				continue;
			}

			int close = findGroupEnd(p, index);
			if (close < 0)
			{
				// parentheses do not balance, leave the pattern alone
				return re;
			}
			int end = skipQuantifier(p, close + 1);

			// delete the non-capturing group
			StringBuilder sb = new StringBuilder(p);
			sb.delete(index, end);

			// recurse to find any remaining non-capturing groups
			return removeNonCapturingGroups(
				Pattern.compile(sb.toString(), flags), flags);
		}
		return re;
	} //}}}

	//{{{ toString() method
	@Override
	public String toString()
	{
		boolean ignoreCase = (flags & Pattern.CASE_INSENSITIVE) != 0;
		return "PatternSearchMatcher[" + pattern + ',' + ignoreCase + ']';
	} //}}}

	//{{{ getFlag() method
	/**
	 * Builds the {@link Pattern} flags used by this matcher.
	 * @param ignoreCase <code>true</code> to add
	 * {@link Pattern#CASE_INSENSITIVE}
	 * @return {@link Pattern#MULTILINE}, optionally combined with
	 * {@link Pattern#CASE_INSENSITIVE}
	 */
	static int getFlag(boolean ignoreCase)
	{
		int flags = Pattern.MULTILINE;
		if (ignoreCase)
			flags |= Pattern.CASE_INSENSITIVE;
		return flags;
	} //}}}

	//{{{ Private members
	private final int flags;
	/** The compiled pattern, or null until the first match attempt. */
	private Pattern re;
	private final String pattern;

	//{{{ restorePrevious() method
	/**
	 * Copies a saved match back into the shared result object.
	 * @param previous the saved match, may be null
	 * @return false if there was nothing to restore
	 */
	private boolean restorePrevious(Match previous)
	{
		if (previous == null)
			return false;
		returnValue.start = previous.start;
		returnValue.end = previous.end;
		returnValue.substitutions = previous.substitutions;
		return true;
	} //}}}

	//{{{ isEscaped() method
	/**
	 * Tells whether the character at <code>pos</code> is preceded by an
	 * odd number of backslashes, i.e. is itself escaped.
	 */
	private static boolean isEscaped(String p, int pos)
	{
		int backslashes = 0;
		for (int i = pos - 1; i >= 0 && p.charAt(i) == '\\'; i--)
		{
			backslashes++;
		}
		return (backslashes & 1) == 1;
	} //}}}

	//{{{ findGroupEnd() method
	/**
	 * Finds the parenthesis closing the group opened at <code>open</code>.
	 * @return the index of the closing parenthesis, or -1 if the
	 * unescaped parentheses do not balance
	 */
	private static int findGroupEnd(String p, int open)
	{
		int depth = 0;
		for (int i = open; i < p.length(); i++)
		{
			char c = p.charAt(i);
			if ((c != '(' && c != ')') || isEscaped(p, i))
				continue;
			if (c == '(')
			{
				depth++;
			}
			else if (--depth == 0)
			{
				return i;
			}
		}
		return -1;
	} //}}}

	//{{{ skipQuantifier() method
	/**
	 * Returns the index just past any quantifier starting at
	 * <code>pos</code>. Every read is bounds checked, so a quantifier
	 * that ends the pattern is consumed in full.
	 */
	private static int skipQuantifier(String p, int pos)
	{
		int len = p.length();
		int end = pos;

		// check for "{n,m}" quantifiers
		if (end < len && p.charAt(end) == '{')
		{
			int close = p.indexOf('}', end);
			end = close < 0 ? len : close + 1;
		}

		// check for ?+* quantifiers
		if (end < len)
		{
			char c = p.charAt(end);
			if (c == '?' || c == '+' || c == '*')
				end++;
		}

		// check for the reluctant (?) or possessive (+) modifier
		if (end < len)
		{
			char c = p.charAt(end);
			if (c == '?' || c == '+')
				end++;
		}
		return end;
	} //}}}

	//}}}
}