// PatternSearchMatcher.java example
//
// Explorer
// jedit_cc4401-master
/*
 * PatternSearchMatcher.java - Regular expression matcher
 * :noTabs=false:
 *
 * Copyright (C) 2006 Marcelo Vanzin
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

package org.gjt.sp.jedit.search;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.gjt.sp.util.ReverseCharSequence;

/**
 * A regular expression string matcher using java.util.regex.
 * @see java.util.regex.Pattern
 *
 * @author Marcelo Vanzin
 * @version $Id: PatternSearchMatcher.java 19386 2011-02-24 11:06:57Z kpouer $
 * @since jEdit 4.3pre5
 */
public class PatternSearchMatcher extends SearchMatcher
{
	//{{{ PatternSearchMatcher constructors
	/**
	 * Creates a new regular expression string matcher. The pattern is
	 * compiled lazily, on the first call to
	 * {@link #nextMatch(CharSequence, boolean, boolean, boolean, boolean)}.
	 * @see java.util.regex.Pattern
	 * @param search the search pattern
	 * @param ignoreCase <code>true</code> if you want to ignore case
	 * @since jEdit 4.3pre5
	 */
	public PatternSearchMatcher(String search, boolean ignoreCase)
	{
		pattern = search;
		flags = getFlag(ignoreCase);
	}

	/**
	 * Creates a new regular expression string matcher from an already
	 * compiled regex.
	 * @see java.util.regex.Pattern
	 * @param re the compiled regex
	 * @param ignoreCase <code>true</code> if you want to ignore case
	 * @param wholeWord <code>true</code> to search for whole word only
	 * @since jEdit 4.5pre1
	 */
	public PatternSearchMatcher(Pattern re, boolean ignoreCase, boolean wholeWord)
	{
		this(re.pattern(), ignoreCase);
		this.re = re;
		this.wholeWord = wholeWord;
	}
	
	/**
	 * Creates a new regular expression matcher from an already compiled
	 * regex, without the whole word restriction.
	 * @see java.util.regex.Pattern
	 * @param re the compiled regex
	 * @param ignoreCase <code>true</code> if you want to ignore case
	 * @since jEdit 4.3pre13
	 */
	public PatternSearchMatcher(Pattern re, boolean ignoreCase)
	{
		this(re, ignoreCase, false);
	} //}}}

	//{{{ nextMatch() method
	/**
	 * {@inheritDoc}
	 * <p>Reverse regex search is done by searching from the beginning to
	 * just prior to the current match, so will be inefficient for large
	 * buffers.</p>
	 * <p>Empty patterns and empty texts are handled without throwing.
	 * In a reverse whole word search, the last whole word match is
	 * returned even if later matches of the regex are not whole
	 * words.</p>
	 *
	 * @param text the text to search; a <code>ReverseCharSequence</code>
	 * is un-reversed before searching
	 * @param start <code>false</code> if the beginning of the text is not
	 * a start of line, in which case a leading "^" must not match there
	 * @param end <code>false</code> if the end of the text is not the end
	 * of the buffer, in which case a trailing "$" must not match there
	 * @param firstTime <code>false</code> to skip a zero width match at
	 * the position where the search starts
	 * @param reverse <code>true</code> to return the last match instead
	 * of the first one, with indices relative to the reversed text
	 * @return the match, or <code>null</code> if nothing matched
	 */
	@Override
	public SearchMatcher.Match nextMatch(CharSequence text, boolean start,
		boolean end, boolean firstTime, boolean reverse)
	{
		// "For the mean time, there is no way to automatically generate a sexeger"
		//
		// http://japhy.perlmonk.org/sexeger/sexeger.html
		//
		// So ... for reverse regex searches we will search 
		// the string in the forward direction and 
		// return the last match.
		
		// Since we search the String in the forward direction,
		// (even for reverse searches) un-reverse the ReverseCharSequence.
		if (text instanceof ReverseCharSequence)
			text = ((ReverseCharSequence)text).baseSequence();

		if (re == null)
			re = Pattern.compile(pattern, flags);

		// if the pattern begins with "^", avoid spurious match at the
		// start of input sequence which is not a start of line.
		// startsWith() is used instead of charAt(0) so that an empty
		// pattern does not throw.
		int matchStart = 0;
		if (!start && pattern.startsWith("^"))
		{
			Matcher sol = Pattern.compile("^", flags).matcher(text);
			// Ignore the first match since it is not a start of line.
			sol.find();
			// If the second match is not found, the real pattern also
			// can't match.
			if (!sol.find())
				return null;
			// Skip the text to the second match, which can be the first
			// match for the real pattern.
			matchStart = sol.start();
		}

		Matcher match = re.matcher(text);
		if (!match.find(matchStart))
			return null;

		// Special care for zero width matches. Without this care,
		// the caller will fall into an infinite loop, for non-reverse
		// search.
		if (!reverse && !firstTime && match.start() == 0 && match.end() == 0)
		{
			if (!match.find())
				return null;
		}

		// If we're not at the end of the buffer and we match the end
		// of the text, and the pattern ends with a "$", the match is
		// ignored. The match at the end of the buffer which immediately
		// follows the final newline is also ignored because it is
		// generally not expected as an EOL.
		// These conditions do not change while iterating, so they are
		// computed once. The length guard keeps an empty text from
		// throwing, and endsWith() does the same for an empty pattern.
		final int textLength = text.length();
		final boolean endsWithNewline = textLength > 0
			&& text.charAt(textLength - 1) == '\n';
		final boolean ignoreMatchAtTextEnd = (!end || endsWithNewline)
			&& pattern.endsWith("$");

		// Last accepted match; only ever set for reverse searches.
		Match previous = null;
		while (true)
		{
			if (ignoreMatchAtTextEnd && match.end() == textLength)
			{
				if (previous == null)
					return null;
				copyMatch(previous, returnValue);
				break;
			}

			String[] groups = new String[match.groupCount() + 1];
			for (int i = 0; i < groups.length; i++)
			{
				groups[i] = match.group(i);
			}
			returnValue.substitutions = groups;
			returnValue.start = match.start();
			returnValue.end = match.end();

			if (wholeWord && !isWholeWord(text, returnValue.start, returnValue.end))
			{
				if (match.find())
					continue;
				// No more candidates. A reverse search may already
				// have seen a whole word match; return that one
				// instead of giving up. Forward searches never
				// have a previous match here.
				if (previous == null)
					return null;
				copyMatch(previous, returnValue);
				break;
			}

			// For non-reversed searches, we break immediately
			// to return the first match.  For reversed searches,
			// we continue until no more matches are found
			if (!reverse || !match.find())
			{
				// For reverse search, check for zero width match at
				// the end of text.
				if (reverse && !firstTime && returnValue.start == textLength
					&& returnValue.end == textLength)
				{
					if (previous == null)
						return null;
					copyMatch(previous, returnValue);
				}
				break;
			}

			// Save the result in case the following matches have
			// to be ignored.
			if (previous == null)
			{
				previous = new Match();
			}
			copyMatch(returnValue, previous);
		}

		if (reverse)
		{
			// The caller assumes we are searching a reversed
			// CharSegment, so we need to reverse the indices
			// before returning
			int len = returnValue.end - returnValue.start;
			returnValue.start = textLength - returnValue.end;
			returnValue.end = returnValue.start + len;
		}

		return returnValue;
	} //}}}

	//{{{ toString() method
	/**
	 * @return a description containing the pattern and whether the
	 * matcher ignores case
	 */
	@Override
	public String toString()
	{
		boolean ignoreCase = (flags & Pattern.CASE_INSENSITIVE) != 0;
		return "PatternSearchMatcher[" + pattern + ',' + ignoreCase + ']';
	} //}}}
	
	//{{{ getFlag() method
	/**
	 * Returns the regex flags used by this matcher.
	 * @param ignoreCase <code>true</code> to add
	 * {@link Pattern#CASE_INSENSITIVE}
	 * @return {@link Pattern#MULTILINE}, combined with
	 * {@link Pattern#CASE_INSENSITIVE} if requested
	 */
	static int getFlag(boolean ignoreCase)
	{
		int flags = Pattern.MULTILINE;
		if (ignoreCase)
			flags |= Pattern.CASE_INSENSITIVE;
		return flags;
	} //}}}

	//{{{ copyMatch() method
	/**
	 * Copies the bounds and the substitutions of a match into another one.
	 * The substitutions array is shared, not cloned.
	 */
	private static void copyMatch(Match from, Match to)
	{
		to.start = from.start;
		to.end = from.end;
		to.substitutions = from.substitutions;
	} //}}}

	//{{{ Private members
	/** Flags used to compile the pattern and the start of line probe. */
	private final int flags;
	/** The compiled regex, created on first use if not given. */
	private Pattern	re;
	/** The source of the regex. */
	private final String pattern;
	//}}}
}