StringOperation.java example

Explorer
CodingSpectator-master
- plug-ins
/*******************************************************************************
 * Copyright (c) 2000, 2010 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
package org.eclipse.jdt.internal.core.search;

import org.eclipse.jdt.core.compiler.CharOperation;
import org.eclipse.jdt.internal.compiler.parser.ScannerHelper;

/**
 * This class is a collection of helper methods to manipulate strings during search.
 */
public final class StringOperation {

	private final static int[] EMPTY_REGIONS = new int[0];

/**
 * Answers all the regions in a given name matching a given camel case pattern.
 * </p><p>
 * Each of these regions is made of its starting index and its length in the given
 * name. They are all concatenated in a single array of <code>int</code>
 * which therefore always has an even length.
 * </p><p>
 * Note that each region is disjointed from the following one.<br>
 * E.g. if the regions are <code>{ start1, length1, start2, length2 }</code>,
 * then <code>start1+length1</code> will always be smaller than
 * <code>start2</code>.
 * </p><p>
 * <pre>
 * Examples:
 * <ol><li>  pattern = "NPE"
 *  name = NullPointerException / NoPermissionException
 *  result:  { 0, 1, 4, 1, 11, 1 } / { 0, 1, 2, 1, 12, 1 } </li>
 * <li>  pattern = "NuPoEx"
 *  name = NullPointerException
 *  result:  { 0, 2, 4, 2, 11, 2 }</li>
 * <li>  pattern = "IPL3"
 *  name = "IPerspectiveListener3"
 *  result:  { 0, 2, 12, 1, 20, 1 }</li>
 * <li>  pattern = "HashME"
 *  name = "HashMapEntry"
 *  result:  { 0, 5, 7, 1 }</li>
 * </ol></pre>
 *</p>
 * @see CharOperation#camelCaseMatch(char[], int, int, char[], int, int, boolean)
 * 	for more details on the camel case behavior
 * @see CharOperation#match(char[], char[], boolean) for more details on the
 * 	pattern match behavior
 *
 * @param pattern the given pattern
 * @param patternStart the start index of the pattern, inclusive
 * @param patternEnd the end index of the pattern, exclusive
 * @param name the given name
 * @param nameStart the start index of the name, inclusive
 * @param nameEnd the end index of the name, exclusive
 * @param samePartCount flag telling whether the pattern and the name should
 * 	have the same count of parts or not.<br>
 * 	  For example:
 * 	<ul>
 * 		<li>'HM' type string pattern will match 'HashMap' and 'HtmlMapper' types,
 * 				but not 'HashMapEntry'</li>
 * 		<li>'HMap' type string pattern will still match previous 'HashMap' and
 * 				'HtmlMapper' types, but not 'HighMagnitude'</li>
 * 	</ul>
 * @return an array of <code>int</code> having two slots per returned
 * 	regions (first one is the starting index of the region and the second
 * 	one the length of the region).<br>
 * 	Note that it may be <code>null</code> if the given name does not match
 * 	the pattern
 * @since 3.5
 */
public static final int[] getCamelCaseMatchingRegions(String pattern, int patternStart, int patternEnd, String name, int nameStart, int nameEnd, boolean samePartCount) {

	/* !!!!!!!!!! WARNING !!!!!!!!!!
	 * The algorithm used in this method has been fully inspired from
	 * CharOperation#camelCaseMatch(char[], int, int, char[], int, int, boolean).
	 *
	 * So, if any change needs to be applied in the algorithm, do NOT forget
	 * to backport it in the CharOperation method!
	 */

	if (name == null)
		return null; // null name cannot match
	if (pattern == null) {
		// null pattern cannot match any region
		// see bug https://bugs.eclipse.org/bugs/show_bug.cgi?id=264816
		return EMPTY_REGIONS;
	}
	if (patternEnd < 0) 	patternEnd = pattern.length();
	if (nameEnd < 0) nameEnd = name.length();

	if (patternEnd <= patternStart) {
		return nameEnd <= nameStart
			? new int[] { patternStart, patternEnd-patternStart }
			: null;
	}
	if (nameEnd <= nameStart) return null;
	// check first pattern char
	if (name.charAt(nameStart) != pattern.charAt(patternStart)) {
		// first char must strictly match (upper/lower)
		return null;
	}

	char patternChar, nameChar;
	int iPattern = patternStart;
	int iName = nameStart;

	// init segments
	int parts = 1;
	for (int i=patternStart+1; i<patternEnd; i++) {
		final char ch = pattern.charAt(i);
		if (ch < ScannerHelper.MAX_OBVIOUS) {
			if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[ch] & (ScannerHelper.C_UPPER_LETTER | ScannerHelper.C_DIGIT)) != 0) {
				parts++;
			}
		} else if (Character.isJavaIdentifierPart(ch) && (Character.isUpperCase(ch) || Character.isDigit(ch))) {
			parts++;
		}
	}
	int[] segments = null;
	int count = 0; // count

	// Main loop is on pattern characters
	int segmentStart = iName;
	while (true) {
		iPattern++;
		iName++;

		if (iPattern == patternEnd) { // we have exhausted pattern...
			// it's a match if the name can have additional parts (i.e. uppercase characters) or is also exhausted
			if (!samePartCount || iName == nameEnd) {
				if (segments == null) {
					segments = new int[2];
				}
				segments[count++] = segmentStart;
				segments[count++] = iName - segmentStart;
				if (count < segments.length) {
					System.arraycopy(segments, 0, segments = new int[count], 0, count);
				}
				return segments;
			}

			// otherwise it's a match only if the name has no more uppercase characters
			int segmentEnd = iName;
			while (true) {
				if (iName == nameEnd) {
					// we have exhausted the name, so it's a match
					if (segments == null) {
						segments = new int[2];
					}
					segments[count++] = segmentStart;
					segments[count++] = segmentEnd - segmentStart;
					if (count < segments.length) {
						System.arraycopy(segments, 0, segments = new int[count], 0, count);
					}
					return segments;
				}
				nameChar = name.charAt(iName);
				// test if the name character is uppercase
				if (nameChar < ScannerHelper.MAX_OBVIOUS) {
					if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[nameChar] & ScannerHelper.C_UPPER_LETTER) != 0) {
						return null;
					}
				}
				else if (!Character.isJavaIdentifierPart(nameChar) || Character.isUpperCase(nameChar)) {
					return null;
				}
				iName++;
			}
		}

		if (iName == nameEnd){
			// We have exhausted the name (and not the pattern), so it's not a match
			return null;
		}

		// For as long as we're exactly matching, bring it on (even if it's a lower case character)
		if ((patternChar = pattern.charAt(iPattern)) == name.charAt(iName)) {
			continue;
		}
		int segmentEnd = iName;

		// If characters are not equals, then it's not a match if patternChar is lowercase
		if (patternChar < ScannerHelper.MAX_OBVIOUS) {
			if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[patternChar] & (ScannerHelper.C_UPPER_LETTER | ScannerHelper.C_DIGIT)) == 0) {
				return null;
			}
		} else if (Character.isJavaIdentifierPart(patternChar) && !Character.isUpperCase(patternChar) && !Character.isDigit(patternChar)) {
			return null;
		}

		// patternChar is uppercase, so let's find the next uppercase in name
		while (true) {
			if (iName == nameEnd){
	            //	We have exhausted name (and not pattern), so it's not a match
				return null;
			}

			nameChar = name.charAt(iName);
			if (nameChar < ScannerHelper.MAX_OBVIOUS) {
				int charNature = ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[nameChar];
				if ((charNature & (ScannerHelper.C_LOWER_LETTER | ScannerHelper.C_SPECIAL)) != 0) {
					// nameChar is lowercase
					iName++;
				} else if ((charNature & ScannerHelper.C_DIGIT) != 0) {
					// nameChar is digit => break if the digit is current pattern character otherwise consume it
					if (patternChar == nameChar) break;
					iName++;
				// nameChar is uppercase...
				} else  if (patternChar != nameChar) {
					//.. and it does not match patternChar, so it's not a match
					return null;
				} else {
					//.. and it matched patternChar. Back to the big loop
					break;
				}
			}
			// Same tests for non-obvious characters
			else if (Character.isJavaIdentifierPart(nameChar) && !Character.isUpperCase(nameChar)) {
				iName++;
			} else if (Character.isDigit(nameChar)) {
				if (patternChar == nameChar) break;
				iName++;
			} else  if (patternChar != nameChar) {
				return null;
			} else {
				break;
			}
		}
		// At this point, either name has been exhausted, or it is at an uppercase letter.
		// Since pattern is also at an uppercase letter
		if (segments == null) {
			segments = new int[parts*2];
		}
		segments[count++] = segmentStart;
		segments[count++] = segmentEnd - segmentStart;
		segmentStart = iName;
	}
}

/**
 * Answers all the regions in a given name matching a given <i>pattern</i>
 * pattern (e.g. "H*M??").
 * </p><p>
 * Each of these regions is made of its starting index and its length in the given
 * name. They are all concatenated in a single array of <code>int</code>
 * which therefore always has an even length.
 * </p><p>
 * Note that each region is disjointed from the following one.<br>
 * E.g. if the regions are <code>{ start1, length1, start2, length2 }</code>,
 * then <code>start1+length1</code> will always be smaller than
 * <code>start2</code>.
 * </p><p>
 * <pre>
 * Examples:
 * <ol>
 * <li>  pattern = "N???Po*Ex?eption"
 *  name = NullPointerException
 *  result:  { 0, 1, 4, 2, 11, 2, 14, 6 }</li>
 * <li>  pattern = "Ha*M*ent*"
 *  name = "HashMapEntry"
 *  result:  { 0, 2, 4, 1, 7, 3 }</li>
 * </ol></pre>
 *</p>
 * @see CharOperation#match(char[], char[], boolean) for more details on the
 * 	pattern match behavior
 *
 * @param pattern the given pattern
 * @param patternStart the given pattern start
 * @param patternEnd the given pattern end
 * @param name the given name
 * @param nameStart the given name start
 * @param nameEnd the given name end
 * @param isCaseSensitive flag to know if the matching should be case sensitive
 * @return an array of <code>int</code> having two slots per returned
 * 	regions (first one is the starting index of the region and the second
 * 	one the length of the region).<br>
 * 	Note that it may be <code>null</code> if the given name does not match
 * 	the pattern
 * @since 3.5
 */
public static final int[] getPatternMatchingRegions(
	String pattern,
	int patternStart,
	int patternEnd,
	String name,
	int nameStart,
	int nameEnd,
	boolean isCaseSensitive) {

	/* !!!!!!!!!! WARNING !!!!!!!!!!
	 * The algorithm used in this method has been fully inspired from
	 * CharOperation#match(char[], int, int, char[], int, int, boolean).
	 *
	 * So, if any change needs to be applied in the algorithm, do NOT forget
	 * to backport it in the CharOperation method!
	 */

	if (name == null) return null; // null name cannot match
	if (pattern == null) {
		// null pattern cannot match any region
		// see bug https://bugs.eclipse.org/bugs/show_bug.cgi?id=264816
		return EMPTY_REGIONS;
	}
	int iPattern = patternStart;
	int iName = nameStart;

	// init segments parts
	if (patternEnd < 0)
		patternEnd = pattern.length();
	if (nameEnd < 0)
		nameEnd = name.length();
	int questions = 0;
	int parts = 0;
	char previous = 0;
	for (int i=patternStart; i<patternEnd; i++) {
		char ch = pattern.charAt(i);
		switch (ch) {
			case '?':
				questions++;
				break;
			case '*':
				break;
			default:
				switch (previous) {
					case 0 :
					case '?':
					case '*':
						parts++;
						break;
				}
		}
		previous = ch;
	}
	if (parts == 0) {
		if (questions <= (nameEnd - nameStart)) return EMPTY_REGIONS;
		return null;
	}
	int[] segments = new int[parts*2];

	/* check first segment */
	int count = 0;
	int start = iName;
	char patternChar = 0;
	previous = 0;
	while ((iPattern < patternEnd)
		&& (patternChar = pattern.charAt(iPattern)) != '*') {
		if (iName == nameEnd)
			return null;
		if (patternChar == '?') {
			switch (previous) {
				case 0:
				case '?':
					break;
				default:
					segments[count++] = start;
					segments[count++] = iPattern-start;
					break;
			}
		} else {
			if (isCaseSensitive) {
				if (patternChar != name.charAt(iName)) {
					return null;
				}
			} else if (ScannerHelper.toLowerCase(patternChar) != ScannerHelper.toLowerCase(name.charAt(iName))) {
				return null;
			}
			switch (previous) {
				case 0:
				case '?':
					start = iPattern;
					break;
			}
		}
		iName++;
		iPattern++;
		previous = patternChar;
	}
	/* check sequence of star+segment */
	int segmentStart;
	if (patternChar == '*') {
		if (iPattern > 0 && previous != '?') {
			segments[count++] = start;
			segments[count++] = iName-start;
			start = iName;
		}
		segmentStart = ++iPattern; // skip star
	} else {
		if (iName == nameEnd) {
			if (count == (parts*2)) return segments;
			int end = patternEnd;
			if (previous == '?') { // last char was a '?' => purge all trailing '?'
				while (pattern.charAt(--end-1) == '?') {
					if (end == start) {
						return new int[] { patternStart, patternEnd-patternStart };
					}
				}
			}
			return new int[] { start, end-start };
		}
		return null;
	}
	int prefixStart = iName;
	int previousCount = count;
	previous = patternChar;
	char previousSegment = patternChar;
	checkSegment : while (iName < nameEnd) {
		if (iPattern == patternEnd) {
			iPattern = segmentStart; // mismatch - restart current segment
			iName = ++prefixStart;
			previous = previousSegment;
			continue checkSegment;
		}
		/* segment is ending */
		if ((patternChar = pattern.charAt(iPattern)) == '*') {
			segmentStart = ++iPattern; // skip star
			if (segmentStart == patternEnd) {
				if (count < (parts*2)) {
					segments[count++] = start;
					segments[count++] = iName-start;
				}
				return segments;
			}
			switch (previous) {
				case '*':
				case '?':
					break;
				default:
					segments[count++] = start;
					segments[count++] = iName-start;
					break;
			}
			prefixStart = iName;
			start = prefixStart;
			previous = patternChar;
			previousSegment = patternChar;
			continue checkSegment;
		}
		/* check current name character */
		previousCount = count;
		if (patternChar == '?') {
			switch (previous) {
				case '*':
				case '?':
					break;
				default:
					segments[count++] = start;
					segments[count++] = iName-start;
					break;
			}
		} else {
			boolean mismatch;
			if (isCaseSensitive) {
				mismatch = name.charAt(iName) != patternChar;
			} else {
				mismatch = ScannerHelper.toLowerCase(name.charAt(iName)) != ScannerHelper.toLowerCase(patternChar);
			}
			if (mismatch) {
				iPattern = segmentStart; // mismatch - restart current segment
				iName = ++prefixStart;
				start = prefixStart;
				count = previousCount;
				previous = previousSegment;
				continue checkSegment;
			}
			switch (previous) {
				case '?':
					start = iName;
					break;
			}
		}
		iName++;
		iPattern++;
		previous = patternChar;
	}

	if ((segmentStart == patternEnd)
		|| (iName == nameEnd && iPattern == patternEnd)
		|| (iPattern == patternEnd - 1 && pattern.charAt(iPattern) == '*')) {
		if (count < (parts*2)) {
			segments[count++] = start;
			segments[count++] = iName-start;
		}
		return segments;
	}
	return null;
}
}