/*******************************************************************************
* Copyright (c) 2011, 2016 Tomasz Wesolowski and others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Tomasz Wesolowski - initial API and implementation
* Jens Elmenthaler - further tweaking
*******************************************************************************/
package org.eclipse.cdt.core.parser.util;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A matcher for camel case matching supporting both the camel case as well as
* the underscore notation.
*
* @noextend This class is not intended to be subclassed by clients.
* @since 5.3
*/
public class SegmentMatcher {
    /** The leading part of the pattern, up to the end of its first segment; see {@link #getPrefixForBinarySearch()}. */
    private final char[] prefixForBinarySearch;

    /** The string that any prefix match has to start with. */
    private final char[] prefixForMatching;

    /** The regular expression for a segment match, or {@code null} if the pattern is empty. */
    private final Pattern regexp;

    /** The minimum length any name must have in order to match. */
    private final int minNameLength;

    /** True if the pattern consists of exactly one segment; also true for a null or empty pattern. */
    private final boolean singleSegment;

    /**
     * Creates a matcher for the given pattern.
     * <p>
     * The pattern is split into segments. A new segment starts at every
     * upper-case letter, at every run of non-alphanumeric characters, and at
     * every switch between letters and digits. The segments are translated into
     * a regular expression that is anchored at the beginning of the name and
     * allows arbitrary characters between two segments.
     * <p>
     * The pattern array is copied, so modifying it after construction does not
     * affect this matcher.
     *
     * @param pattern
     *            The camel case or underscore pattern. A {@code null} or empty
     *            pattern yields a matcher with an empty prefix and no regular
     *            expression.
     */
    public SegmentMatcher(char[] pattern) {
        if (pattern == null || pattern.length == 0) {
            prefixForMatching = CharArrayUtils.EMPTY;
            prefixForBinarySearch = CharArrayUtils.EMPTY;
            regexp = null;
            minNameLength = 0;
            singleSegment = true;
        } else {
            StringBuilder regexpBuffer = new StringBuilder("^"); //$NON-NLS-1$
            int i = 0;
            int lengthOfFirstSegment = 0;
            char currentChar;
            int segmentCount = 0;

            // Translate each segment
            while (i < pattern.length) {
                boolean separatorSpecified = false;

                // Handle prefix, i.e. anything before the first letter or digit
                for (; i < pattern.length; ++i) {
                    currentChar = pattern[i];
                    if (Character.isLetterOrDigit(currentChar)) {
                        break;
                    }
                    // Quote those characters, they may be regex meta characters.
                    regexpBuffer.append(Pattern.quote(String.valueOf(currentChar)));
                    separatorSpecified = true;
                }

                if (i < pattern.length) {
                    // The character here is always a letter or digit.
                    currentChar = pattern[i];

                    if (Character.isDigit(currentChar)) {
                        // Handle number segment: a run of digits, matched literally
                        regexpBuffer.append(currentChar);
                        for (++i; i < pattern.length; ++i) {
                            currentChar = pattern[i];
                            if (!Character.isDigit(currentChar)) {
                                break;
                            }
                            regexpBuffer.append(currentChar);
                        }
                    } else {
                        // Handle text segment
                        if ((segmentCount == 0) || separatorSpecified) {
                            // The very first segment, or a segment following an explicit
                            // separator, must start with exactly the given character.
                            regexpBuffer.append(currentChar);
                        } else {
                            // Any other segment starts either after an underscore (in
                            // either case) or with the upper-case letter (camel case).
                            char lower = Character.toLowerCase(currentChar);
                            char upper = Character.toUpperCase(currentChar);
                            regexpBuffer.append("(_["); //$NON-NLS-1$
                            regexpBuffer.append(lower);
                            regexpBuffer.append(upper);
                            regexpBuffer.append("]|"); //$NON-NLS-1$
                            regexpBuffer.append(upper);
                            regexpBuffer.append(')');
                        }

                        // Remaining letters of the segment: everything up to the next
                        // upper-case letter or non-letter, each matched in either case.
                        for (++i; i < pattern.length; ++i) {
                            currentChar = pattern[i];
                            if (!Character.isLetter(currentChar) || Character.isUpperCase(currentChar)) {
                                break;
                            }
                            regexpBuffer.append('[');
                            regexpBuffer.append(currentChar);
                            regexpBuffer.append(Character.toUpperCase(currentChar));
                            regexpBuffer.append(']');
                        }
                    }
                }

                // Arbitrary characters may follow each segment.
                regexpBuffer.append(".*"); //$NON-NLS-1$

                if (segmentCount == 0) {
                    lengthOfFirstSegment = i;
                }
                ++segmentCount;
            }

            regexp = Pattern.compile(regexpBuffer.toString());
            singleSegment = (segmentCount == 1);

            // Defensive copy: the regular expression and the minimum length were
            // derived from the current content of the array, so later changes made
            // by the caller must not alter the prefix used for matching.
            prefixForMatching = pattern.clone();

            // The first segment is also the binary search prefix
            prefixForBinarySearch = CharArrayUtils.extract(pattern, 0, lengthOfFirstSegment);

            minNameLength = pattern.length;
        }
    }

    /**
     * Matches the given name by prefix and segment matching.
     *
     * @param name
     *            The name to test; {@code null} never matches.
     * @return true if the associated pattern is a prefix-based or segment-based abbreviation of name.
     */
    public boolean match(char[] name) {
        if (matchPrefix(name)) {
            return true;
        }

        // If there is only a single segment given and prefix match failed,
        // the segment match cannot pass either. So skip it.
        if (singleSegment) {
            return false;
        }

        return matchSegments(name);
    }

    /**
     * Matches the given name by prefix matching.
     *
     * @param name
     *            The name to test; {@code null} never matches.
     * @return true if the associated pattern is a prefix-based abbreviation of name.
     */
    public boolean matchPrefix(char[] name) {
        // Same null contract as matchSegments(char[]).
        if (name == null) {
            return false;
        }
        // NOTE(review): the trailing 'true' is presumably the ignore-case flag of
        // CharArrayUtils.equals - confirm against CharArrayUtils.
        return CharArrayUtils.equals(name, 0, prefixForMatching.length, prefixForMatching, true);
    }

    /**
     * Matches the given name by segment matching.
     *
     * @param name
     *            The name to test; {@code null} never matches.
     * @return true if the associated pattern is a segment-based abbreviation of name.
     */
    public boolean matchSegments(char[] name) {
        if (name == null) {
            return false;
        }

        // A name shorter than the pattern cannot contain all of its segments.
        if (name.length < minNameLength) {
            return false;
        }

        // No regular expression means the pattern was empty, which matches anything.
        if (regexp == null) {
            return true;
        }

        Matcher matcher = regexp.matcher(String.valueOf(name));
        return matcher.find();
    }

    /**
     * Matches pattern to name by prefix and segment matching. If you have to match
     * against the same pattern repeatedly, create a {@link SegmentMatcher} instead
     * and re-use it all the time, because this is much faster.
     *
     * @param pattern
     *            The camel case or underscore pattern.
     * @param name
     *            The name to test.
     * @return true if pattern is a prefix-based or segment-based abbreviation of name
     */
    public static boolean match(char[] pattern, char[] name) {
        return (new SegmentMatcher(pattern)).match(name);
    }

    /**
     * The pattern used by this matcher is not suitable for binary searches
     * (e.g. within the index).
     * However, a string can be calculated that can be used in the
     * context of binary searches.
     * In the compare method used by your binary search, return 0 for any string
     * that starts with the returned string.
     * <p>
     * The returned array is the internal state of this matcher and is not
     * copied; callers must not modify it.
     *
     * @return Such a string.
     */
    public char[] getPrefixForBinarySearch() {
        return prefixForBinarySearch;
    }

    /**
     * @return If false, calling {@link #match(char[])} can be skipped if a
     *         name survived a binary search using the prefix returned by
     *         {@link #getPrefixForBinarySearch()} as key.
     */
    public boolean matchRequiredAfterBinarySearch() {
        return !singleSegment;
    }
}