/*
* Copyright (c) 2013, the Dart project authors.
*
* Licensed under the Eclipse Public License v1.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.eclipse.org/legal/epl-v10.html
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.dart.engine.utilities.general;
/**
 * Static helpers for matching names held in {@code char[]} arrays, either with CamelCase rules
 * ({@code camelCaseMatch}) or with the wild-cards '*' and '?' ({@code match}).
 *
 * @coverage dart.engine.utilities
 */
public class CharOperation {
  /**
   * Return true if the pattern matches the given name using CamelCase rules, or false otherwise.
   * char[] CamelCase matching does NOT accept explicit wild-cards '*' and '?' and is inherently
   * case sensitive.
   * <p>
   * CamelCase denotes the convention of writing compound names without spaces, and capitalizing
   * every term. This function recognizes both upper and lower CamelCase, depending whether the
   * leading character is capitalized or not. The leading part of an upper CamelCase pattern is
   * assumed to contain a sequence of capitals which are appearing in the matching name; e.g. 'NPE'
   * will match 'NullPointerException', but not 'NewPerfData'. A lower CamelCase pattern uses a
   * lowercase first character. In Java, type names follow the upper CamelCase convention, whereas
   * method or field names follow the lower CamelCase convention.
   * <p>
   * The pattern may contain lowercase characters, which will be matched in a case sensitive way.
   * These characters must appear in sequence in the name. For instance, 'NPExcep' will match
   * 'NullPointerException', but not 'NullPointerExCEPTION' or 'NuPoEx' will match
   * 'NullPointerException', but not 'NoPointerException'.
   * <p>
   * Digit characters are treated in a special way. They can be used in the pattern but are not
   * always considered as leading character. For instance, both 'UTF16DSS' and 'UTFDSS' patterns
   * will match 'UTF16DocumentScannerSupport'.
   * <p>
   * Using this method allows matching names to have more parts than the specified pattern (see
   * {@link #camelCaseMatch(char[], char[], boolean)}).<br>
   * For instance, 'HM' , 'HaMa' and 'HMap' patterns will match 'HashMap', 'HatMapper' <b>and
   * also</b> 'HashMapEntry'.
   * <p>
   * A null pattern is treated like '*' and matches every name, <b>including a null name</b>
   * (the pattern is tested before the name). A null name never matches a non-null pattern.
   *
   * <pre>
   * Examples:<ol>
   * <li> pattern = "NPE".toCharArray()
   * name = "NullPointerException".toCharArray()
   * result => true</li>
   * <li> pattern = "NPE".toCharArray()
   * name = "NoPermissionException".toCharArray()
   * result => true</li>
   * <li> pattern = "NuPoEx".toCharArray()
   * name = "NullPointerException".toCharArray()
   * result => true</li>
   * <li> pattern = "NuPoEx".toCharArray()
   * name = "NoPermissionException".toCharArray()
   * result => false</li>
   * <li> pattern = "npe".toCharArray()
   * name = "NullPointerException".toCharArray()
   * result => false</li>
   * <li> pattern = "IPL3".toCharArray()
   * name = "IPerspectiveListener3".toCharArray()
   * result => true</li>
   * <li> pattern = "HM".toCharArray()
   * name = "HashMapEntry".toCharArray()
   * result => true</li>
   * </ol></pre>
   *
   * @param pattern the given pattern
   * @param name the given name
   * @return true if the pattern matches the given name, false otherwise
   */
  public static final boolean camelCaseMatch(char[] pattern, char[] name) {
    // null pattern is equivalent to '*'
    if (pattern == null) {
      return true;
    }
    // null name cannot match
    if (name == null) {
      return false;
    }
    return camelCaseMatch(pattern, 0, pattern.length, name, 0, name.length, false);
  }

  /**
   * Return true if the pattern matches the given name using CamelCase rules, or false otherwise.
   * char[] CamelCase matching does NOT accept explicit wild-cards '*' and '?' and is inherently
   * case sensitive.
   * <p>
   * CamelCase denotes the convention of writing compound names without spaces, and capitalizing
   * every term. This function recognizes both upper and lower CamelCase, depending whether the
   * leading character is capitalized or not. The leading part of an upper CamelCase pattern is
   * assumed to contain a sequence of capitals which are appearing in the matching name; e.g. 'NPE'
   * will match 'NullPointerException', but not 'NewPerfData'. A lower CamelCase pattern uses a
   * lowercase first character. In Java, type names follow the upper CamelCase convention, whereas
   * method or field names follow the lower CamelCase convention.
   * <p>
   * The pattern may contain lowercase characters, which will be matched in a case sensitive way.
   * These characters must appear in sequence in the name. For instance, 'NPExcep' will match
   * 'NullPointerException', but not 'NullPointerExCEPTION' or 'NuPoEx' will match
   * 'NullPointerException', but not 'NoPointerException'.
   * <p>
   * Digit characters are treated in a special way. They can be used in the pattern but are not
   * always considered as leading character. For instance, both 'UTF16DSS' and 'UTFDSS' patterns
   * will match 'UTF16DocumentScannerSupport'.
   * <p>
   * CamelCase can be restricted to match only the same count of parts. When this restriction is
   * specified the given pattern and the given name must have <b>exactly</b> the same number of
   * parts (i.e. the same number of uppercase characters).<br>
   * For instance, 'HM' , 'HaMa' and 'HMap' patterns will match 'HashMap' and 'HatMapper' <b>but
   * not</b> 'HashMapEntry'.
   * <p>
   * A null pattern is treated like '*' and matches every name, <b>including a null name</b>
   * (the pattern is tested before the name). A null name never matches a non-null pattern.
   *
   * <pre>
   * Examples:<ol>
   * <li> pattern = "NPE".toCharArray()
   * name = "NullPointerException".toCharArray()
   * result => true</li>
   * <li> pattern = "NPE".toCharArray()
   * name = "NoPermissionException".toCharArray()
   * result => true</li>
   * <li> pattern = "NuPoEx".toCharArray()
   * name = "NullPointerException".toCharArray()
   * result => true</li>
   * <li> pattern = "NuPoEx".toCharArray()
   * name = "NoPermissionException".toCharArray()
   * result => false</li>
   * <li> pattern = "npe".toCharArray()
   * name = "NullPointerException".toCharArray()
   * result => false</li>
   * <li> pattern = "IPL3".toCharArray()
   * name = "IPerspectiveListener3".toCharArray()
   * result => true</li>
   * <li> pattern = "HM".toCharArray()
   * name = "HashMapEntry".toCharArray()
   * result => (samePartCount == false)</li>
   * </ol></pre>
   *
   * @param pattern the given pattern
   * @param name the given name
   * @param samePartCount flag telling whether the pattern and the name should have the same count
   *          of parts or not.<br>
   *          For example:
   *          <ul>
   *          <li>'HM' type string pattern will match 'HashMap' and 'HtmlMapper' types, but not
   *          'HashMapEntry'</li>
   *          <li>'HMap' type string pattern will still match previous 'HashMap' and 'HtmlMapper'
   *          types, but not 'HighMagnitude'</li>
   *          </ul>
   * @return true if the pattern matches the given name, false otherwise
   */
  public static final boolean camelCaseMatch(char[] pattern, char[] name, boolean samePartCount) {
    // null pattern is equivalent to '*'
    if (pattern == null) {
      return true;
    }
    // null name cannot match
    if (name == null) {
      return false;
    }
    return camelCaseMatch(pattern, 0, pattern.length, name, 0, name.length, samePartCount);
  }

  /**
   * Return true if a sub-pattern matches the sub-part of the given name using CamelCase rules, or
   * false otherwise. char[] CamelCase matching does NOT accept explicit wild-cards '*' and '?' and
   * is inherently case sensitive. Can match only subset of name/pattern, considering end positions
   * as non-inclusive. The sub-pattern is defined by the patternStart and patternEnd positions.
   * <p>
   * CamelCase denotes the convention of writing compound names without spaces, and capitalizing
   * every term. This function recognizes both upper and lower CamelCase, depending whether the
   * leading character is capitalized or not. The leading part of an upper CamelCase pattern is
   * assumed to contain a sequence of capitals which are appearing in the matching name; e.g. 'NPE'
   * will match 'NullPointerException', but not 'NewPerfData'. A lower CamelCase pattern uses a
   * lowercase first character. In Java, type names follow the upper CamelCase convention, whereas
   * method or field names follow the lower CamelCase convention.
   * <p>
   * The pattern may contain lowercase characters, which will be matched in a case sensitive way.
   * These characters must appear in sequence in the name. For instance, 'NPExcep' will match
   * 'NullPointerException', but not 'NullPointerExCEPTION' or 'NuPoEx' will match
   * 'NullPointerException', but not 'NoPointerException'.
   * <p>
   * Digit characters are treated in a special way. They can be used in the pattern but are not
   * always considered as leading character. For instance, both 'UTF16DSS' and 'UTFDSS' patterns
   * will match 'UTF16DocumentScannerSupport'.
   * <p>
   * Using this method allows matching names to have more parts than the specified pattern (see
   * {@link #camelCaseMatch(char[], int, int, char[], int, int, boolean)}).<br>
   * For instance, 'HM' , 'HaMa' and 'HMap' patterns will match 'HashMap', 'HatMapper' <b>and
   * also</b> 'HashMapEntry'.
   * <p>
   * This overload simply delegates with {@code samePartCount == false}; null handling and the
   * treatment of negative end positions are therefore those of the delegate.
   * <p>
   * Examples:
   * <ol>
   * <li>pattern = "NPE".toCharArray() patternStart = 0 patternEnd = 3 name =
   * "NullPointerException".toCharArray() nameStart = 0 nameEnd = 20 result => true</li>
   * <li>pattern = "NPE".toCharArray() patternStart = 0 patternEnd = 3 name =
   * "NoPermissionException".toCharArray() nameStart = 0 nameEnd = 21 result => true</li>
   * <li>pattern = "NuPoEx".toCharArray() patternStart = 0 patternEnd = 6 name =
   * "NullPointerException".toCharArray() nameStart = 0 nameEnd = 20 result => true</li>
   * <li>pattern = "NuPoEx".toCharArray() patternStart = 0 patternEnd = 6 name =
   * "NoPermissionException".toCharArray() nameStart = 0 nameEnd = 21 result => false</li>
   * <li>pattern = "npe".toCharArray() patternStart = 0 patternEnd = 3 name =
   * "NullPointerException".toCharArray() nameStart = 0 nameEnd = 20 result => false</li>
   * <li>pattern = "IPL3".toCharArray() patternStart = 0 patternEnd = 4 name =
   * "IPerspectiveListener3".toCharArray() nameStart = 0 nameEnd = 21 result => true</li>
   * <li>pattern = "HM".toCharArray() patternStart = 0 patternEnd = 2 name =
   * "HashMapEntry".toCharArray() nameStart = 0 nameEnd = 12 result => true</li>
   * </ol>
   *
   * @param pattern the given pattern
   * @param patternStart the start index of the pattern, inclusive
   * @param patternEnd the end index of the pattern, exclusive
   * @param name the given name
   * @param nameStart the start index of the name, inclusive
   * @param nameEnd the end index of the name, exclusive
   * @return true if a sub-pattern matches the sub-part of the given name, false otherwise
   */
  public static final boolean camelCaseMatch(char[] pattern, int patternStart, int patternEnd,
      char[] name, int nameStart, int nameEnd) {
    return camelCaseMatch(pattern, patternStart, patternEnd, name, nameStart, nameEnd, false);
  }

  /**
   * Return true if a sub-pattern matches the sub-part of the given name using CamelCase rules, or
   * false otherwise. char[] CamelCase matching does NOT accept explicit wild-cards '*' and '?' and
   * is inherently case sensitive. Can match only subset of name/pattern, considering end positions
   * as non-inclusive. The sub-pattern is defined by the patternStart and patternEnd positions.
   * <p>
   * CamelCase denotes the convention of writing compound names without spaces, and capitalizing
   * every term. This function recognizes both upper and lower CamelCase, depending whether the
   * leading character is capitalized or not. The leading part of an upper CamelCase pattern is
   * assumed to contain a sequence of capitals which are appearing in the matching name; e.g. 'NPE'
   * will match 'NullPointerException', but not 'NewPerfData'. A lower CamelCase pattern uses a
   * lowercase first character. In Java, type names follow the upper CamelCase convention, whereas
   * method or field names follow the lower CamelCase convention.
   * <p>
   * The pattern may contain lowercase characters, which will be matched in a case sensitive way.
   * These characters must appear in sequence in the name. For instance, 'NPExcep' will match
   * 'NullPointerException', but not 'NullPointerExCEPTION' or 'NuPoEx' will match
   * 'NullPointerException', but not 'NoPointerException'.
   * <p>
   * Digit characters are treated in a special way. They can be used in the pattern but are not
   * always considered as leading character. For instance, both 'UTF16DSS' and 'UTFDSS' patterns
   * will match 'UTF16DocumentScannerSupport'.
   * <p>
   * CamelCase can be restricted to match only the same count of parts. When this restriction is
   * specified the given pattern and the given name must have <b>exactly</b> the same number of
   * parts (i.e. the same number of uppercase characters).<br>
   * For instance, 'HM' , 'HaMa' and 'HMap' patterns will match 'HashMap' and 'HatMapper' <b>but
   * not</b> 'HashMapEntry'.
   * <p>
   * Special cases:
   * <ul>
   * <li>a null name never matches, <b>even when the pattern is also null</b> (here the name is
   * tested first, unlike in the overloads without start/end positions);</li>
   * <li>a null pattern matches every non-null name;</li>
   * <li>a negative patternEnd (resp. nameEnd) stands for the length of the pattern (resp. name)
   * array;</li>
   * <li>an empty sub-pattern matches only an empty sub-name.</li>
   * </ul>
   *
   * <pre>
   * Examples:
   * <ol>
   * <li> pattern = "NPE".toCharArray()
   * patternStart = 0
   * patternEnd = 3
   * name = "NullPointerException".toCharArray()
   * nameStart = 0
   * nameEnd = 20
   * result => true</li>
   * <li> pattern = "NPE".toCharArray()
   * patternStart = 0
   * patternEnd = 3
   * name = "NoPermissionException".toCharArray()
   * nameStart = 0
   * nameEnd = 21
   * result => true</li>
   * <li> pattern = "NuPoEx".toCharArray()
   * patternStart = 0
   * patternEnd = 6
   * name = "NullPointerException".toCharArray()
   * nameStart = 0
   * nameEnd = 20
   * result => true</li>
   * <li> pattern = "NuPoEx".toCharArray()
   * patternStart = 0
   * patternEnd = 6
   * name = "NoPermissionException".toCharArray()
   * nameStart = 0
   * nameEnd = 21
   * result => false</li>
   * <li> pattern = "npe".toCharArray()
   * patternStart = 0
   * patternEnd = 3
   * name = "NullPointerException".toCharArray()
   * nameStart = 0
   * nameEnd = 20
   * result => false</li>
   * <li> pattern = "IPL3".toCharArray()
   * patternStart = 0
   * patternEnd = 4
   * name = "IPerspectiveListener3".toCharArray()
   * nameStart = 0
   * nameEnd = 21
   * result => true</li>
   * <li> pattern = "HM".toCharArray()
   * patternStart = 0
   * patternEnd = 2
   * name = "HashMapEntry".toCharArray()
   * nameStart = 0
   * nameEnd = 12
   * result => (samePartCount == false)</li>
   * </ol>
   * </pre>
   *
   * @param pattern the given pattern
   * @param patternStart the start index of the pattern, inclusive
   * @param patternEnd the end index of the pattern, exclusive; negative means pattern.length
   * @param name the given name
   * @param nameStart the start index of the name, inclusive
   * @param nameEnd the end index of the name, exclusive; negative means name.length
   * @param samePartCount flag telling whether the pattern and the name should have the same count
   *          of parts or not.<br>
   *          For example:
   *          <ul>
   *          <li>'HM' type string pattern will match 'HashMap' and 'HtmlMapper' types, but not
   *          'HashMapEntry'</li>
   *          <li>'HMap' type string pattern will still match previous 'HashMap' and 'HtmlMapper'
   *          types, but not 'HighMagnitude'</li>
   *          </ul>
   * @return true if a sub-pattern matches the sub-part of the given name, false otherwise
   */
  public static final boolean camelCaseMatch(char[] pattern, int patternStart, int patternEnd,
      char[] name, int nameStart, int nameEnd, boolean samePartCount) {
    // null name cannot match
    if (name == null) {
      return false;
    }
    // null pattern is equivalent to '*'
    if (pattern == null) {
      return true;
    }
    // a negative end position is shorthand for "up to the end of the array"
    if (patternEnd < 0) {
      patternEnd = pattern.length;
    }
    if (nameEnd < 0) {
      nameEnd = name.length;
    }
    // an empty pattern matches only an empty name
    if (patternEnd <= patternStart) {
      return nameEnd <= nameStart;
    }
    if (nameEnd <= nameStart) {
      return false;
    }
    // check first pattern char
    if (name[nameStart] != pattern[patternStart]) {
      // first char must strictly match (upper/lower)
      return false;
    }
    char patternChar, nameChar;
    int iPattern = patternStart;
    int iName = nameStart;
    // Main loop is on pattern characters; both cursors always sit on a matched pair when an
    // iteration starts, hence the unconditional increments below.
    while (true) {
      iPattern++;
      iName++;
      if (iPattern == patternEnd) { // we have exhausted pattern...
        // it's a match if the name can have additional parts (i.e. uppercase
        // characters) or is also exhausted
        if (!samePartCount || iName == nameEnd) {
          return true;
        }
        // otherwise it's a match only if the name has no more uppercase characters
        while (true) {
          if (iName == nameEnd) {
            // we have exhausted the name, so it's a match
            return true;
          }
          nameChar = name[iName];
          // an uppercase (or non-identifier) character starts an extra part: reject
          if (!Character.isJavaIdentifierPart(nameChar) || Character.isUpperCase(nameChar)) {
            return false;
          }
          iName++;
        }
      }
      if (iName == nameEnd) {
        // We have exhausted the name (and not the pattern), so it's not a match
        return false;
      }
      // For as long as we're exactly matching, bring it on (even if it's a lower case character)
      if ((patternChar = pattern[iPattern]) == name[iName]) {
        continue;
      }
      // If characters are not equals, then it's not a match if patternChar is lowercase
      if (Character.isJavaIdentifierPart(patternChar) && !Character.isUpperCase(patternChar)
          && !Character.isDigit(patternChar)) {
        return false;
      }
      // patternChar starts a new part (uppercase, digit or non-identifier character), so skip
      // the remainder of the current part in name and look for the start of its next part
      while (true) {
        if (iName == nameEnd) {
          // We have exhausted name (and not pattern), so it's not a match
          return false;
        }
        nameChar = name[iName];
        if (Character.isJavaIdentifierPart(nameChar) && !Character.isUpperCase(nameChar)
            && !Character.isDigit(nameChar)) {
          // lowercase letter: still inside the current part
          iName++;
        } else if (Character.isDigit(nameChar)) {
          // digits are optional in the pattern: take one only if the pattern asks for it
          if (patternChar == nameChar) {
            break;
          }
          iName++;
        } else if (patternChar != nameChar) {
          // next part of name starts with a different character than the pattern expects
          return false;
        } else {
          break;
        }
      }
      // At this point pattern[iPattern] == name[iName]; the next iteration moves past both.
    }
  }

  /**
   * Return true if the pattern matches the given name, false otherwise. This char[] pattern
   * matching accepts wild-cards '*' and '?'. When not case sensitive, the pattern is assumed to
   * already be lowercased, the name will be lowercased character per character as comparing. If
   * name is null, the answer is false. If pattern is null, the answer is true if name is not
   * null. A pattern without any '*' must cover the whole name to match.<br>
   * <br>
   * For example:
   * <ol>
   * <li>
   *
   * <pre>
   * pattern = { '?', 'b', '*' }
   * name = { 'a', 'b', 'c' , 'd' }
   * isCaseSensitive = true
   * result => true
   * </pre>
   * </li>
   * <li>
   *
   * <pre>
   * pattern = { '?', 'b', '?' }
   * name = { 'a', 'b', 'c' , 'd' }
   * isCaseSensitive = true
   * result => false
   * </pre>
   * </li>
   * <li>
   *
   * <pre>
   * pattern = { 'b', '*' }
   * name = { 'a', 'b', 'c' , 'd' }
   * isCaseSensitive = true
   * result => false
   * </pre>
   * </li>
   * </ol>
   *
   * @param pattern the given pattern
   * @param name the given name
   * @param isCaseSensitive flag to know whether or not the matching should be case sensitive
   * @return true if the pattern matches the given name, false otherwise
   */
  public static final boolean match(char[] pattern, char[] name, boolean isCaseSensitive) {
    if (name == null) {
      return false; // null name cannot match
    }
    if (pattern == null) {
      return true; // null pattern is equivalent to '*'
    }
    return match(pattern, 0, pattern.length, name, 0, name.length, isCaseSensitive);
  }

  /**
   * Return true if a sub-pattern matches the subpart of the given name, false otherwise. char[]
   * pattern matching, accepting wild-cards '*' and '?'. Can match only subset of name/pattern. end
   * positions are non-inclusive. The subpattern is defined by the patternStart and patternEnd
   * positions. When not case sensitive, the pattern is assumed to already be lowercased, the name
   * will be lowercased character per character as comparing.
   * <p>
   * A null name never matches; a null pattern matches every non-null name. A negative patternEnd
   * (resp. nameEnd) stands for the length of the pattern (resp. name) array. A sub-pattern that
   * contains no '*' matches only if it covers the whole sub-name, so an empty sub-pattern matches
   * only an empty sub-name.<br>
   * <br>
   * For example:
   * <ol>
   * <li>
   *
   * <pre>
   * pattern = { '?', 'b', '*' }
   * patternStart = 1
   * patternEnd = 3
   * name = { 'a', 'b', 'c' , 'd' }
   * nameStart = 1
   * nameEnd = 4
   * isCaseSensitive = true
   * result => true
   * </pre>
   * </li>
   * <li>
   *
   * <pre>
   * pattern = { '?', 'b', '*' }
   * patternStart = 1
   * patternEnd = 2
   * name = { 'a', 'b', 'c' , 'd' }
   * nameStart = 1
   * nameEnd = 2
   * isCaseSensitive = true
   * result => true
   * </pre>
   * </li>
   * <li>
   *
   * <pre>
   * pattern = { '?', 'b', '*' }
   * patternStart = 1
   * patternEnd = 2
   * name = { 'a', 'b', 'c' , 'd' }
   * nameStart = 1
   * nameEnd = 4
   * isCaseSensitive = true
   * result => false
   * </pre>
   * </li>
   * </ol>
   *
   * @param pattern the given pattern
   * @param patternStart the given pattern start
   * @param patternEnd the given pattern end; negative means pattern.length
   * @param name the given name
   * @param nameStart the given name start
   * @param nameEnd the given name end; negative means name.length
   * @param isCaseSensitive flag to know if the matching should be case sensitive
   * @return true if a sub-pattern matches the subpart of the given name, false otherwise
   */
  public static final boolean match(char[] pattern, int patternStart, int patternEnd, char[] name,
      int nameStart, int nameEnd, boolean isCaseSensitive) {
    if (name == null) {
      return false; // null name cannot match
    }
    if (pattern == null) {
      return true; // null pattern is equivalent to '*'
    }
    int iPattern = patternStart;
    int iName = nameStart;
    if (patternEnd < 0) {
      patternEnd = pattern.length;
    }
    if (nameEnd < 0) {
      nameEnd = name.length;
    }
    /* check first segment: everything in front of the first '*' is anchored at nameStart */
    char patternChar;
    while (true) {
      if (iPattern >= patternEnd) {
        // The pattern holds no '*' at all, so it matches only if the name is used up as well.
        // Without this check the pattern would be re-tried against later parts of the name,
        // letting e.g. "a" match "aXa".
        return iName >= nameEnd;
      }
      if ((patternChar = pattern[iPattern]) == '*') {
        break;
      }
      if (iName == nameEnd) {
        return false; // name has ended but not the pattern
      }
      if (patternChar != (isCaseSensitive ? name[iName] : toLowerCase(name[iName]))
          && patternChar != '?') {
        return false;
      }
      iName++;
      iPattern++;
    }
    /* check sequence of star+segment */
    int segmentStart = ++iPattern; // skip star; first pattern index of the current segment
    int prefixStart = iName; // name index at which the current segment is being tried
    while (iName < nameEnd) {
      if (iPattern == patternEnd) {
        // segment matched but name is not exhausted: retry the segment one character further
        iPattern = segmentStart;
        iName = ++prefixStart;
        continue;
      }
      /* segment is ending */
      if ((patternChar = pattern[iPattern]) == '*') {
        segmentStart = ++iPattern; // skip star
        if (segmentStart == patternEnd) {
          return true; // trailing star swallows the remainder of the name
        }
        prefixStart = iName;
        continue;
      }
      /* check current name character */
      if ((isCaseSensitive ? name[iName] : toLowerCase(name[iName])) != patternChar
          && patternChar != '?') {
        iPattern = segmentStart; // mismatch - restart current segment
        iName = ++prefixStart;
        continue;
      }
      iName++;
      iPattern++;
    }
    // The name is exhausted: whatever is left of the pattern has to match the empty string,
    // which is only possible if it consists solely of stars.
    while (iPattern < patternEnd && pattern[iPattern] == '*') {
      iPattern++;
    }
    return segmentStart == patternEnd || iPattern == patternEnd;
  }

  /**
   * Return the lowercase form of the given character, as computed by
   * {@link Character#toLowerCase(char)}.
   *
   * @param c the character to convert
   * @return the lowercase equivalent of the character
   */
  private static char toLowerCase(char c) {
    return Character.toLowerCase(c);
  }
}