/* * Copyright (c) 2013, the Dart project authors. * * Licensed under the Eclipse Public License v1.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.eclipse.org/legal/epl-v10.html * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package com.google.dart.tools.core.utilities.general; /** * @coverage dart.engine.utilities */ public class CharOperation { /** * Return true if the pattern matches the given name using CamelCase rules, or false otherwise. * char[] CamelCase matching does NOT accept explicit wild-cards '*' and '?' and is inherently * case sensitive. * <p> * CamelCase denotes the convention of writing compound names without spaces, and capitalizing * every term. This function recognizes both upper and lower CamelCase, depending whether the * leading character is capitalized or not. The leading part of an upper CamelCase pattern is * assumed to contain a sequence of capitals which are appearing in the matching name; e.g. 'NPE' * will match 'NullPointerException', but not 'NewPerfData'. A lower CamelCase pattern uses a * lowercase first character. In Java, type names follow the upper CamelCase convention, whereas * method or field names follow the lower CamelCase convention. * <p> * The pattern may contain lowercase characters, which will be matched in a case sensitive way. * These characters must appear in sequence in the name. For instance, 'NPExcep' will match * 'NullPointerException', but not 'NullPointerExCEPTION' or 'NuPoEx' will match * 'NullPointerException', but not 'NoPointerException'. * <p> * Digit characters are treated in a special way. They can be used in the pattern but are not * always considered as leading character. For instance, both 'UTF16DSS' and 'UTFDSS' patterns * will match 'UTF16DocumentScannerSupport'. * <p> * Using this method allows matching names to have more parts than the specified pattern (see * {@link #camelCaseMatch(char[], char[], boolean)}).<br> * For instance, 'HM' , 'HaMa' and 'HMap' patterns will match 'HashMap', 'HatMapper' <b>and * also</b> 'HashMapEntry'. * <p> * * <pre> * Examples:<ol> * <li> pattern = "NPE".toCharArray() * name = "NullPointerException".toCharArray() * result => true</li> * <li> pattern = "NPE".toCharArray() * name = "NoPermissionException".toCharArray() * result => true</li> * <li> pattern = "NuPoEx".toCharArray() * name = "NullPointerException".toCharArray() * result => true</li> * <li> pattern = "NuPoEx".toCharArray() * name = "NoPermissionException".toCharArray() * result => false</li> * <li> pattern = "npe".toCharArray() * name = "NullPointerException".toCharArray() * result => false</li> * <li> pattern = "IPL3".toCharArray() * name = "IPerspectiveListener3".toCharArray() * result => true</li> * <li> pattern = "HM".toCharArray() * name = "HashMapEntry".toCharArray() * result => true</li> * </ol></pre> * * @param pattern the given pattern * @param name the given name * @return true if the pattern matches the given name, false otherwise */ public static final boolean camelCaseMatch(char[] pattern, char[] name) { // null pattern is equivalent to '*' if (pattern == null) { return true; } // null name cannot match if (name == null) { return false; } return camelCaseMatch(pattern, 0, pattern.length, name, 0, name.length, false); } /** * Return true if the pattern matches the given name using CamelCase rules, or false otherwise. * char[] CamelCase matching does NOT accept explicit wild-cards '*' and '?' and is inherently * case sensitive. * <p> * CamelCase denotes the convention of writing compound names without spaces, and capitalizing * every term. This function recognizes both upper and lower CamelCase, depending whether the * leading character is capitalized or not. The leading part of an upper CamelCase pattern is * assumed to contain a sequence of capitals which are appearing in the matching name; e.g. 'NPE' * will match 'NullPointerException', but not 'NewPerfData'. A lower CamelCase pattern uses a * lowercase first character. In Java, type names follow the upper CamelCase convention, whereas * method or field names follow the lower CamelCase convention. * <p> * The pattern may contain lowercase characters, which will be matched in a case sensitive way. * These characters must appear in sequence in the name. For instance, 'NPExcep' will match * 'NullPointerException', but not 'NullPointerExCEPTION' or 'NuPoEx' will match * 'NullPointerException', but not 'NoPointerException'. * <p> * Digit characters are treated in a special way. They can be used in the pattern but are not * always considered as leading character. For instance, both 'UTF16DSS' and 'UTFDSS' patterns * will match 'UTF16DocumentScannerSupport'. * <p> * CamelCase can be restricted to match only the same count of parts. When this restriction is * specified the given pattern and the given name must have <b>exactly</b> the same number of * parts (i.e. the same number of uppercase characters).<br> * For instance, 'HM' , 'HaMa' and 'HMap' patterns will match 'HashMap' and 'HatMapper' <b>but * not</b> 'HashMapEntry'. * <p> * * <pre> * Examples:<ol> * <li> pattern = "NPE".toCharArray() * name = "NullPointerException".toCharArray() * result => true</li> * <li> pattern = "NPE".toCharArray() * name = "NoPermissionException".toCharArray() * result => true</li> * <li> pattern = "NuPoEx".toCharArray() * name = "NullPointerException".toCharArray() * result => true</li> * <li> pattern = "NuPoEx".toCharArray() * name = "NoPermissionException".toCharArray() * result => false</li> * <li> pattern = "npe".toCharArray() * name = "NullPointerException".toCharArray() * result => false</li> * <li> pattern = "IPL3".toCharArray() * name = "IPerspectiveListener3".toCharArray() * result => true</li> * <li> pattern = "HM".toCharArray() * name = "HashMapEntry".toCharArray() * result => (samePartCount == false)</li> * </ol></pre> * * @param pattern the given pattern * @param name the given name * @param samePartCount flag telling whether the pattern and the name should have the same count * of parts or not.<br> *   For example: * <ul> * <li>'HM' type string pattern will match 'HashMap' and 'HtmlMapper' types, but not * 'HashMapEntry'</li> <li>'HMap' type string pattern will still match previous 'HashMap' * and 'HtmlMapper' types, but not 'HighMagnitude'</li> * </ul> * @return true if the pattern matches the given name, false otherwise */ public static final boolean camelCaseMatch(char[] pattern, char[] name, boolean samePartCount) { // null pattern is equivalent to '*' if (pattern == null) { return true; } // null name cannot match if (name == null) { return false; } return camelCaseMatch(pattern, 0, pattern.length, name, 0, name.length, samePartCount); } /** * Return true if a sub-pattern matches the sub-part of the given name using CamelCase rules, or * false otherwise. char[] CamelCase matching does NOT accept explicit wild-cards '*' and '?' and * is inherently case sensitive. Can match only subset of name/pattern, considering end positions * as non-inclusive. The sub-pattern is defined by the patternStart and patternEnd positions. * <p> * CamelCase denotes the convention of writing compound names without spaces, and capitalizing * every term. This function recognizes both upper and lower CamelCase, depending whether the * leading character is capitalized or not. The leading part of an upper CamelCase pattern is * assumed to contain a sequence of capitals which are appearing in the matching name; e.g. 'NPE' * will match 'NullPointerException', but not 'NewPerfData'. A lower CamelCase pattern uses a * lowercase first character. In Java, type names follow the upper CamelCase convention, whereas * method or field names follow the lower CamelCase convention. * <p> * The pattern may contain lowercase characters, which will be matched in a case sensitive way. * These characters must appear in sequence in the name. For instance, 'NPExcep' will match * 'NullPointerException', but not 'NullPointerExCEPTION' or 'NuPoEx' will match * 'NullPointerException', but not 'NoPointerException'. * <p> * Digit characters are treated in a special way. They can be used in the pattern but are not * always considered as leading character. For instance, both 'UTF16DSS' and 'UTFDSS' patterns * will match 'UTF16DocumentScannerSupport'. * <p> * Digit characters are treated in a special way. They can be used in the pattern but are not * always considered as leading character. For instance, both 'UTF16DSS' and 'UTFDSS' patterns * will match 'UTF16DocumentScannerSupport'. * <p> * Using this method allows matching names to have more parts than the specified pattern (see * {@link #camelCaseMatch(char[], int, int, char[], int, int, boolean)}).<br> * For instance, 'HM' , 'HaMa' and 'HMap' patterns will match 'HashMap', 'HatMapper' <b>and * also</b> 'HashMapEntry'. * <p> * Examples: * <ol> * <li>pattern = "NPE".toCharArray() patternStart = 0 patternEnd = 3 name = * "NullPointerException".toCharArray() nameStart = 0 nameEnd = 20 result => true</li> * <li>pattern = "NPE".toCharArray() patternStart = 0 patternEnd = 3 name = * "NoPermissionException".toCharArray() nameStart = 0 nameEnd = 21 result => true</li> * <li>pattern = "NuPoEx".toCharArray() patternStart = 0 patternEnd = 6 name = * "NullPointerException".toCharArray() nameStart = 0 nameEnd = 20 result => true</li> * <li>pattern = "NuPoEx".toCharArray() patternStart = 0 patternEnd = 6 name = * "NoPermissionException".toCharArray() nameStart = 0 nameEnd = 21 result => false</li> * <li>pattern = "npe".toCharArray() patternStart = 0 patternEnd = 3 name = * "NullPointerException".toCharArray() nameStart = 0 nameEnd = 20 result => false</li> * <li>pattern = "IPL3".toCharArray() patternStart = 0 patternEnd = 4 name = * "IPerspectiveListener3".toCharArray() nameStart = 0 nameEnd = 21 result => true</li> * <li>pattern = "HM".toCharArray() patternStart = 0 patternEnd = 2 name = * "HashMapEntry".toCharArray() nameStart = 0 nameEnd = 12 result => true</li> * </ol> * * @param pattern the given pattern * @param patternStart the start index of the pattern, inclusive * @param patternEnd the end index of the pattern, exclusive * @param name the given name * @param nameStart the start index of the name, inclusive * @param nameEnd the end index of the name, exclusive * @return true if a sub-pattern matches the sub-part of the given name, false otherwise */ public static final boolean camelCaseMatch(char[] pattern, int patternStart, int patternEnd, char[] name, int nameStart, int nameEnd) { return camelCaseMatch(pattern, patternStart, patternEnd, name, nameStart, nameEnd, false); } /** * Return true if a sub-pattern matches the sub-part of the given name using CamelCase rules, or * false otherwise. char[] CamelCase matching does NOT accept explicit wild-cards '*' and '?' and * is inherently case sensitive. Can match only subset of name/pattern, considering end positions * as non-inclusive. The sub-pattern is defined by the patternStart and patternEnd positions. * <p> * CamelCase denotes the convention of writing compound names without spaces, and capitalizing * every term. This function recognizes both upper and lower CamelCase, depending whether the * leading character is capitalized or not. The leading part of an upper CamelCase pattern is * assumed to contain a sequence of capitals which are appearing in the matching name; e.g. 'NPE' * will match 'NullPointerException', but not 'NewPerfData'. A lower CamelCase pattern uses a * lowercase first character. In Java, type names follow the upper CamelCase convention, whereas * method or field names follow the lower CamelCase convention. * <p> * The pattern may contain lowercase characters, which will be matched in a case sensitive way. * These characters must appear in sequence in the name. For instance, 'NPExcep' will match * 'NullPointerException', but not 'NullPointerExCEPTION' or 'NuPoEx' will match * 'NullPointerException', but not 'NoPointerException'. * <p> * Digit characters are treated in a special way. They can be used in the pattern but are not * always considered as leading character. For instance, both 'UTF16DSS' and 'UTFDSS' patterns * will match 'UTF16DocumentScannerSupport'. * <p> * CamelCase can be restricted to match only the same count of parts. When this restriction is * specified the given pattern and the given name must have <b>exactly</b> the same number of * parts (i.e. the same number of uppercase characters).<br> * For instance, 'HM' , 'HaMa' and 'HMap' patterns will match 'HashMap' and 'HatMapper' <b>but * not</b> 'HashMapEntry'. * <p> * * <pre> * Examples: * <ol> * <li> pattern = "NPE".toCharArray() * patternStart = 0 * patternEnd = 3 * name = "NullPointerException".toCharArray() * nameStart = 0 * nameEnd = 20 * result => true</li> * <li> pattern = "NPE".toCharArray() * patternStart = 0 * patternEnd = 3 * name = "NoPermissionException".toCharArray() * nameStart = 0 * nameEnd = 21 * result => true</li> * <li> pattern = "NuPoEx".toCharArray() * patternStart = 0 * patternEnd = 6 * name = "NullPointerException".toCharArray() * nameStart = 0 * nameEnd = 20 * result => true</li> * <li> pattern = "NuPoEx".toCharArray() * patternStart = 0 * patternEnd = 6 * name = "NoPermissionException".toCharArray() * nameStart = 0 * nameEnd = 21 * result => false</li> * <li> pattern = "npe".toCharArray() * patternStart = 0 * patternEnd = 3 * name = "NullPointerException".toCharArray() * nameStart = 0 * nameEnd = 20 * result => false</li> * <li> pattern = "IPL3".toCharArray() * patternStart = 0 * patternEnd = 4 * name = "IPerspectiveListener3".toCharArray() * nameStart = 0 * nameEnd = 21 * result => true</li> * <li> pattern = "HM".toCharArray() * patternStart = 0 * patternEnd = 2 * name = "HashMapEntry".toCharArray() * nameStart = 0 * nameEnd = 12 * result => (samePartCount == false)</li> * </ol> * </pre> * * @param pattern the given pattern * @param patternStart the start index of the pattern, inclusive * @param patternEnd the end index of the pattern, exclusive * @param name the given name * @param nameStart the start index of the name, inclusive * @param nameEnd the end index of the name, exclusive * @param samePartCount flag telling whether the pattern and the name should have the same count * of parts or not.<br> *   For example: * <ul> * <li>'HM' type string pattern will match 'HashMap' and 'HtmlMapper' types, but not * 'HashMapEntry'</li> <li>'HMap' type string pattern will still match previous 'HashMap' * and 'HtmlMapper' types, but not 'HighMagnitude'</li> * </ul> * @return true if a sub-pattern matches the sub-part of the given name, false otherwise */ public static final boolean camelCaseMatch(char[] pattern, int patternStart, int patternEnd, char[] name, int nameStart, int nameEnd, boolean samePartCount) { // null name cannot match if (name == null) { return false; } // null pattern is equivalent to '*' if (pattern == null) { return true; } if (patternEnd < 0) { patternEnd = pattern.length; } if (nameEnd < 0) { nameEnd = name.length; } if (patternEnd <= patternStart) { return nameEnd <= nameStart; } if (nameEnd <= nameStart) { return false; } // check first pattern char if (name[nameStart] != pattern[patternStart]) { // first char must strictly match (upper/lower) return false; } char patternChar, nameChar; int iPattern = patternStart; int iName = nameStart; // Main loop is on pattern characters while (true) { iPattern++; iName++; if (iPattern == patternEnd) { // we have exhausted pattern... // it's a match if the name can have additional parts (i.e. uppercase // characters) or is also exhausted if (!samePartCount || iName == nameEnd) { return true; } // otherwise it's a match only if the name has no more uppercase characters while (true) { if (iName == nameEnd) { // we have exhausted the name, so it's a match return true; } nameChar = name[iName]; // test if the name character is uppercase if (!Character.isJavaIdentifierPart(nameChar) || Character.isUpperCase(nameChar)) { return false; } iName++; } } if (iName == nameEnd) { // We have exhausted the name (and not the pattern), so it's not a match return false; } // For as long as we're exactly matching, bring it on (even if it's a lower case character) if ((patternChar = pattern[iPattern]) == name[iName]) { continue; } // If characters are not equals, then it's not a match if patternChar is lowercase if (Character.isJavaIdentifierPart(patternChar) && !Character.isUpperCase(patternChar) && !Character.isDigit(patternChar)) { return false; } // patternChar is uppercase, so let's find the next uppercase in name while (true) { if (iName == nameEnd) { // We have exhausted name (and not pattern), so it's not a match return false; } nameChar = name[iName]; if (Character.isJavaIdentifierPart(nameChar) && !Character.isUpperCase(nameChar) && !Character.isDigit(nameChar)) { iName++; } else if (Character.isDigit(nameChar)) { if (patternChar == nameChar) { break; } iName++; } else if (patternChar != nameChar) { return false; } else { break; } } // At this point, either name has been exhausted, or it is at an uppercase // letter. // Since pattern is also at an uppercase letter } } /** * Return true if the pattern matches the given name, false otherwise. This char[] pattern * matching accepts wild-cards '*' and '?'. When not case sensitive, the pattern is assumed to * already be lowercased, the name will be lowercased character per character as comparing. If * name is null, the answer is false. If pattern is null, the answer is true if name is not null. <br> * <br> * For example: * <ol> * <li> * * <pre> * pattern = { '?', 'b', '*' } * name = { 'a', 'b', 'c' , 'd' } * isCaseSensitive = true * result => true * </pre> * </li> * <li> * * <pre> * pattern = { '?', 'b', '?' } * name = { 'a', 'b', 'c' , 'd' } * isCaseSensitive = true * result => false * </pre> * </li> * <li> * * <pre> * pattern = { 'b', '*' } * name = { 'a', 'b', 'c' , 'd' } * isCaseSensitive = true * result => false * </pre> * </li> * </ol> * * @param pattern the given pattern * @param name the given name * @param isCaseSensitive flag to know whether or not the matching should be case sensitive * @return true if the pattern matches the given name, false otherwise */ public static final boolean match(char[] pattern, char[] name, boolean isCaseSensitive) { if (name == null) { return false; // null name cannot match } if (pattern == null) { return true; // null pattern is equivalent to '*' } return match(pattern, 0, pattern.length, name, 0, name.length, isCaseSensitive); } /** * Return true if a sub-pattern matches the subpart of the given name, false otherwise. char[] * pattern matching, accepting wild-cards '*' and '?'. Can match only subset of name/pattern. end * positions are non-inclusive. The subpattern is defined by the patternStart and pattternEnd * positions. When not case sensitive, the pattern is assumed to already be lowercased, the name * will be lowercased character per character as comparing. <br> * <br> * For example: * <ol> * <li> * * <pre> * pattern = { '?', 'b', '*' } * patternStart = 1 * patternEnd = 3 * name = { 'a', 'b', 'c' , 'd' } * nameStart = 1 * nameEnd = 4 * isCaseSensitive = true * result => true * </pre> * </li> * <li> * * <pre> * pattern = { '?', 'b', '*' } * patternStart = 1 * patternEnd = 2 * name = { 'a', 'b', 'c' , 'd' } * nameStart = 1 * nameEnd = 2 * isCaseSensitive = true * result => false * </pre> * </li> * </ol> * * @param pattern the given pattern * @param patternStart the given pattern start * @param patternEnd the given pattern end * @param name the given name * @param nameStart the given name start * @param nameEnd the given name end * @param isCaseSensitive flag to know if the matching should be case sensitive * @return true if a sub-pattern matches the subpart of the given name, false otherwise */ public static final boolean match(char[] pattern, int patternStart, int patternEnd, char[] name, int nameStart, int nameEnd, boolean isCaseSensitive) { if (name == null) { return false; // null name cannot match } if (pattern == null) { return true; // null pattern is equivalent to '*' } int iPattern = patternStart; int iName = nameStart; if (patternEnd < 0) { patternEnd = pattern.length; } if (nameEnd < 0) { nameEnd = name.length; } /* check first segment */ char patternChar = 0; while ((iPattern < patternEnd) && (patternChar = pattern[iPattern]) != '*') { if (iName == nameEnd) { return false; } if (patternChar != (isCaseSensitive ? name[iName] : toLowerCase(name[iName])) && patternChar != '?') { return false; } iName++; iPattern++; } /* check sequence of star+segment */ int segmentStart; if (patternChar == '*') { segmentStart = ++iPattern; // skip star } else { segmentStart = 0; // force iName check } int prefixStart = iName; checkSegment : while (iName < nameEnd) { if (iPattern == patternEnd) { iPattern = segmentStart; // mismatch - restart current segment iName = ++prefixStart; continue checkSegment; } /* segment is ending */ if ((patternChar = pattern[iPattern]) == '*') { segmentStart = ++iPattern; // skip start if (segmentStart == patternEnd) { return true; } prefixStart = iName; continue checkSegment; } /* check current name character */ if ((isCaseSensitive ? name[iName] : toLowerCase(name[iName])) != patternChar && patternChar != '?') { iPattern = segmentStart; // mismatch - restart current segment iName = ++prefixStart; continue checkSegment; } iName++; iPattern++; } return (segmentStart == patternEnd) || (iName == nameEnd && iPattern == patternEnd) || (iPattern == patternEnd - 1 && pattern[iPattern] == '*'); } private static char toLowerCase(char c) { return Character.toLowerCase(c); } }