/* * Copyright (c) 2012, the Dart project authors. * * Licensed under the Eclipse Public License v1.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.eclipse.org/legal/epl-v10.html * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package com.google.dart.tools.search.internal.core.text; import com.google.dart.tools.search.internal.ui.SearchMessages; import org.eclipse.jface.text.FindReplaceDocumentAdapter; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** * */ public class PatternConstructor { private PatternConstructor() { // don't instantiate } public static Pattern createPattern(String pattern, boolean isCaseSensitive, boolean isRegex) throws PatternSyntaxException { return createPattern(pattern, isRegex, true, isCaseSensitive, false); } /** * Creates a pattern element from the pattern string which is either a reg-ex expression or in our * old 'StringMatcher' format. * * @param pattern The search pattern * @param isRegex <code>true</code> if the passed string already is a reg-ex pattern * @param isStringMatcher <code>true</code> if the passed string is in the StringMatcher format. * @param isCaseSensitive Set to <code>true</code> to create a case insensitive pattern * @param isWholeWord <code>true</code> to create a pattern that requires a word boundary at the * beginning and the end. * @return The created pattern * @throws PatternSyntaxException if "\R" is at an illegal position */ public static Pattern createPattern(String pattern, boolean isRegex, boolean isStringMatcher, boolean isCaseSensitive, boolean isWholeWord) throws PatternSyntaxException { if (isRegex) { pattern = substituteLinebreak(pattern); if (isWholeWord) { StringBuffer buffer = new StringBuffer(pattern.length() + 10); buffer.append("\\b(?:").append(pattern).append(")\\b"); //$NON-NLS-1$ //$NON-NLS-2$ pattern = buffer.toString(); } } else { int len = pattern.length(); StringBuffer buffer = new StringBuffer(len + 10); // don't add a word boundary if the search text does not start with // a word char. (this works around a user input error). if (isWholeWord && len > 0 && isWordChar(pattern.charAt(0))) { buffer.append("\\b"); //$NON-NLS-1$ } appendAsRegEx(isStringMatcher, pattern, buffer); if (isWholeWord && len > 0 && isWordChar(pattern.charAt(len - 1))) { buffer.append("\\b"); //$NON-NLS-1$ } pattern = buffer.toString(); } int regexOptions = Pattern.MULTILINE; if (!isCaseSensitive) { regexOptions |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; } return Pattern.compile(pattern, regexOptions); } /** * Copied from {@link org.eclipse.jface.text.FindReplaceDocumentAdapter}' to support '\R' * * @param findString the string to substitute * @return the new string * @throws PatternSyntaxException if "\R" is at an illegal position */ private static String substituteLinebreak(String findString) throws PatternSyntaxException { int length = findString.length(); StringBuffer buf = new StringBuffer(length); int inCharGroup = 0; int inBraces = 0; boolean inQuote = false; for (int i = 0; i < length; i++) { char ch = findString.charAt(i); switch (ch) { case '[': buf.append(ch); if (!inQuote) inCharGroup++; break; case ']': buf.append(ch); if (!inQuote) inCharGroup--; break; case '{': buf.append(ch); if (!inQuote && inCharGroup == 0) inBraces++; break; case '}': buf.append(ch); if (!inQuote && inCharGroup == 0) inBraces--; break; case '\\': if (i + 1 < length) { char ch1 = findString.charAt(i + 1); if (inQuote) { if (ch1 == 'E') inQuote = false; buf.append(ch).append(ch1); i++; } else if (ch1 == 'R') { if (inCharGroup > 0 || inBraces > 0) { String msg = SearchMessages.PatternConstructor_error_line_delim_position; throw new PatternSyntaxException(msg, findString, i); } buf.append("(?>\\r\\n?|\\n)"); //$NON-NLS-1$ i++; } else { if (ch1 == 'Q') { inQuote = true; } buf.append(ch).append(ch1); i++; } } else { buf.append(ch); } break; default: buf.append(ch); break; } } return buf.toString(); } private static boolean isWordChar(char c) { return Character.isLetterOrDigit(c); } /** * Creates a pattern element from an array of patterns in the old 'StringMatcher' format. * * @param patterns The search patterns * @param isCaseSensitive Set to <code>true</code> to create a case insensitive pattern * @return The created pattern * @throws PatternSyntaxException if "\R" is at an illegal position */ public static Pattern createPattern(String[] patterns, boolean isCaseSensitive) throws PatternSyntaxException { StringBuffer pattern = new StringBuffer(); for (int i = 0; i < patterns.length; i++) { if (i > 0) { // note that this works only as we know that the operands of the // or expression will be simple and need no brackets. pattern.append('|'); } appendAsRegEx(true, patterns[i], pattern); } return createPattern(pattern.toString(), true, true, isCaseSensitive, false); } public static StringBuffer appendAsRegEx(boolean isStringMatcher, String pattern, StringBuffer buffer) { boolean isEscaped = false; for (int i = 0; i < pattern.length(); i++) { char c = pattern.charAt(i); switch (c) { // the backslash case '\\': // the backslash is escape char in string matcher if (isStringMatcher && !isEscaped) { isEscaped = true; } else { buffer.append("\\\\"); //$NON-NLS-1$ isEscaped = false; } break; // characters that need to be escaped in the regex. case '(': case ')': case '{': case '}': case '.': case '[': case ']': case '$': case '^': case '+': case '|': if (isEscaped) { buffer.append("\\\\"); //$NON-NLS-1$ isEscaped = false; } buffer.append('\\'); buffer.append(c); break; case '?': if (isStringMatcher && !isEscaped) { buffer.append('.'); } else { buffer.append('\\'); buffer.append(c); isEscaped = false; } break; case '*': if (isStringMatcher && !isEscaped) { buffer.append(".*"); //$NON-NLS-1$ } else { buffer.append('\\'); buffer.append(c); isEscaped = false; } break; default: if (isEscaped) { buffer.append("\\\\"); //$NON-NLS-1$ isEscaped = false; } buffer.append(c); break; } } if (isEscaped) { buffer.append("\\\\"); //$NON-NLS-1$ isEscaped = false; } return buffer; } /** * Interprets escaped characters in the given replace pattern. * * @param replaceText the replace pattern * @param foundText the found pattern to be replaced * @param lineDelim the line delimiter to use for \R * @return a replace pattern with escaped characters substituted by the respective characters */ public static String interpretReplaceEscapes(String replaceText, String foundText, String lineDelim) { return new ReplaceStringConstructor(lineDelim).interpretReplaceEscapes(replaceText, foundText); } /** * Copied from {@link FindReplaceDocumentAdapter} FindReplaceDocumentAdapter with contributions * from: Cagatay Calli <ccalli@gmail.com> - [find/replace] retain caps when replacing - * https://bugs.eclipse.org/bugs/show_bug.cgi?id=28949 Cagatay Calli <ccalli@gmail.com> - * [find/replace] define & fix behavior of retain caps with other escapes and text before \C - * https://bugs.eclipse.org/bugs/show_bug.cgi?id=217061 */ private static class ReplaceStringConstructor { private static final int RC_MIXED = 0; private static final int RC_UPPER = 1; private static final int RC_LOWER = 2; private static final int RC_FIRSTUPPER = 3; private int fRetainCaseMode; private final String fLineDelim; public ReplaceStringConstructor(String lineDelim) { fLineDelim = lineDelim; } /** * Interprets escaped characters in the given replace pattern. * * @param replaceText the replace pattern * @param foundText the found pattern to be replaced * @return a replace pattern with escaped characters substituted by the respective characters */ private String interpretReplaceEscapes(String replaceText, String foundText) { int length = replaceText.length(); boolean inEscape = false; StringBuffer buf = new StringBuffer(length); /* * every string we did not check looks mixed at first so initialize retain case mode with * RC_MIXED */ fRetainCaseMode = RC_MIXED; for (int i = 0; i < length; i++) { final char ch = replaceText.charAt(i); if (inEscape) { i = interpretReplaceEscape(ch, i, buf, replaceText, foundText); inEscape = false; } else if (ch == '\\') { inEscape = true; } else if (ch == '$') { buf.append(ch); /* * Feature in java.util.regex.Matcher#replaceFirst(String): $00, $000, etc. are * interpreted as $0 and $01, $001, etc. are interpreted as $1, etc. . If we support \0 as * replacement pattern for capturing group 0, it would not be possible any more to write a * replacement pattern that appends 0 to a capturing group (like $0\0). The fix is to * interpret \00 and $00 as $0\0, and \01 and $01 as $0\1, etc. */ if (i + 2 < length) { char ch1 = replaceText.charAt(i + 1); char ch2 = replaceText.charAt(i + 2); if (ch1 == '0' && '0' <= ch2 && ch2 <= '9') { buf.append("0\\"); //$NON-NLS-1$ i++; // consume the 0 } } } else { interpretRetainCase(buf, ch); } } if (inEscape) { // '\' as last character is invalid, but we still add it to get an error message buf.append('\\'); } return buf.toString(); } /** * Interprets the escaped character <code>ch</code> at offset <code>i</code> of the * <code>replaceText</code> and appends the interpretation to <code>buf</code>. * * @param ch the escaped character * @param i the offset * @param buf the output buffer * @param replaceText the original replace pattern * @param foundText the found pattern to be replaced * @return the new offset */ private int interpretReplaceEscape(final char ch, int i, StringBuffer buf, String replaceText, String foundText) { int length = replaceText.length(); switch (ch) { case 'r': buf.append('\r'); break; case 'n': buf.append('\n'); break; case 't': buf.append('\t'); break; case 'f': buf.append('\f'); break; case 'a': buf.append('\u0007'); break; case 'e': buf.append('\u001B'); break; case 'R': //see http://www.unicode.org/unicode/reports/tr18/#Line_Boundaries buf.append(fLineDelim); break; /* * \0 for octal is not supported in replace string, since it would conflict with capturing * group \0, etc. */ case '0': buf.append('$').append(ch); /* * See explanation in "Feature in java.util.regex.Matcher#replaceFirst(String)" in * interpretReplaceEscape(String) above. */ if (i + 1 < length) { char ch1 = replaceText.charAt(i + 1); if ('0' <= ch1 && ch1 <= '9') { buf.append('\\'); } } break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': buf.append('$').append(ch); break; case 'c': if (i + 1 < length) { char ch1 = replaceText.charAt(i + 1); interpretRetainCase(buf, (char) (ch1 ^ 64)); i++; } else { String msg = SearchMessages.PatternConstructor_error_escape_sequence; throw new PatternSyntaxException(msg, replaceText, i); } break; case 'x': if (i + 2 < length) { int parsedInt; try { parsedInt = Integer.parseInt(replaceText.substring(i + 1, i + 3), 16); if (parsedInt < 0) throw new NumberFormatException(); } catch (NumberFormatException e) { String msg = SearchMessages.PatternConstructor_error_hex_escape_sequence; throw new PatternSyntaxException(msg, replaceText, i); } interpretRetainCase(buf, (char) parsedInt); i += 2; } else { String msg = SearchMessages.PatternConstructor_error_hex_escape_sequence; throw new PatternSyntaxException(msg, replaceText, i); } break; case 'u': if (i + 4 < length) { int parsedInt; try { parsedInt = Integer.parseInt(replaceText.substring(i + 1, i + 5), 16); if (parsedInt < 0) throw new NumberFormatException(); } catch (NumberFormatException e) { String msg = SearchMessages.PatternConstructor_error_unicode_escape_sequence; throw new PatternSyntaxException(msg, replaceText, i); } interpretRetainCase(buf, (char) parsedInt); i += 4; } else { String msg = SearchMessages.PatternConstructor_error_unicode_escape_sequence; throw new PatternSyntaxException(msg, replaceText, i); } break; case 'C': if (foundText.toUpperCase().equals(foundText)) // is whole match upper-case? fRetainCaseMode = RC_UPPER; else if (foundText.toLowerCase().equals(foundText)) // is whole match lower-case? fRetainCaseMode = RC_LOWER; else if (Character.isUpperCase(foundText.charAt(0))) // is first character upper-case? fRetainCaseMode = RC_FIRSTUPPER; else fRetainCaseMode = RC_MIXED; break; default: // unknown escape k: append uninterpreted \k buf.append('\\').append(ch); break; } return i; } /** * Interprets current Retain Case mode (all upper-case,all lower-case,capitalized or mixed) and * appends the character <code>ch</code> to <code>buf</code> after processing. * * @param buf the output buffer * @param ch the character to process */ private void interpretRetainCase(StringBuffer buf, char ch) { if (fRetainCaseMode == RC_UPPER) buf.append(Character.toUpperCase(ch)); else if (fRetainCaseMode == RC_LOWER) buf.append(Character.toLowerCase(ch)); else if (fRetainCaseMode == RC_FIRSTUPPER) { buf.append(Character.toUpperCase(ch)); fRetainCaseMode = RC_MIXED; } else buf.append(ch); } } }