/*
* Copyright (c) 2012, the Dart project authors.
*
* Licensed under the Eclipse Public License v1.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.eclipse.org/legal/epl-v10.html
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.dart.tools.search.internal.core.text;
import com.google.dart.tools.search.internal.ui.SearchMessages;
import org.eclipse.jface.text.FindReplaceDocumentAdapter;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * Static helpers that turn user-entered search text into a compiled {@link Pattern} and that
 * expand escape sequences in replacement text.
 * <p>
 * Search text is either a regular expression (optionally containing <code>\R</code> as a
 * line-break placeholder) or plain text, optionally in the old 'StringMatcher' format where
 * <code>*</code> and <code>?</code> are wildcards and the backslash is the escape character.
 */
public class PatternConstructor {

  private PatternConstructor() {
    // don't instantiate
  }

  /**
   * Convenience overload of
   * {@link #createPattern(String, boolean, boolean, boolean, boolean)}: a non-regex pattern is
   * treated as being in the StringMatcher format and no whole-word restriction is applied.
   * 
   * @param pattern The search pattern
   * @param isCaseSensitive <code>true</code> to create a case sensitive pattern
   * @param isRegex <code>true</code> if the passed string already is a reg-ex pattern
   * @return The created pattern
   * @throws PatternSyntaxException if "\R" is at an illegal position or the resulting expression
   *           cannot be compiled
   */
  public static Pattern createPattern(String pattern, boolean isCaseSensitive, boolean isRegex)
      throws PatternSyntaxException {
    return createPattern(pattern, isRegex, true, isCaseSensitive, false);
  }

  /**
   * Creates a pattern element from the pattern string which is either a reg-ex expression or in our
   * old 'StringMatcher' format.
   * <p>
   * The pattern is always compiled with {@link Pattern#MULTILINE}; when
   * <code>isCaseSensitive</code> is <code>false</code>, {@link Pattern#CASE_INSENSITIVE} and
   * {@link Pattern#UNICODE_CASE} are added.
   * 
   * @param pattern The search pattern
   * @param isRegex <code>true</code> if the passed string already is a reg-ex pattern
   * @param isStringMatcher <code>true</code> if the passed string is in the StringMatcher format.
   *          Ignored when <code>isRegex</code> is <code>true</code>.
   * @param isCaseSensitive <code>true</code> to create a case sensitive pattern,
   *          <code>false</code> to create a case insensitive one
   * @param isWholeWord <code>true</code> to create a pattern that requires a word boundary at the
   *          beginning and the end.
   * @return The created pattern
   * @throws PatternSyntaxException if "\R" is at an illegal position or the resulting expression
   *           cannot be compiled
   */
  public static Pattern createPattern(String pattern, boolean isRegex, boolean isStringMatcher,
      boolean isCaseSensitive, boolean isWholeWord) throws PatternSyntaxException {
    if (isRegex) {
      pattern = substituteLinebreak(pattern);
      if (isWholeWord) {
        // wrap in a non-capturing group so the boundaries apply to the whole expression
        StringBuffer buffer = new StringBuffer(pattern.length() + 10);
        buffer.append("\\b(?:").append(pattern).append(")\\b"); //$NON-NLS-1$ //$NON-NLS-2$
        pattern = buffer.toString();
      }
    } else {
      int len = pattern.length();
      StringBuffer buffer = new StringBuffer(len + 10);
      // don't add a word boundary if the search text does not start with
      // a word char. (this works around a user input error).
      if (isWholeWord && len > 0 && isWordChar(pattern.charAt(0))) {
        buffer.append("\\b"); //$NON-NLS-1$
      }
      appendAsRegEx(isStringMatcher, pattern, buffer);
      // same reasoning for the end of the search text
      if (isWholeWord && len > 0 && isWordChar(pattern.charAt(len - 1))) {
        buffer.append("\\b"); //$NON-NLS-1$
      }
      pattern = buffer.toString();
    }

    int regexOptions = Pattern.MULTILINE;
    if (!isCaseSensitive) {
      regexOptions |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    }
    return Pattern.compile(pattern, regexOptions);
  }

  /**
   * Copied from {@link org.eclipse.jface.text.FindReplaceDocumentAdapter} to support '\R'.
   * <p>
   * Replaces every unquoted <code>\R</code> with an expression matching <code>\r\n</code>,
   * <code>\r</code> or <code>\n</code>. All other characters, including other escape pairs and
   * everything between <code>\Q</code> and <code>\E</code>, are copied unchanged.
   * 
   * @param findString the string to substitute
   * @return the new string
   * @throws PatternSyntaxException if "\R" is at an illegal position, i.e. inside a character
   *           group or inside braces
   */
  private static String substituteLinebreak(String findString) throws PatternSyntaxException {
    int length = findString.length();
    StringBuffer buf = new StringBuffer(length);
    int inCharGroup = 0; // nesting depth of '[' ... ']'
    int inBraces = 0; // nesting depth of '{' ... '}' outside character groups
    boolean inQuote = false; // true between \Q and \E
    for (int i = 0; i < length; i++) {
      char ch = findString.charAt(i);
      switch (ch) {
        case '[':
          buf.append(ch);
          if (!inQuote) {
            inCharGroup++;
          }
          break;
        case ']':
          buf.append(ch);
          // A closer without a matching opener must not push the depth below zero: a later
          // opener would only bring it back to zero and a \R inside it would go undetected.
          if (!inQuote && inCharGroup > 0) {
            inCharGroup--;
          }
          break;
        case '{':
          buf.append(ch);
          if (!inQuote && inCharGroup == 0) {
            inBraces++;
          }
          break;
        case '}':
          buf.append(ch);
          // clamp at zero for the same reason as for ']'
          if (!inQuote && inCharGroup == 0 && inBraces > 0) {
            inBraces--;
          }
          break;
        case '\\':
          if (i + 1 < length) {
            char ch1 = findString.charAt(i + 1);
            if (inQuote) {
              // inside \Q...\E only \E is significant; copy the pair verbatim
              if (ch1 == 'E') {
                inQuote = false;
              }
              buf.append(ch).append(ch1);
              i++;
            } else if (ch1 == 'R') {
              if (inCharGroup > 0 || inBraces > 0) {
                String msg = SearchMessages.PatternConstructor_error_line_delim_position;
                throw new PatternSyntaxException(msg, findString, i);
              }
              buf.append("(?>\\r\\n?|\\n)"); //$NON-NLS-1$
              i++;
            } else {
              if (ch1 == 'Q') {
                inQuote = true;
              }
              // copy the escape pair as a unit so the escaped character is not re-examined
              buf.append(ch).append(ch1);
              i++;
            }
          } else {
            // trailing backslash: copied as is
            buf.append(ch);
          }
          break;
        default:
          buf.append(ch);
          break;
      }
    }
    return buf.toString();
  }

  /**
   * Tells whether <code>c</code> counts as a word character when deciding if a <code>\b</code>
   * boundary may be placed next to it. The underscore is included because the regex engine's
   * <code>\b</code> treats it as a word character too; without it a whole-word search for text
   * starting or ending with '_' would get no boundary on that side.
   * 
   * @param c the character to test
   * @return <code>true</code> if <code>c</code> is a letter, a digit or an underscore
   */
  private static boolean isWordChar(char c) {
    return c == '_' || Character.isLetterOrDigit(c);
  }

  /**
   * Creates a pattern element from an array of patterns in the old 'StringMatcher' format. The
   * individual patterns are converted to regular expressions and combined as alternatives.
   * 
   * @param patterns The search patterns
   * @param isCaseSensitive <code>true</code> to create a case sensitive pattern,
   *          <code>false</code> to create a case insensitive one
   * @return The created pattern
   * @throws PatternSyntaxException if "\R" is at an illegal position or the resulting expression
   *           cannot be compiled
   */
  public static Pattern createPattern(String[] patterns, boolean isCaseSensitive)
      throws PatternSyntaxException {
    StringBuffer pattern = new StringBuffer();
    for (int i = 0; i < patterns.length; i++) {
      if (i > 0) {
        // note that this works only as we know that the operands of the
        // or expression will be simple and need no brackets.
        pattern.append('|');
      }
      appendAsRegEx(true, patterns[i], pattern);
    }
    return createPattern(pattern.toString(), true, true, isCaseSensitive, false);
  }

  /**
   * Appends <code>pattern</code> to <code>buffer</code> as a regular expression.
   * <p>
   * Regex meta characters are escaped so that they match literally. When
   * <code>isStringMatcher</code> is <code>true</code>, an unescaped <code>?</code> becomes
   * <code>.</code>, an unescaped <code>*</code> becomes <code>.*</code>, and a backslash escapes
   * the following <code>?</code>, <code>*</code> or backslash; a backslash in front of any other
   * character (or at the end) is kept as a literal backslash.
   * 
   * @param isStringMatcher <code>true</code> if <code>pattern</code> is in the StringMatcher
   *          format, <code>false</code> if every character is to be matched literally
   * @param pattern the text to convert
   * @param buffer the buffer to append to
   * @return the given <code>buffer</code>
   */
  public static StringBuffer appendAsRegEx(boolean isStringMatcher, String pattern,
      StringBuffer buffer) {
    // true while a StringMatcher backslash has been read but not yet applied
    boolean isEscaped = false;
    for (int i = 0; i < pattern.length(); i++) {
      char c = pattern.charAt(i);
      switch (c) {
      // the backslash
        case '\\':
          // the backslash is escape char in string matcher
          if (isStringMatcher && !isEscaped) {
            isEscaped = true;
          } else {
            buffer.append("\\\\"); //$NON-NLS-1$
            isEscaped = false;
          }
          break;
        // characters that need to be escaped in the regex.
        case '(':
        case ')':
        case '{':
        case '}':
        case '.':
        case '[':
        case ']':
        case '$':
        case '^':
        case '+':
        case '|':
          if (isEscaped) {
            // the pending backslash did not escape a wildcard: emit it as a literal
            buffer.append("\\\\"); //$NON-NLS-1$
            isEscaped = false;
          }
          buffer.append('\\');
          buffer.append(c);
          break;
        case '?':
          if (isStringMatcher && !isEscaped) {
            buffer.append('.');
          } else {
            buffer.append('\\');
            buffer.append(c);
            isEscaped = false;
          }
          break;
        case '*':
          if (isStringMatcher && !isEscaped) {
            buffer.append(".*"); //$NON-NLS-1$
          } else {
            buffer.append('\\');
            buffer.append(c);
            isEscaped = false;
          }
          break;
        default:
          if (isEscaped) {
            buffer.append("\\\\"); //$NON-NLS-1$
            isEscaped = false;
          }
          buffer.append(c);
          break;
      }
    }
    if (isEscaped) {
      // a trailing backslash is matched literally
      buffer.append("\\\\"); //$NON-NLS-1$
    }
    return buffer;
  }

  /**
   * Interprets escaped characters in the given replace pattern.
   * 
   * @param replaceText the replace pattern
   * @param foundText the found pattern to be replaced
   * @param lineDelim the line delimiter to use for \R
   * @return a replace pattern with escaped characters substituted by the respective characters
   * @throws PatternSyntaxException if a control, hexadecimal or Unicode escape sequence in
   *           <code>replaceText</code> is incomplete or malformed
   */
  public static String interpretReplaceEscapes(String replaceText, String foundText,
      String lineDelim) {
    return new ReplaceStringConstructor(lineDelim).interpretReplaceEscapes(replaceText, foundText);
  }

  /**
   * Copied from {@link FindReplaceDocumentAdapter} with contributions from: Cagatay Calli
   * <ccalli@gmail.com> - [find/replace] retain caps when replacing -
   * https://bugs.eclipse.org/bugs/show_bug.cgi?id=28949 Cagatay Calli <ccalli@gmail.com> -
   * [find/replace] define & fix behavior of retain caps with other escapes and text before \C -
   * https://bugs.eclipse.org/bugs/show_bug.cgi?id=217061
   */
  private static class ReplaceStringConstructor {
    // retain-case modes selected by \C, see interpretRetainCase
    private static final int RC_MIXED = 0;
    private static final int RC_UPPER = 1;
    private static final int RC_LOWER = 2;
    private static final int RC_FIRSTUPPER = 3;

    private int fRetainCaseMode;
    private final String fLineDelim;

    public ReplaceStringConstructor(String lineDelim) {
      fLineDelim = lineDelim;
    }

    /**
     * Interprets escaped characters in the given replace pattern.
     * 
     * @param replaceText the replace pattern
     * @param foundText the found pattern to be replaced
     * @return a replace pattern with escaped characters substituted by the respective characters
     */
    private String interpretReplaceEscapes(String replaceText, String foundText) {
      int length = replaceText.length();
      boolean inEscape = false;
      StringBuffer buf = new StringBuffer(length);
      /*
       * every string we did not check looks mixed at first so initialize retain case mode with
       * RC_MIXED
       */
      fRetainCaseMode = RC_MIXED;
      for (int i = 0; i < length; i++) {
        final char ch = replaceText.charAt(i);
        if (inEscape) {
          // the handler may consume further characters and returns the last index it used
          i = interpretReplaceEscape(ch, i, buf, replaceText, foundText);
          inEscape = false;
        } else if (ch == '\\') {
          inEscape = true;
        } else if (ch == '$') {
          buf.append(ch);
          /*
           * Feature in java.util.regex.Matcher#replaceFirst(String): $00, $000, etc. are
           * interpreted as $0 and $01, $001, etc. are interpreted as $1, etc. . If we support \0 as
           * replacement pattern for capturing group 0, it would not be possible any more to write a
           * replacement pattern that appends 0 to a capturing group (like $0\0). The fix is to
           * interpret \00 and $00 as $0\0, and \01 and $01 as $0\1, etc.
           */
          if (i + 2 < length) {
            char ch1 = replaceText.charAt(i + 1);
            char ch2 = replaceText.charAt(i + 2);
            if (ch1 == '0' && '0' <= ch2 && ch2 <= '9') {
              buf.append("0\\"); //$NON-NLS-1$
              i++; // consume the 0
            }
          }
        } else {
          interpretRetainCase(buf, ch);
        }
      }
      if (inEscape) {
        // '\' as last character is invalid, but we still add it to get an error message
        buf.append('\\');
      }
      return buf.toString();
    }

    /**
     * Interprets the escaped character <code>ch</code> at offset <code>i</code> of the
     * <code>replaceText</code> and appends the interpretation to <code>buf</code>.
     * 
     * @param ch the escaped character
     * @param i the offset
     * @param buf the output buffer
     * @param replaceText the original replace pattern
     * @param foundText the found pattern to be replaced
     * @return the new offset
     * @throws PatternSyntaxException if a control, hexadecimal or Unicode escape sequence is
     *           incomplete or malformed
     */
    private int interpretReplaceEscape(final char ch, int i, StringBuffer buf, String replaceText,
        String foundText) {
      int length = replaceText.length();
      switch (ch) {
        case 'r':
          buf.append('\r');
          break;
        case 'n':
          buf.append('\n');
          break;
        case 't':
          buf.append('\t');
          break;
        case 'f':
          buf.append('\f');
          break;
        case 'a':
          buf.append('\u0007');
          break;
        case 'e':
          buf.append('\u001B');
          break;
        case 'R': //see http://www.unicode.org/unicode/reports/tr18/#Line_Boundaries
          buf.append(fLineDelim);
          break;
        /*
         * \0 for octal is not supported in replace string, since it would conflict with capturing
         * group \0, etc.
         */
        case '0':
          buf.append('$').append(ch);
          /*
           * See explanation in "Feature in java.util.regex.Matcher#replaceFirst(String)" in
           * interpretReplaceEscapes(String, String) above.
           */
          if (i + 1 < length) {
            char ch1 = replaceText.charAt(i + 1);
            if ('0' <= ch1 && ch1 <= '9') {
              buf.append('\\');
            }
          }
          break;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          // backslash-digit is rewritten to the $-digit group reference form
          buf.append('$').append(ch);
          break;
        case 'c':
          if (i + 1 < length) {
            // control character: flip bit 6 of the following character
            char ch1 = replaceText.charAt(i + 1);
            interpretRetainCase(buf, (char) (ch1 ^ 64));
            i++;
          } else {
            String msg = SearchMessages.PatternConstructor_error_escape_sequence;
            throw new PatternSyntaxException(msg, replaceText, i);
          }
          break;
        case 'x': {
          // exactly two hex digits must follow
          int parsedInt = i + 2 < length ? parseHexDigits(replaceText, i + 1, 2) : -1;
          if (parsedInt < 0) {
            String msg = SearchMessages.PatternConstructor_error_hex_escape_sequence;
            throw new PatternSyntaxException(msg, replaceText, i);
          }
          interpretRetainCase(buf, (char) parsedInt);
          i += 2;
          break;
        }
        case 'u': {
          // exactly four hex digits must follow
          int parsedInt = i + 4 < length ? parseHexDigits(replaceText, i + 1, 4) : -1;
          if (parsedInt < 0) {
            String msg = SearchMessages.PatternConstructor_error_unicode_escape_sequence;
            throw new PatternSyntaxException(msg, replaceText, i);
          }
          interpretRetainCase(buf, (char) parsedInt);
          i += 4;
          break;
        }
        case 'C':
          if (foundText.toUpperCase().equals(foundText)) { // is whole match upper-case?
            fRetainCaseMode = RC_UPPER;
          } else if (foundText.toLowerCase().equals(foundText)) { // is whole match lower-case?
            fRetainCaseMode = RC_LOWER;
          } else if (Character.isUpperCase(foundText.charAt(0))) { // is first character upper-case?
            fRetainCaseMode = RC_FIRSTUPPER;
          } else {
            fRetainCaseMode = RC_MIXED;
          }
          break;
        default:
          // unknown escape k: append uninterpreted \k
          buf.append('\\').append(ch);
          break;
      }
      return i;
    }

    /**
     * Parses <code>count</code> hexadecimal digits of <code>text</code> starting at
     * <code>start</code>. Unlike <code>Integer.parseInt</code> this does not accept a leading
     * sign, so every one of the <code>count</code> characters has to be a digit.
     * 
     * @param text the text containing the digits
     * @param start the offset of the first digit; the caller guarantees that
     *          <code>start + count</code> does not exceed the length of <code>text</code>
     * @param count the number of digits to read
     * @return the parsed value, or <code>-1</code> if any of the characters is not a hex digit
     */
    private static int parseHexDigits(String text, int start, int count) {
      int value = 0;
      for (int k = start; k < start + count; k++) {
        int digit = Character.digit(text.charAt(k), 16);
        if (digit < 0) {
          return -1;
        }
        value = (value << 4) | digit;
      }
      return value;
    }

    /**
     * Interprets current Retain Case mode (all upper-case,all lower-case,capitalized or mixed) and
     * appends the character <code>ch</code> to <code>buf</code> after processing.
     * 
     * @param buf the output buffer
     * @param ch the character to process
     */
    private void interpretRetainCase(StringBuffer buf, char ch) {
      if (fRetainCaseMode == RC_UPPER) {
        buf.append(Character.toUpperCase(ch));
      } else if (fRetainCaseMode == RC_LOWER) {
        buf.append(Character.toLowerCase(ch));
      } else if (fRetainCaseMode == RC_FIRSTUPPER) {
        // only the first character is capitalized; everything after it is copied unchanged
        buf.append(Character.toUpperCase(ch));
        fRetainCaseMode = RC_MIXED;
      } else {
        buf.append(ch);
      }
    }
  }
}