package com.github.sommeri.less4j.js;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A class for regular expressions with features like Javascript's RegExp,
 * plus Javascript String's replace and split methods (which can take a RegExp
 * parameter). The pure Java implementation (for server-side use) uses Java's
 * {@link Pattern} class, unavailable under GWT. The super-sourced GWT
 * implementation simply calls on to the native Javascript classes.
 *
 * <p>There are a few small incompatibilities between the two implementations.
 * Java-specific constructs in the regular expression syntax (e.g.
 * {@code [a-z&&[^bc]]}, {@code (?<=foo)}, {@code \A}, {@code \Q}) work only in
 * the pure Java implementation, not the GWT implementation, and are not
 * rejected by either. Also, the Javascript-specific constructs $` and $' in
 * the replacement expression work only in the GWT implementation, not the pure
 * Java implementation, which rejects them.
 *
 * <p>Instances carry a mutable {@code lastIndex} (updated by
 * {@link #exec(String)} and {@link #test(String)} when the global flag is
 * set), and no synchronization is performed on it.
 */
public class JsRegExp {

  // In JS syntax, a \ in the replacement string has no special meaning.
  // In Java syntax, a \ in the replacement string escapes the next character,
  // so we have to translate \ to \\ before passing it to Java.
  private static final Pattern REPLACEMENT_BACKSLASH = Pattern.compile("\\\\");

  // To get \\, we have to say \\\\\\\\:
  // \\\\\\\\ --> Java string unescape --> \\\\
  // \\\\ ---> Pattern replacement unescape in replacement preprocessing --> \\
  private static final String REPLACEMENT_BACKSLASH_FOR_JAVA = "\\\\\\\\";

  // In JS syntax, a $& in the replacement string stands for the whole match.
  // In Java syntax, the equivalent is $0, so we have to translate $& to
  // $0 before passing it to Java. However, we have to watch out for $$&, which
  // is actually a Javascript $$ (see below) followed by a & with no special
  // meaning, and must not get translated.
  private static final Pattern REPLACEMENT_DOLLAR_AMPERSAND =
      Pattern.compile("((?:^|\\G|[^$])(?:\\$\\$)*)\\$&");
  private static final String REPLACEMENT_DOLLAR_AMPERSAND_FOR_JAVA = "$1\\$0";

  // Special case of the above: a $& that is immediately followed by a digit.
  // Java reads the longest legal run of digits after a $ as the group number,
  // so translating "$&1" to "$01" would make Java insert group 1 (whenever the
  // pattern has such a group) instead of the whole match followed by a
  // literal 1. We therefore emit $0 followed by a backslash, which turns the
  // digit that follows into a literal character for Java. The digit itself is
  // only looked at, not consumed.
  private static final Pattern REPLACEMENT_DOLLAR_AMPERSAND_DIGIT =
      Pattern.compile("((?:^|\\G|[^$])(?:\\$\\$)*)\\$&(?=[0-9])");

  // After Java string unescaping this is: $1 (the kept prefix), an escaped $,
  // a 0, and an escaped backslash; the text produced is the prefix followed by
  // a dollar sign, a zero and one backslash.
  private static final String REPLACEMENT_DOLLAR_AMPERSAND_DIGIT_FOR_JAVA = "$1\\$0\\\\";

  // In JS syntax, a $` and $' in the replacement string stand for everything
  // before the match and everything after the match.
  // In Java syntax, there is no equivalent, so we detect and reject $` and $'.
  // However, we have to watch out for $$` and $$', which are actually a JS $$
  // (see below) followed by a ` or ' with no special meaning, and must not be
  // rejected.
  private static final Pattern REPLACEMENT_DOLLAR_APOSTROPHE =
      Pattern.compile("(?:^|[^$])(?:\\$\\$)*\\$[`']");

  // In JS syntax, a $$ in the replacement string stands for a (single) dollar
  // sign, $.
  // In Java syntax, the equivalent is \$, so we have to translate $$ to \$
  // before passing it to Java.
  private static final Pattern REPLACEMENT_DOLLAR_DOLLAR = Pattern.compile("\\$\\$");

  // To get \$, we have to say \\\\\\$:
  // \\\\\\$ --> Java string unescape --> \\\$
  // \\\$ ---> Pattern replacement unescape in replacement preprocessing --> \$
  private static final String REPLACEMENT_DOLLAR_DOLLAR_FOR_JAVA = "\\\\\\$";

  /**
   * Creates a regular expression object from a pattern with no flags.
   *
   * @param pattern the Javascript regular expression pattern to compile
   * @return a new regular expression
   * @throws RuntimeException if the pattern is invalid
   */
  public static JsRegExp compile(String pattern) {
    return compile(pattern, "");
  }

  /**
   * Creates a regular expression object from a pattern using the given flags.
   *
   * @param pattern the Javascript regular expression pattern to compile
   * @param flags the flags string, containing at most one occurrence of 'g'
   *        ({@link #getGlobal()}), 'i' ({@link #getIgnoreCase()}), or 'm'
   *        ({@link #getMultiline()})
   * @return a new regular expression
   * @throws RuntimeException if the pattern or the flags are invalid (an
   *         {@link IllegalArgumentException} for unknown or duplicated flags)
   */
  public static JsRegExp compile(String pattern, String flags) {
    // Parse flags
    boolean globalFlag = false;
    // UNIX_LINES is always set, so only '\n' counts as a line terminator.
    int javaPatternFlags = Pattern.UNIX_LINES;
    for (char flag : parseFlags(flags)) {
      switch (flag) {
        case 'g':
          globalFlag = true;
          break;
        case 'i':
          javaPatternFlags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
          break;
        case 'm':
          javaPatternFlags |= Pattern.MULTILINE;
          break;
        default:
          throw new IllegalArgumentException("Unknown regexp flag: '" + flag + "'");
      }
    }
    Pattern javaPattern = Pattern.compile(pattern, javaPatternFlags);
    return new JsRegExp(pattern, javaPattern, globalFlag);
  }

  /**
   * Parses a flags string as a set of characters. Does not reject unknown
   * flags.
   *
   * @param flags the flag string to parse
   * @return a set of flags
   * @throws IllegalArgumentException if a flag is duplicated
   */
  private static Set<Character> parseFlags(String flags) {
    Set<Character> flagsSet = new HashSet<Character>(flags.length());
    for (int flagIndex = 0; flagIndex < flags.length(); flagIndex++) {
      char flag = flags.charAt(flagIndex);
      // Set.add returns false when the element was already present.
      if (!flagsSet.add(flag)) {
        throw new IllegalArgumentException("Flag cannot be specified twice: '" + flag + "'");
      }
    }
    return flagsSet;
  }

  /** Whether the 'g' flag was given at compile time. */
  private final boolean globalFlag;

  /** Position at which the next global {@link #exec(String)} starts searching. */
  private int lastIndex;

  /** The compiled Java pattern backing this expression. */
  private final Pattern pattern;

  /** The pattern string exactly as passed to {@code compile}. */
  private final String source;

  private JsRegExp(String source, Pattern pattern, boolean globalFlag) {
    this.source = source;
    this.pattern = pattern;
    this.globalFlag = globalFlag;
    lastIndex = 0;
  }

  /**
   * Applies the regular expression to the given string. This call affects the
   * value returned by {@link #getLastIndex()} if the global flag is set: on a
   * match it becomes the end of the match, otherwise it is reset to 0.
   *
   * @param input the string to apply the regular expression to
   * @return a match result if the string matches, else null
   */
  public MatchResult exec(String input) {
    // Start the search at lastIndex if the global flag is true.
    int searchStartIndex = globalFlag ? lastIndex : 0;

    Matcher matcher = null;
    // Avoid exceptions: Javascript is more tolerant than Java, so a null
    // input or an out-of-range start index simply counts as "no match".
    if (input != null && searchStartIndex >= 0 && searchStartIndex <= input.length()) {
      Matcher candidate = pattern.matcher(input);
      if (candidate.find(searchStartIndex)) {
        matcher = candidate;
      }
    }

    if (matcher == null) {
      // No match
      if (globalFlag) {
        lastIndex = 0;
      }
      return null;
    }

    // Match: create a result holding group 0 (the whole match) and all
    // capturing groups.
    int groupCount = matcher.groupCount();
    List<String> groups = new ArrayList<String>(1 + groupCount);
    for (int group = 0; group <= groupCount; group++) {
      groups.add(matcher.group(group));
    }
    if (globalFlag) {
      lastIndex = matcher.end();
    }
    return new MatchResult(matcher.start(), input, groups);
  }

  /**
   * Returns whether the regular expression captures all occurrences of the
   * pattern.
   */
  public boolean getGlobal() {
    return globalFlag;
  }

  /** Returns whether the regular expression ignores case. */
  public boolean getIgnoreCase() {
    return (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
  }

  /**
   * Returns the zero-based position at which to start the next match. The
   * return value is not defined if the global flag is not set. After a call to
   * {@link #exec(String)} or {@link #test(String)}, this method returns the
   * next position following the most recent match.
   *
   * @see #getGlobal()
   */
  public int getLastIndex() {
    return lastIndex;
  }

  /**
   * Returns whether '$' and '^' match at line boundaries in addition to the
   * beginning or end of the string. Because this implementation always
   * compiles with {@link Pattern#UNIX_LINES}, only '\n' is treated as a line
   * terminator here.
   */
  public boolean getMultiline() {
    return (pattern.flags() & Pattern.MULTILINE) != 0;
  }

  /** Returns the pattern string of the regular expression. */
  public String getSource() {
    return source;
  }

  /**
   * Returns the input string with the part(s) matching the regular expression
   * replaced with the replacement string. If the global flag is set, replaces
   * all matches of the regular expression. Otherwise, replaces the first match
   * of the regular expression. As per Javascript semantics, backslashes in the
   * replacement string get no special treatment, but the replacement string
   * can use the following special patterns:
   * <ul>
   * <li>$1, $2, ... $99 - inserts the n'th group matched by the regular
   * expression.
   * <li>$&amp; - inserts the entire string matched by the regular expression
   * (also when a literal digit follows it, e.g. "$&amp;1").
   * <li>$$ - inserts a $.
   * </ul>
   * Note: $` and $' are *not* supported in the pure Java implementation, and
   * throw an exception.
   *
   * @param input the string in which the regular expression is to be searched
   * @param replacement the replacement string
   * @return the input string with the regular expression replaced by the
   *         replacement string
   * @throws UnsupportedOperationException if the replacement uses $` or $'
   * @throws RuntimeException if replacement is otherwise invalid
   */
  public String replace(String input, String replacement) {
    // Replace \ in the replacement with \\ to escape it for Java replace.
    replacement = REPLACEMENT_BACKSLASH.matcher(replacement)
        .replaceAll(REPLACEMENT_BACKSLASH_FOR_JAVA);

    // Handle $& directly followed by a digit first: emit $0 plus a backslash
    // so that Java does not read the digit as part of the group number.
    replacement = REPLACEMENT_DOLLAR_AMPERSAND_DIGIT.matcher(replacement)
        .replaceAll(REPLACEMENT_DOLLAR_AMPERSAND_DIGIT_FOR_JAVA);

    // Replace the remaining Javascript-ese $& in the replacement with Java-ese
    // $0, but watch out for $$&, which should stay $$&, to be changed to \$&
    // below.
    replacement = REPLACEMENT_DOLLAR_AMPERSAND.matcher(replacement)
        .replaceAll(REPLACEMENT_DOLLAR_AMPERSAND_FOR_JAVA);

    // Test for Javascript-ese $` and $', which we do not support in the pure
    // Java version.
    if (REPLACEMENT_DOLLAR_APOSTROPHE.matcher(replacement).find()) {
      throw new UnsupportedOperationException("$` and $' replacements are not supported");
    }

    // Replace the Javascript-ese $$ in the replacement with Java-ese \$.
    replacement = REPLACEMENT_DOLLAR_DOLLAR.matcher(replacement)
        .replaceAll(REPLACEMENT_DOLLAR_DOLLAR_FOR_JAVA);

    Matcher matcher = pattern.matcher(input);
    return globalFlag ? matcher.replaceAll(replacement) : matcher.replaceFirst(replacement);
  }

  /** Sets the zero-based position at which to start the next match. */
  public void setLastIndex(int lastIndex) {
    this.lastIndex = lastIndex;
  }

  /**
   * Splits the input string around matches of the regular expression. If the
   * regular expression is completely empty, splits the input string into its
   * constituent characters. If the regular expression is not empty but matches
   * an empty string, the results are not well defined.
   *
   * @param input the string to be split
   * @return the strings split off, any of which may be empty
   */
  public SplitResult split(String input) {
    return split(input, -1);
  }

  /**
   * Splits the input string around matches of the regular expression. If the
   * regular expression is completely empty, splits the input string into its
   * constituent characters. If the regular expression is not empty but matches
   * an empty string, the results are not well defined.
   *
   * <p>This pure Java implementation delegates to
   * {@link Pattern#split(CharSequence, int)}. Note: the browser-backed
   * implementation may behave differently, since no browser follows the spec
   * completely; a major difference is that IE will exclude empty strings in
   * the result.
   *
   * @param input the string to be split
   * @param limit the maximum number of strings to split off and return,
   *        ignoring the rest of the input string. If negative, there is no
   *        limit.
   * @return the strings split off, any of which may be empty
   */
  public SplitResult split(String input, int limit) {
    String[] result;
    if (source.length() == 0) {
      // Javascript split using a completely empty regular expression splits the
      // string into its constituent characters.
      int resultLength = input.length();
      if (limit >= 0 && resultLength > limit) {
        resultLength = limit;
      }
      result = new String[resultLength];
      for (int i = 0; i < resultLength; i++) {
        result[i] = input.substring(i, i + 1);
      }
    } else {
      // Ask Java for one piece more than the limit: Java puts the unsplit
      // remainder of the input into the last piece, which is dropped below.
      result = pattern.split(input, limit < 0 ? -1 : (limit + 1));
      if (limit >= 0 && result.length > limit) {
        // Chop off the unsplit part of the string which has been put in
        // result[limit]. Javascript split does not return it.
        String[] realResult = new String[limit];
        System.arraycopy(result, 0, realResult, 0, limit);
        result = realResult;
      }
    }
    return new SplitResult(result);
  }

  /**
   * Determines if the regular expression matches the given string. This call
   * affects the value returned by {@link #getLastIndex()} if the global flag
   * is set. Equivalent to: {@code exec(input) != null}
   *
   * @param input the string to apply the regular expression to
   * @return whether the regular expression matches the given string
   */
  public boolean test(String input) {
    return exec(input) != null;
  }
}