package com.laytonsmith.core.functions;

import com.laytonsmith.PureUtilities.Common.ReflectionUtils;
import com.laytonsmith.annotations.api;
import com.laytonsmith.annotations.core;
import com.laytonsmith.annotations.seealso;
import com.laytonsmith.core.CHVersion;
import com.laytonsmith.core.Optimizable;
import com.laytonsmith.core.ParseTree;
import com.laytonsmith.core.Static;
import com.laytonsmith.core.compiler.FileOptions;
import com.laytonsmith.core.constructs.CArray;
import com.laytonsmith.core.constructs.CFunction;
import com.laytonsmith.core.constructs.CInt;
import com.laytonsmith.core.constructs.CNull;
import com.laytonsmith.core.constructs.CString;
import com.laytonsmith.core.constructs.Construct;
import com.laytonsmith.core.constructs.Target;
import com.laytonsmith.core.environments.Environment;
import com.laytonsmith.core.exceptions.CRE.CRECastException;
import com.laytonsmith.core.exceptions.CRE.CREFormatException;
import com.laytonsmith.core.exceptions.CRE.CREThrowable;
import com.laytonsmith.core.exceptions.ConfigCompileException;
import com.laytonsmith.core.exceptions.ConfigRuntimeException;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * Regular expression functions. Every function here delegates to
 * {@link java.util.regex.Pattern} / {@link java.util.regex.Matcher}; a pattern
 * argument is either a plain string or an array of {@code (pattern, flags)}
 * (see {@link #getPattern(Construct, Target)}).
 */
@core
public class Regex {

    public static String docs() {
        return "This class provides regular expression functions. For more details, please see the page on "
                + "[[CommandHelper/Regex|regular expressions]]. Note that all the functions are just passthroughs"
                + " to the Java regex mechanism. If you need to set a flag on the regex, where the api calls"
                + " for a pattern, instead send array('pattern', 'flags') where flags is any of i, m, or s."
                + " Alternatively, using the embedded flag system that Java provides is also valid. Named captures are"
                + " also supported if you are using Java 7, otherwise they are not supported.";
    }

    /**
     * Returns the first match of a pattern in a subject, including its numbered
     * and named capture groups.
     */
    @api
    public static class reg_match extends AbstractFunction implements Optimizable {

        @Override
        public String getName() {
            return "reg_match";
        }

        @Override
        public Integer[] numArgs() {
            return new Integer[]{2};
        }

        @Override
        public String docs() {
            return "array {pattern, subject} Searches for the given pattern, and returns an array with the results. Captures are supported."
                    + " If the pattern is not found anywhere in the subject, an empty array is returned. The indexes of the array"
                    + " follow typical regex fashion; the 0th element is the whole match, and 1-n are the captures specified in"
                    + " the regex.";
        }

        @Override
        public Class<? extends CREThrowable>[] thrown() {
            return new Class[]{CREFormatException.class};
        }

        @Override
        public boolean isRestricted() {
            return false;
        }

        @Override
        public CHVersion since() {
            return CHVersion.V3_2_0;
        }

        @Override
        public Boolean runAsync() {
            return null;
        }

        /**
         * @param t code target used for created constructs and errors
         * @param env unused
         * @param args {@code args[0]} is the pattern, {@code args[1]} the subject
         * @return an associative array of the first match, or an empty
         * associative array when the pattern is not found
         */
        @Override
        public Construct exec(Target t, Environment env, Construct... args) throws ConfigRuntimeException {
            Pattern pattern = getPattern(args[0], t);
            String subject = args[1].val();
            Matcher m = pattern.matcher(subject);
            if (!m.find()) {
                return CArray.GetAssociativeArray(t);
            }
            return captureMatch(m, getNamedGroups(pattern.pattern()), t);
        }

        /**
         * Performs no rewrite; only compiles a constant pattern early so that
         * a malformed regex is reported at compile time.
         */
        @Override
        public ParseTree optimizeDynamic(Target t, List<ParseTree> children, FileOptions fileOptions)
                throws ConfigCompileException, ConfigRuntimeException {
            validateConstantPattern(children, t);
            return null;
        }

        @Override
        public Set<OptimizationOption> optimizationOptions() {
            return EnumSet.of(
                    OptimizationOption.CONSTANT_OFFLINE,
                    OptimizationOption.CACHE_RETURN,
                    OptimizationOption.OPTIMIZE_DYNAMIC,
                    OptimizationOption.NO_SIDE_EFFECTS
            );
        }

        @Override
        public ExampleScript[] examples() throws ConfigCompileException {
            return new ExampleScript[]{
                new ExampleScript("Basic usage", "reg_match('(\\\\d)(\\\\d)(\\\\d)', 'abc123')"),
                //Java 7 can't be assumed to be working on the system running the doc gen, so we'll hardcode these.
                new ExampleScript("Named captures (Only works if your system is running Java 7)",
                        "reg_match('abc(?<foo>\\\\d+)(xyz)', 'abc123xyz')",
                        "{0: abc123xyz, 1: 123, 2: xyz, foo: 123}"),
                new ExampleScript("Named captures with backreferences (Only works if your system is running Java 7)",
                        "reg_match('abc(?<foo>\\\\d+)def\\\\k<foo>', 'abc123def123')['foo']",
                        "123")
            };
        }
    }

    /**
     * Returns every match of a pattern in a subject, one associative array per
     * match.
     */
    @api
    public static class reg_match_all extends AbstractFunction implements Optimizable {

        @Override
        public String getName() {
            return "reg_match_all";
        }

        @Override
        public Integer[] numArgs() {
            return new Integer[]{2};
        }

        @Override
        public String docs() {
            return "array {pattern, subject} Searches subject for all matches to the regular expression given in pattern, unlike reg_match,"
                    + " which just returns the first match.";
        }

        @Override
        public Class<? extends CREThrowable>[] thrown() {
            return new Class[]{CREFormatException.class};
        }

        @Override
        public boolean isRestricted() {
            return false;
        }

        @Override
        public CHVersion since() {
            return CHVersion.V3_2_0;
        }

        @Override
        public Boolean runAsync() {
            return null;
        }

        /**
         * @param t code target used for created constructs and errors
         * @param env unused
         * @param args {@code args[0]} is the pattern, {@code args[1]} the subject
         * @return a normal array holding one associative array per match, in
         * the order the matches were found; empty when nothing matches
         */
        @Override
        public Construct exec(Target t, Environment env, Construct... args) throws ConfigRuntimeException {
            Pattern pattern = getPattern(args[0], t);
            String subject = args[1].val();
            CArray allMatches = new CArray(t);
            Matcher m = pattern.matcher(subject);
            // The group names depend only on the pattern, so find them once.
            Set<String> namedGroups = getNamedGroups(pattern.pattern());
            while (m.find()) {
                // Shares captureMatch with reg_match so that groups which did
                // not participate in a match become null here as well.
                allMatches.push(captureMatch(m, namedGroups, t), t);
            }
            return allMatches;
        }

        /**
         * Performs no rewrite; only compiles a constant pattern early so that
         * a malformed regex is reported at compile time.
         */
        @Override
        public ParseTree optimizeDynamic(Target t, List<ParseTree> children, FileOptions fileOptions)
                throws ConfigCompileException, ConfigRuntimeException {
            validateConstantPattern(children, t);
            return null;
        }

        @Override
        public Set<OptimizationOption> optimizationOptions() {
            return EnumSet.of(
                    OptimizationOption.CONSTANT_OFFLINE,
                    OptimizationOption.CACHE_RETURN,
                    OptimizationOption.OPTIMIZE_DYNAMIC,
                    OptimizationOption.NO_SIDE_EFFECTS
            );
        }

        @Override
        public ExampleScript[] examples() throws ConfigCompileException {
            return new ExampleScript[]{
                new ExampleScript("Basic usage", "reg_match_all('(\\\\d{3})', 'abc123456')"),
                //Same thing here, can't guarantee we're running Java 7 when these are generated.
                new ExampleScript("Named captures (Only works if your system is running Java 7)",
                        "reg_match_all('abc(?<foo>\\\\d+)(xyz)', 'abc123xyz')[0]['foo']",
                        "123"),
                new ExampleScript("Named captures with backreferences (Only works if your system is running Java 7)",
                        "reg_match_all('abc(?<foo>\\\\d+)def\\\\k<foo>', 'abc123def123')[0]['foo']",
                        "123")
            };
        }
    }

    /**
     * Replaces every occurrence of a pattern in a subject, with regex
     * back reference support in the replacement.
     */
    @api
    public static class reg_replace extends AbstractFunction implements Optimizable {

        @Override
        public String getName() {
            return "reg_replace";
        }

        @Override
        public Integer[] numArgs() {
            return new Integer[]{3};
        }

        @Override
        public String docs() {
            return "string {pattern, replacement, subject} Replaces any occurances of pattern with the replacement in subject."
                    + " Back references are allowed.";
        }

        @Override
        public Class<? extends CREThrowable>[] thrown() {
            return new Class[]{CREFormatException.class};
        }

        @Override
        public boolean isRestricted() {
            return false;
        }

        @Override
        public CHVersion since() {
            return CHVersion.V3_2_0;
        }

        @Override
        public Boolean runAsync() {
            return null;
        }

        /**
         * @param t code target used for created constructs and errors
         * @param env unused
         * @param args pattern, replacement and subject, in that order
         * @return the subject with every match replaced
         * @throws ConfigRuntimeException (a CREFormatException) when the
         * replacement refers to a group that does not exist or is otherwise
         * rejected by {@link Matcher#replaceAll(String)}
         */
        @Override
        public Construct exec(Target t, Environment env, Construct... args) throws ConfigRuntimeException {
            Pattern pattern = getPattern(args[0], t);
            String replacement = args[1].val();
            String subject = args[2].val();
            String ret;
            try {
                ret = pattern.matcher(subject).replaceAll(replacement);
            } catch (IndexOutOfBoundsException e) {
                throw new CREFormatException("Expecting a regex group at parameter 1 of reg_replace", t);
            } catch (IllegalArgumentException e) {
                throw new CREFormatException(e.getMessage(), t);
            }
            return new CString(ret, t);
        }

        /**
         * When the pattern is a constant without regex syntax and the
         * replacement is a constant without replacement syntax, the call is
         * rewritten to {@code replace(subject, pattern, replacement)}.
         * Otherwise a constant pattern is only compiled early to report
         * syntax errors.
         *
         * @return the replacement node, or null when no rewrite is performed
         */
        @Override
        public ParseTree optimizeDynamic(Target t, List<ParseTree> children, FileOptions fileOptions)
                throws ConfigCompileException, ConfigRuntimeException {
            if (children.isEmpty()) {
                return null;
            }
            ParseTree data = children.get(0);
            if (data.getData().isDynamic()) {
                return null;
            }
            String pattern = data.getData().val();
            if (isLiteralRegex(pattern) && children.size() == 3 && isLiteralReplacement(children.get(1))) {
                //We want to replace this with replace()
                //Note the alternative order of arguments
                ParseTree replace = new ParseTree(new CFunction("replace", t), data.getFileOptions());
                replace.addChildAt(0, children.get(2)); //subject -> main
                replace.addChildAt(1, new ParseTree(new CString(getLiteralRegex(pattern), t),
                        replace.getFileOptions())); //pattern -> what
                replace.addChildAt(2, children.get(1)); //replacement -> that
                return replace;
            }
            getPattern(data.getData(), t);
            return null;
        }

        /**
         * Tells whether a replacement argument is a constant that
         * Matcher.replaceAll would insert verbatim. '$' and '\' introduce
         * group references and escapes there, so a replacement containing
         * either one (or one whose value is not known yet) must stay on the
         * regex code path.
         */
        private static boolean isLiteralReplacement(ParseTree replacement) {
            if (replacement.getData().isDynamic()) {
                return false;
            }
            String value = replacement.getData().val();
            return value.indexOf('$') < 0 && value.indexOf('\\') < 0;
        }

        @Override
        public Set<OptimizationOption> optimizationOptions() {
            return EnumSet.of(
                    OptimizationOption.CONSTANT_OFFLINE,
                    OptimizationOption.CACHE_RETURN,
                    OptimizationOption.OPTIMIZE_DYNAMIC,
                    OptimizationOption.NO_SIDE_EFFECTS
            );
        }

        @Override
        public ExampleScript[] examples() throws ConfigCompileException {
            return new ExampleScript[]{
                new ExampleScript("Basic usage", "reg_replace('\\\\d', 'Z', '123abc')"),
                new ExampleScript("Using backreferences", "reg_replace('abc(\\\\d+)', '$1', 'abc123')"),
                new ExampleScript("Using backreferences with named captures (Only works if your system is running Java 7)",
                        "reg_replace('abc(?<foo>\\\\d+)', '${foo}', 'abc123')",
                        "123")
            };
        }
    }

    /**
     * Splits a subject on a pattern, optionally limiting the number of splits.
     */
    @api
    @seealso({StringHandling.split.class, ArrayHandling.array_implode.class})
    public static class reg_split extends AbstractFunction implements Optimizable {

        /** Name of the plain split function that literal patterns are rewritten to. */
        private final static String split = new StringHandling.split().getName();

        @Override
        public String getName() {
            return "reg_split";
        }

        @Override
        public Integer[] numArgs() {
            return new Integer[]{2, 3};
        }

        @Override
        public String docs() {
            return "array {pattern, subject, [limit]} Splits a string on the given regex, and returns an array of the parts. If"
                    + " nothing matched, an array with one element, namely the original subject, is returned."
                    + " Limit defaults to infinity, but if set, only"
                    + " that number of splits will occur.";
        }

        @Override
        public Class<? extends CREThrowable>[] thrown() {
            return new Class[]{CREFormatException.class, CRECastException.class};
        }

        @Override
        public boolean isRestricted() {
            return false;
        }

        @Override
        public CHVersion since() {
            return CHVersion.V3_2_0;
        }

        @Override
        public Boolean runAsync() {
            return null;
        }

        /**
         * @param t code target used for created constructs and errors
         * @param env unused
         * @param args pattern, subject and an optional limit
         * @return a normal array of the pieces of the subject
         */
        @Override
        public Construct exec(Target t, Environment env, Construct... args) throws ConfigRuntimeException {
            Pattern pattern = getPattern(args[0], t);
            String subject = args[1].val();
            /*
             * We use a different indexing notation than Java's regex split.
             * Here the limit is the number of splits themselves, so 1 means
             * that the array will be length 2, as in, there was 1 split
             * performed, while Java's limit counts the resulting pieces.
             * That is why limit + 1 is handed to Java. The default is one
             * below Integer.MAX_VALUE so that the + 1 does not overflow.
             */
            int limit = Integer.MAX_VALUE - 1;
            if (args.length >= 3) {
                limit = Static.getInt32(args[2], t);
            }
            String[] parts = pattern.split(subject, limit + 1);
            CArray ret = new CArray(t);
            for (String part : parts) {
                ret.push(new CString(part, t), t);
            }
            return ret;
        }

        /**
         * A two argument call whose constant pattern has no regex syntax is
         * rewritten to the plain split function. A call that also passes a
         * limit is left alone, since the rewritten node only carries the
         * pattern and the subject and the limit would be lost.
         *
         * @return the replacement node, or null when no rewrite is performed
         */
        @Override
        public ParseTree optimizeDynamic(Target t, List<ParseTree> children, FileOptions fileOptions)
                throws ConfigCompileException, ConfigRuntimeException {
            if (children.isEmpty()) {
                return null;
            }
            ParseTree data = children.get(0);
            if (data.getData().isDynamic()) {
                return null;
            }
            String pattern = data.getData().val();
            if (isLiteralRegex(pattern) && children.size() == 2) {
                //We want to replace this with split()
                ParseTree splitNode = new ParseTree(new CFunction(split, t), data.getFileOptions());
                splitNode.addChildAt(0, new ParseTree(new CString(getLiteralRegex(pattern), t),
                        splitNode.getFileOptions()));
                splitNode.addChildAt(1, children.get(1));
                return splitNode;
            }
            getPattern(data.getData(), t);
            return null;
        }

        @Override
        public Set<OptimizationOption> optimizationOptions() {
            return EnumSet.of(
                    OptimizationOption.CACHE_RETURN,
                    OptimizationOption.OPTIMIZE_DYNAMIC,
                    OptimizationOption.NO_SIDE_EFFECTS
            );
        }

        @Override
        public ExampleScript[] examples() throws ConfigCompileException {
            return new ExampleScript[]{
                new ExampleScript("Basic usage", "reg_split('\\\\d', 'a1b2c3')")
            };
        }
    }

    /**
     * Counts the matches of a pattern in a subject.
     */
    @api
    public static class reg_count extends AbstractFunction implements Optimizable {

        @Override
        public String getName() {
            return "reg_count";
        }

        @Override
        public Integer[] numArgs() {
            return new Integer[]{2};
        }

        @Override
        public String docs() {
            return "int {pattern, subject} Counts the number of occurances in the subject.";
        }

        @Override
        public Class<? extends CREThrowable>[] thrown() {
            return new Class[]{CREFormatException.class};
        }

        @Override
        public boolean isRestricted() {
            return false;
        }

        @Override
        public CHVersion since() {
            return CHVersion.V3_2_0;
        }

        @Override
        public Boolean runAsync() {
            return null;
        }

        /**
         * @param t code target used for created constructs and errors
         * @param env unused
         * @param args {@code args[0]} is the pattern, {@code args[1]} the subject
         * @return the number of times {@link Matcher#find()} succeeds on the subject
         */
        @Override
        public Construct exec(Target t, Environment env, Construct... args) throws ConfigRuntimeException {
            Pattern pattern = getPattern(args[0], t);
            String subject = args[1].val();
            long count = 0;
            Matcher m = pattern.matcher(subject);
            while (m.find()) {
                count++;
            }
            return new CInt(count, t);
        }

        /**
         * Performs no rewrite; only compiles a constant pattern early so that
         * a malformed regex is reported at compile time.
         */
        @Override
        public ParseTree optimizeDynamic(Target t, List<ParseTree> children, FileOptions fileOptions)
                throws ConfigCompileException, ConfigRuntimeException {
            validateConstantPattern(children, t);
            return null;
        }

        @Override
        public Set<OptimizationOption> optimizationOptions() {
            return EnumSet.of(
                    OptimizationOption.CONSTANT_OFFLINE,
                    OptimizationOption.CACHE_RETURN,
                    OptimizationOption.OPTIMIZE_DYNAMIC,
                    OptimizationOption.NO_SIDE_EFFECTS
            );
        }

        @Override
        public ExampleScript[] examples() throws ConfigCompileException {
            return new ExampleScript[]{
                new ExampleScript("Basic usage", "reg_count('\\\\d', '123abc')")
            };
        }
    }

    /**
     * Quotes a string so that it can be embedded in a regex as a literal.
     */
    @api
    public static class reg_escape extends AbstractFunction implements Optimizable {

        @Override
        public Class<? extends CREThrowable>[] thrown() {
            return null;
        }

        @Override
        public boolean isRestricted() {
            return false;
        }

        @Override
        public Boolean runAsync() {
            return null;
        }

        /**
         * @return {@link Pattern#quote(String)} applied to the string value of
         * {@code args[0]}
         */
        @Override
        public Construct exec(Target t, Environment environment, Construct... args) throws ConfigRuntimeException {
            return new CString(Pattern.quote(args[0].val()), t);
        }

        @Override
        public String getName() {
            return "reg_escape";
        }

        @Override
        public Integer[] numArgs() {
            return new Integer[]{1};
        }

        @Override
        public String docs() {
            return "string {arg} Escapes arg so that it may be used directly in a regular expression, without fear that"
                    + " it will have special meaning; that is, it escapes all special characters. Use this if you need"
                    + " to use user input or similar as a literal search index.";
        }

        @Override
        public CHVersion since() {
            return CHVersion.V3_3_1;
        }

        @Override
        public Set<OptimizationOption> optimizationOptions() {
            return EnumSet.of(
                    OptimizationOption.CONSTANT_OFFLINE,
                    OptimizationOption.CACHE_RETURN,
                    OptimizationOption.NO_SIDE_EFFECTS
            );
        }

        @Override
        public ExampleScript[] examples() throws ConfigCompileException {
            return new ExampleScript[]{
                new ExampleScript("Basic usage", "reg_escape('\\\\d+')")
            };
        }
    }

    /**
     * Compiles a pattern argument.
     *
     * @param c either a construct whose string value is the regex, or a CArray
     * whose element 0 is the regex and whose element 1 is a string of flag
     * letters: i (case insensitive), m (multiline), s (dotall), in any case
     * @param t code target used for errors
     * @return the compiled pattern
     * @throws ConfigRuntimeException (a CREFormatException) for an unknown
     * flag letter or a regex that does not compile
     */
    private static Pattern getPattern(Construct c, Target t) throws ConfigRuntimeException {
        String regex;
        int flags = 0;
        if (c instanceof CArray) {
            CArray ca = (CArray) c;
            regex = ca.get(0, t).val();
            String sflags = ca.get(1, t).val();
            for (int i = 0; i < sflags.length(); i++) {
                // Lower-case one char at a time: unlike String.toLowerCase()
                // this does not depend on the default locale and can never
                // change the number of characters being indexed.
                char flag = Character.toLowerCase(sflags.charAt(i));
                switch (flag) {
                    case 'i':
                        flags |= Pattern.CASE_INSENSITIVE;
                        break;
                    case 'm':
                        flags |= Pattern.MULTILINE;
                        break;
                    case 's':
                        flags |= Pattern.DOTALL;
                        break;
                    default:
                        throw new CREFormatException("Unrecognized flag: " + flag, t);
                }
            }
        } else {
            regex = c.val();
        }
        try {
            return Pattern.compile(regex, flags);
        } catch (PatternSyntaxException e) {
            throw new CREFormatException(e.getMessage(), t);
        }
    }

    /**
     * Compile time helper: when the first argument is a constant, compiles it
     * so that an invalid regex or flag is reported immediately. Does nothing
     * for a dynamic first argument or an empty argument list.
     */
    private static void validateConstantPattern(List<ParseTree> children, Target t) throws ConfigRuntimeException {
        if (children.isEmpty()) {
            return;
        }
        Construct pattern = children.get(0).getData();
        if (!pattern.isDynamic()) {
            getPattern(pattern, t);
        }
    }

    /**
     * Converts the current match of {@code m} into an associative array: index
     * 0 is the whole match, 1..n are the numbered groups, and each name in
     * {@code namedGroups} is added as a string key. A group that did not take
     * part in the match is stored as null.
     *
     * @param m a matcher on which find() has just succeeded
     * @param namedGroups the group names to look up on the matcher
     * @param t code target used for created constructs and errors
     * @throws ConfigRuntimeException (a CREFormatException) when the reflective
     * Matcher.group(String) call fails
     */
    private static CArray captureMatch(Matcher m, Set<String> namedGroups, Target t) throws ConfigRuntimeException {
        CArray ret = CArray.GetAssociativeArray(t);
        ret.set(0, new CString(m.group(0), t), t);
        for (int i = 1; i <= m.groupCount(); i++) {
            String group = m.group(i);
            if (group == null) {
                ret.set(i, CNull.NULL, t);
            } else {
                ret.set(i, new CString(group, t), t);
            }
        }
        //Named groups are only supported in Java 7, but we can
        //dynamically enable this feature if they have it.
        try {
            for (String key : namedGroups) {
                String value = (String) ReflectionUtils.invokeMethod(Matcher.class, m, "group",
                        new Class[]{String.class}, new Object[]{key});
                if (value == null) {
                    ret.set(new CString(key, t), CNull.NULL, t);
                } else {
                    ret.set(key, value, t);
                }
            }
        } catch (ReflectionUtils.ReflectionException ex) {
            throw new CREFormatException("Named captures are only supported with Java 7.", t);
        }
        return ret;
    }

    /**
     * Characters that give a regex non-literal meaning. '{' is included
     * because it opens a repetition such as a{2}; a lone ']' or '}' is matched
     * literally by java.util.regex and therefore is not listed.
     */
    private static final String REGEX_META_CHARS = "[\\^$.|?*+(){";

    /**
     * Tells whether a regex only ever matches one fixed string, so that a
     * plain string function can be used in its place. The extra overhead of
     * doing this check only makes sense during optimization, not runtime.
     *
     * A regex qualifies when it contains none of the special characters, or
     * when the whole regex is a single \Q...\E quotation, which is what
     * something like reg_split(reg_escape('literal string'), '') produces.
     * In that case the caller has to use getLiteralRegex to obtain the text to
     * search for.
     */
    private static boolean isLiteralRegex(String regex) {
        if (isQuotedLiteral(regex)) {
            return true;
        }
        for (int i = 0; i < regex.length(); i++) {
            if (REGEX_META_CHARS.indexOf(regex.charAt(i)) >= 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the text a literal regex matches: the inside of the quotation
     * for a single \Q...\E regex, the regex itself otherwise.
     */
    private static String getLiteralRegex(String regex) {
        if (isQuotedLiteral(regex)) {
            return regex.substring(2, regex.length() - 2);
        }
        return regex;
    }

    /**
     * Tells whether the regex is exactly one \Q...\E quotation. If the inside
     * contains further \Q or \E markers, they are doing something more
     * complex, and the regex is not treated as a simple quotation.
     */
    private static boolean isQuotedLiteral(String regex) {
        if (regex.length() < 4 || !regex.startsWith("\\Q") || !regex.endsWith("\\E")) {
            return false;
        }
        String inner = regex.substring(2, regex.length() - 2);
        return !inner.contains("\\Q") && !inner.contains("\\E");
    }

    /** Matches the opening of a named capturing group and captures its name. */
    private static final Pattern NAMED_GROUP = Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>");

    /**
     * Collects the names of all named capturing groups that textually appear
     * in the given regex source.
     */
    private static Set<String> getNamedGroups(String regex) {
        Matcher m = NAMED_GROUP.matcher(regex);
        Set<String> names = new HashSet<String>();
        while (m.find()) {
            names.add(m.group(1));
        }
        return names;
    }
}