/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.painless; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.script.ScriptException; import java.nio.CharBuffer; import java.util.Arrays; import java.util.HashSet; import java.util.regex.Pattern; import static java.util.Collections.singletonMap; public class RegexTests extends ScriptTestCase { @Override protected Settings scriptEngineSettings() { // Enable regexes just for this test. They are disabled by default. return Settings.builder() .put(CompilerSettings.REGEX_ENABLED.getKey(), true) .build(); } public void testPatternAfterReturn() { assertEquals(true, exec("return 'foo' ==~ /foo/")); assertEquals(false, exec("return 'bar' ==~ /foo/")); } public void testBackslashEscapesForwardSlash() { assertEquals(true, exec("'//' ==~ /\\/\\//")); } public void testBackslashEscapeBackslash() { // Both of these are single backslashes but java escaping + Painless escaping.... assertEquals(true, exec("'\\\\' ==~ /\\\\/")); } public void testRegexIsNonGreedy() { assertEquals(true, exec("def s = /\\\\/.split('.\\\\.'); return s[1] ==~ /\\./")); } public void testPatternAfterAssignment() { assertEquals(true, exec("def a = /foo/; return 'foo' ==~ a")); } public void testPatternInIfStement() { assertEquals(true, exec("if (/foo/.matcher('foo').matches()) { return true } else { return false }")); assertEquals(true, exec("if ('foo' ==~ /foo/) { return true } else { return false }")); } public void testPatternAfterInfixBoolean() { assertEquals(true, exec("return false || /foo/.matcher('foo').matches()")); assertEquals(true, exec("return true && /foo/.matcher('foo').matches()")); assertEquals(true, exec("return false || 'foo' ==~ /foo/")); assertEquals(true, exec("return true && 'foo' ==~ /foo/")); } public void testPatternAfterUnaryNotBoolean() { assertEquals(false, exec("return !/foo/.matcher('foo').matches()")); assertEquals(true, exec("return !/foo/.matcher('bar').matches()")); } public void testInTernaryCondition() { assertEquals(true, exec("return /foo/.matcher('foo').matches() ? true : false")); assertEquals(1, exec("def i = 0; i += /foo/.matcher('foo').matches() ? 1 : 1; return i")); assertEquals(true, exec("return 'foo' ==~ /foo/ ? true : false")); assertEquals(1, exec("def i = 0; i += 'foo' ==~ /foo/ ? 1 : 1; return i")); } public void testInTernaryTrueArm() { assertEquals(true, exec("def i = true; return i ? /foo/.matcher('foo').matches() : false")); assertEquals(true, exec("def i = true; return i ? 'foo' ==~ /foo/ : false")); } public void testInTernaryFalseArm() { assertEquals(true, exec("def i = false; return i ? false : 'foo' ==~ /foo/")); } public void testRegexInFunction() { assertEquals(true, exec("boolean m(String s) {/foo/.matcher(s).matches()} m('foo')")); assertEquals(true, exec("boolean m(String s) {s ==~ /foo/} m('foo')")); } public void testReturnRegexFromFunction() { assertEquals(true, exec("Pattern m(boolean a) {a ? /foo/ : /bar/} m(true).matcher('foo').matches()")); assertEquals(true, exec("Pattern m(boolean a) {a ? /foo/ : /bar/} 'foo' ==~ m(true)")); assertEquals(false, exec("Pattern m(boolean a) {a ? /foo/ : /bar/} m(false).matcher('foo').matches()")); assertEquals(false, exec("Pattern m(boolean a) {a ? /foo/ : /bar/} 'foo' ==~ m(false)")); } public void testCallMatcherDirectly() { assertEquals(true, exec("return /foo/.matcher('foo').matches()")); assertEquals(false, exec("return /foo/.matcher('bar').matches()")); } public void testFindInIf() { assertEquals(true, exec("if ('fooasdfbasdf' =~ /foo/) {return true} else {return false}")); assertEquals(true, exec("if ('1fooasdfbasdf' =~ /foo/) {return true} else {return false}")); assertEquals(false, exec("if ('1f11ooasdfbasdf' =~ /foo/) {return true} else {return false}")); } public void testFindCastToBoolean() { assertEquals(true, exec("return (boolean)('fooasdfbasdf' =~ /foo/)")); assertEquals(true, exec("return (boolean)('111fooasdfbasdf' =~ /foo/)")); assertEquals(false, exec("return (boolean)('fo11oasdfbasdf' =~ /foo/)")); } public void testFindOrStringConcat() { assertEquals(true, exec("return 'f' + 'o' + 'o' =~ /foo/")); } public void testFindOfDef() { assertEquals(true, exec("def s = 'foo'; return s =~ /foo/")); } public void testFindOnInput() { assertEquals(true, exec("return params.s =~ /foo/", singletonMap("s", "fooasdfdf"), true)); assertEquals(false, exec("return params.s =~ /foo/", singletonMap("s", "11f2ooasdfdf"), true)); } public void testGroup() { assertEquals("foo", exec("Matcher m = /foo/.matcher('foo'); m.find(); return m.group()")); } public void testNumberedGroup() { assertEquals("o", exec("Matcher m = /(f)(o)o/.matcher('foo'); m.find(); return m.group(2)")); } public void testNamedGroup() { assertEquals("o", exec("Matcher m = /(?<first>f)(?<second>o)o/.matcher('foo'); m.find(); return m.namedGroup('second')")); } // Make sure some methods on Pattern are whitelisted public void testSplit() { assertArrayEquals(new String[] {"cat", "dog"}, (String[]) exec("/,/.split('cat,dog')")); } public void testSplitAsStream() { assertEquals(new HashSet<>(Arrays.asList("cat", "dog")), exec("/,/.splitAsStream('cat,dog').collect(Collectors.toSet())")); } // Make sure the flags are set public void testMultilineFlag() { assertEquals(Pattern.MULTILINE, exec("/./m.flags()")); } public void testSinglelineFlag() { assertEquals(Pattern.DOTALL, exec("/./s.flags()")); } public void testInsensitiveFlag() { assertEquals(Pattern.CASE_INSENSITIVE, exec("/./i.flags()")); } public void testExtendedFlag() { assertEquals(Pattern.COMMENTS, exec("/./x.flags()")); } public void testUnicodeCaseFlag() { assertEquals(Pattern.UNICODE_CASE, exec("/./u.flags()")); } public void testUnicodeCharacterClassFlag() { assertEquals(Pattern.UNICODE_CASE | Pattern.UNICODE_CHARACTER_CLASS, exec("/./U.flags()")); } public void testLiteralFlag() { assertEquals(Pattern.LITERAL, exec("/./l.flags()")); } public void testCanonicalEquivalenceFlag() { assertEquals(Pattern.CANON_EQ, exec("/./c.flags()")); } public void testManyFlags() { assertEquals(Pattern.CANON_EQ | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.COMMENTS, exec("/./ciux.flags()")); } public void testReplaceAllMatchesString() { assertEquals("thE qUIck brOwn fOx", exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))")); } public void testReplaceAllMatchesCharSequence() { CharSequence charSequence = CharBuffer.wrap("the quick brown fox"); assertEquals("thE qUIck brOwn fOx", exec("params.a.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence), true)); } public void testReplaceAllNoMatchString() { assertEquals("i am cat", exec("'i am cat'.replaceAll(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))")); } public void testReplaceAllNoMatchCharSequence() { CharSequence charSequence = CharBuffer.wrap("i am cat"); assertEquals("i am cat", exec("params.a.replaceAll(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence), true)); } public void testReplaceAllQuoteReplacement() { assertEquals("th/E q/U/Ick br/Own f/Ox", exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> '/' + m.group().toUpperCase(Locale.ROOT))")); assertEquals("th$E q$U$Ick br$Own f$Ox", exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> '$' + m.group().toUpperCase(Locale.ROOT))")); } public void testReplaceFirstMatchesString() { assertEquals("thE quick brown fox", exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))")); } public void testReplaceFirstMatchesCharSequence() { CharSequence charSequence = CharBuffer.wrap("the quick brown fox"); assertEquals("thE quick brown fox", exec("params.a.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence), true)); } public void testReplaceFirstNoMatchString() { assertEquals("i am cat", exec("'i am cat'.replaceFirst(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))")); } public void testReplaceFirstNoMatchCharSequence() { CharSequence charSequence = CharBuffer.wrap("i am cat"); assertEquals("i am cat", exec("params.a.replaceFirst(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence), true)); } public void testReplaceFirstQuoteReplacement() { assertEquals("th/E quick brown fox", exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> '/' + m.group().toUpperCase(Locale.ROOT))")); assertEquals("th$E quick brown fox", exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> '$' + m.group().toUpperCase(Locale.ROOT))")); } public void testCantUsePatternCompile() { IllegalArgumentException e = expectScriptThrows(IllegalArgumentException.class, () -> { exec("Pattern.compile('aa')"); }); assertEquals("Unknown call [compile] with [1] arguments on type [Pattern].", e.getMessage()); } public void testBadRegexPattern() { ScriptException e = expectThrows(ScriptException.class, () -> { exec("/\\ujjjj/"); // Invalid unicode }); assertEquals("Error compiling regex: Illegal Unicode escape sequence", e.getCause().getMessage()); // And make sure the location of the error points to the offset inside the pattern assertScriptStack(e, "/\\ujjjj/", " ^---- HERE"); } public void testRegexAgainstNumber() { ClassCastException e = expectScriptThrows(ClassCastException.class, () -> { exec("12 ==~ /cat/"); }); assertEquals("Cannot cast from [int] to [String].", e.getMessage()); } public void testBogusRegexFlag() { IllegalArgumentException e = expectScriptThrows(IllegalArgumentException.class, () -> { exec("/asdf/b", false); // Not picky so we get a non-assertion error }); assertEquals("unexpected token ['b'] was expecting one of [{<EOF>, ';'}].", e.getMessage()); } }