/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.operator.scalar; import com.facebook.presto.spi.function.Description; import com.facebook.presto.spi.function.LiteralParameters; import com.facebook.presto.spi.function.ScalarFunction; import com.facebook.presto.spi.function.SqlType; import com.facebook.presto.spi.type.SqlVarbinary; import com.facebook.presto.spi.type.StandardTypes; import com.facebook.presto.type.ArrayType; import com.facebook.presto.type.LiteralParameter; import com.facebook.presto.type.MapType; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.slice.Slice; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; import static com.facebook.presto.spi.type.BigintType.BIGINT; import static com.facebook.presto.spi.type.CharType.createCharType; import static com.facebook.presto.spi.type.IntegerType.INTEGER; import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY; import static com.facebook.presto.spi.type.VarcharType.VARCHAR; import static com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType; import static com.facebook.presto.spi.type.VarcharType.createVarcharType; import static com.facebook.presto.sql.analyzer.SemanticErrorCode.FUNCTION_NOT_FOUND; import static com.facebook.presto.util.StructuralTestUtil.mapType; import static com.google.common.base.Strings.repeat; import static java.lang.String.format; public class TestStringFunctions extends AbstractTestFunctions { @BeforeClass public void setUp() { registerScalar(getClass()); } @Description("varchar length") @ScalarFunction(value = "vl", deterministic = true) @LiteralParameters("x") @SqlType(StandardTypes.BIGINT) public static long varcharLength(@LiteralParameter("x") Long param, @SqlType("varchar(x)") Slice slice) { return param; } @ScalarFunction(value = "utf8", deterministic = false) @SqlType(StandardTypes.VARCHAR) public static Slice convertBinaryToVarchar(@SqlType(StandardTypes.VARBINARY) Slice binary) { return binary; } public static String padRight(String s, int n) { return String.format("%1$-" + n + "s", s); } @Test public void testChr() { assertFunction("CHR(65)", createVarcharType(1), "A"); assertFunction("CHR(9731)", createVarcharType(1), "\u2603"); assertFunction("CHR(131210)", createVarcharType(1), new String(Character.toChars(131210))); assertFunction("CHR(0)", createVarcharType(1), "\0"); assertInvalidFunction("CHR(-1)", "Not a valid Unicode code point: -1"); assertInvalidFunction("CHR(1234567)", "Not a valid Unicode code point: 1234567"); assertInvalidFunction("CHR(8589934592)", "Not a valid Unicode code point: 8589934592"); } @Test public void testCodepoint() { assertFunction("CODEPOINT('x')", INTEGER, 0x78); assertFunction("CODEPOINT('\u840C')", INTEGER, 0x840C); assertFunction("CODEPOINT(CHR(128077))", INTEGER, 128077); assertFunction("CODEPOINT(CHR(33804))", INTEGER, 33804); assertInvalidFunction("CODEPOINT('hello')", FUNCTION_NOT_FOUND); assertInvalidFunction("CODEPOINT('\u666E\u5217\u65AF\u6258')", FUNCTION_NOT_FOUND); assertInvalidFunction("CODEPOINT('')", INVALID_FUNCTION_ARGUMENT); } @Test public void testConcat() { assertInvalidFunction("CONCAT()", "There must be two or more concatenation arguments"); assertInvalidFunction("CONCAT('')", "There must be two or more concatenation arguments"); assertFunction("CONCAT('hello', ' world')", VARCHAR, "hello world"); assertFunction("CONCAT('', '')", VARCHAR, ""); assertFunction("CONCAT('what', '')", VARCHAR, "what"); assertFunction("CONCAT('', 'what')", VARCHAR, "what"); assertFunction("CONCAT(CONCAT('this', ' is'), ' cool')", VARCHAR, "this is cool"); assertFunction("CONCAT('this', CONCAT(' is', ' cool'))", VARCHAR, "this is cool"); // // Test concat for non-ASCII assertFunction("CONCAT('hello na\u00EFve', ' world')", VARCHAR, "hello na\u00EFve world"); assertFunction("CONCAT('\uD801\uDC2D', 'end')", VARCHAR, "\uD801\uDC2Dend"); assertFunction("CONCAT('\uD801\uDC2D', 'end', '\uD801\uDC2D')", VARCHAR, "\uD801\uDC2Dend\uD801\uDC2D"); assertFunction("CONCAT(CONCAT('\u4FE1\u5FF5', ',\u7231'), ',\u5E0C\u671B')", VARCHAR, "\u4FE1\u5FF5,\u7231,\u5E0C\u671B"); } @Test public void testLength() { assertFunction("LENGTH('')", BIGINT, 0L); assertFunction("LENGTH('hello')", BIGINT, 5L); assertFunction("LENGTH('Quadratically')", BIGINT, 13L); // // Test length for non-ASCII assertFunction("LENGTH('hello na\u00EFve world')", BIGINT, 17L); assertFunction("LENGTH('\uD801\uDC2Dend')", BIGINT, 4L); assertFunction("LENGTH('\u4FE1\u5FF5,\u7231,\u5E0C\u671B')", BIGINT, 7L); } @Test public void testCharLength() { assertFunction("LENGTH(CAST('hello' AS CHAR(5)))", BIGINT, 5L); assertFunction("LENGTH(CAST('Quadratically' AS CHAR(13)))", BIGINT, 13L); assertFunction("LENGTH(CAST('' AS CHAR(20)))", BIGINT, 20L); assertFunction("LENGTH(CAST('hello' AS CHAR(20)))", BIGINT, 20L); assertFunction("LENGTH(CAST('Quadratically' AS CHAR(20)))", BIGINT, 20L); // Test length for non-ASCII assertFunction("LENGTH(CAST('hello na\u00EFve world' AS CHAR(17)))", BIGINT, 17L); assertFunction("LENGTH(CAST('\uD801\uDC2Dend' AS CHAR(4)))", BIGINT, 4L); assertFunction("LENGTH(CAST('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' AS CHAR(7)))", BIGINT, 7L); assertFunction("LENGTH(CAST('hello na\u00EFve world' AS CHAR(20)))", BIGINT, 20L); assertFunction("LENGTH(CAST('\uD801\uDC2Dend' AS CHAR(20)))", BIGINT, 20L); assertFunction("LENGTH(CAST('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' AS CHAR(20)))", BIGINT, 20L); } @Test public void testLevenshteinDistance() { assertFunction("LEVENSHTEIN_DISTANCE('', '')", BIGINT, 0L); assertFunction("LEVENSHTEIN_DISTANCE('', 'hello')", BIGINT, 5L); assertFunction("LEVENSHTEIN_DISTANCE('hello', '')", BIGINT, 5L); assertFunction("LEVENSHTEIN_DISTANCE('hello', 'hello')", BIGINT, 0L); assertFunction("LEVENSHTEIN_DISTANCE('hello', 'hello world')", BIGINT, 6L); assertFunction("LEVENSHTEIN_DISTANCE('hello world', 'hel wold')", BIGINT, 3L); assertFunction("LEVENSHTEIN_DISTANCE('hello world', 'hellq wodld')", BIGINT, 2L); assertFunction("LEVENSHTEIN_DISTANCE('helo word', 'hello world')", BIGINT, 2L); assertFunction("LEVENSHTEIN_DISTANCE('hello word', 'dello world')", BIGINT, 2L); // Test for non-ASCII assertFunction("LEVENSHTEIN_DISTANCE('hello na\u00EFve world', 'hello naive world')", BIGINT, 1L); assertFunction("LEVENSHTEIN_DISTANCE('hello na\u00EFve world', 'hello na:ive world')", BIGINT, 2L); assertFunction("LEVENSHTEIN_DISTANCE('\u4FE1\u5FF5,\u7231,\u5E0C\u671B', '\u4FE1\u4EF0,\u7231,\u5E0C\u671B')", BIGINT, 1L); assertFunction("LEVENSHTEIN_DISTANCE('\u4F11\u5FF5,\u7231,\u5E0C\u671B', '\u4FE1\u5FF5,\u7231,\u5E0C\u671B')", BIGINT, 1L); assertFunction("LEVENSHTEIN_DISTANCE('\u4FE1\u5FF5,\u7231,\u5E0C\u671B', '\u4FE1\u5FF5\u5E0C\u671B')", BIGINT, 3L); assertFunction("LEVENSHTEIN_DISTANCE('\u4FE1\u5FF5,\u7231,\u5E0C\u671B', '\u4FE1\u5FF5,love,\u5E0C\u671B')", BIGINT, 4L); // Test for invalid utf-8 characters assertInvalidFunction("LEVENSHTEIN_DISTANCE('hello world', utf8(from_hex('81')))", "Invalid UTF-8 encoding in characters: �"); assertInvalidFunction("LEVENSHTEIN_DISTANCE('hello wolrd', utf8(from_hex('3281')))", "Invalid UTF-8 encoding in characters: 2�"); // Test for maximum length assertFunction(format("LEVENSHTEIN_DISTANCE('hello', '%s')", repeat("e", 100_000)), BIGINT, 99999L); assertFunction(format("LEVENSHTEIN_DISTANCE('%s', 'hello')", repeat("l", 100_000)), BIGINT, 99998L); assertInvalidFunction(format("LEVENSHTEIN_DISTANCE('%s', '%s')", repeat("x", 1001), repeat("x", 1001)), "The combined inputs for Levenshtein distance are too large"); assertInvalidFunction(format("LEVENSHTEIN_DISTANCE('hello', '%s')", repeat("x", 500_000)), "The combined inputs for Levenshtein distance are too large"); assertInvalidFunction(format("LEVENSHTEIN_DISTANCE('%s', 'hello')", repeat("x", 500_000)), "The combined inputs for Levenshtein distance are too large"); } @Test public void testReplace() { assertFunction("REPLACE('aaa', 'a', 'aa')", createVarcharType(11), "aaaaaa"); assertFunction("REPLACE('abcdefabcdef', 'cd', 'XX')", createVarcharType(38), "abXXefabXXef"); assertFunction("REPLACE('abcdefabcdef', 'cd')", createVarcharType(12), "abefabef"); assertFunction("REPLACE('123123tech', '123')", createVarcharType(10), "tech"); assertFunction("REPLACE('123tech123', '123')", createVarcharType(10), "tech"); assertFunction("REPLACE('222tech', '2', '3')", createVarcharType(15), "333tech"); assertFunction("REPLACE('0000123', '0')", createVarcharType(7), "123"); assertFunction("REPLACE('0000123', '0', ' ')", createVarcharType(15), " 123"); assertFunction("REPLACE('foo', '')", createVarcharType(3), "foo"); assertFunction("REPLACE('foo', '', '')", createVarcharType(3), "foo"); assertFunction("REPLACE('foo', 'foo', '')", createVarcharType(3), ""); assertFunction("REPLACE('abc', '', 'xx')", createVarcharType(11), "xxaxxbxxcxx"); assertFunction("REPLACE('', '', 'xx')", createVarcharType(2), "xx"); assertFunction("REPLACE('', '')", createVarcharType(0), ""); assertFunction("REPLACE('', '', '')", createVarcharType(0), ""); assertFunction("REPLACE('\u4FE1\u5FF5,\u7231,\u5E0C\u671B', ',', '\u2014')", createVarcharType(15), "\u4FE1\u5FF5\u2014\u7231\u2014\u5E0C\u671B"); assertFunction("REPLACE('::\uD801\uDC2D::', ':', '')", createVarcharType(5), "\uD801\uDC2D"); //\uD801\uDC2D is one character assertFunction("REPLACE('\u00D6sterreich', '\u00D6', 'Oe')", createVarcharType(32), "Oesterreich"); assertFunction("CAST(REPLACE(utf8(from_hex('CE')), '', 'X') AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {'X', (byte) 0xCE, 'X'})); assertFunction("CAST(REPLACE('abc' || utf8(from_hex('CE')), '', 'X') AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {'X', 'a', 'X', 'b', 'X', 'c', 'X', (byte) 0xCE, 'X'})); assertFunction("CAST(REPLACE(utf8(from_hex('CE')) || 'xyz', '', 'X') AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {'X', (byte) 0xCE, 'X', 'x', 'X', 'y', 'X', 'z', 'X'})); assertFunction("CAST(REPLACE('abc' || utf8(from_hex('CE')) || 'xyz', '', 'X') AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {'X', 'a', 'X', 'b', 'X', 'c', 'X', (byte) 0xCE, 'X', 'x', 'X', 'y', 'X', 'z', 'X'})); } @Test public void testReverse() { assertFunction("REVERSE('')", createVarcharType(0), ""); assertFunction("REVERSE('hello')", createVarcharType(5), "olleh"); assertFunction("REVERSE('Quadratically')", createVarcharType(13), "yllacitardauQ"); assertFunction("REVERSE('racecar')", createVarcharType(7), "racecar"); // Test REVERSE for non-ASCII assertFunction("REVERSE('\u4FE1\u5FF5,\u7231,\u5E0C\u671B')", createVarcharType(7), "\u671B\u5E0C,\u7231,\u5FF5\u4FE1"); assertFunction("REVERSE('\u00D6sterreich')", createVarcharType(10), "hcierrets\u00D6"); assertFunction("REVERSE('na\u00EFve')", createVarcharType(5), "ev\u00EFan"); assertFunction("REVERSE('\uD801\uDC2Dend')", createVarcharType(4), "dne\uD801\uDC2D"); assertFunction("CAST(REVERSE(utf8(from_hex('CE'))) AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {(byte) 0xCE})); assertFunction("CAST(REVERSE('hello' || utf8(from_hex('CE'))) AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {(byte) 0xCE, 'o', 'l', 'l', 'e', 'h'})); } @Test public void testStringPosition() { testStrPosAndPosition("high", "ig", 2L); testStrPosAndPosition("high", "igx", 0L); testStrPosAndPosition("Quadratically", "a", 3L); testStrPosAndPosition("foobar", "foobar", 1L); testStrPosAndPosition("foobar", "obar", 3L); testStrPosAndPosition("zoo!", "!", 4L); testStrPosAndPosition("x", "", 1L); testStrPosAndPosition("", "", 1L); testStrPosAndPosition("\u4FE1\u5FF5,\u7231,\u5E0C\u671B", "\u7231", 4L); testStrPosAndPosition("\u4FE1\u5FF5,\u7231,\u5E0C\u671B", "\u5E0C\u671B", 6L); testStrPosAndPosition("\u4FE1\u5FF5,\u7231,\u5E0C\u671B", "nice", 0L); testStrPosAndPosition(null, "", null); testStrPosAndPosition("", null, null); testStrPosAndPosition(null, null, null); } private void testStrPosAndPosition(String string, String substring, Long expected) { string = (string == null) ? "NULL" : ("'" + string + "'"); substring = (substring == null) ? "NULL" : ("'" + substring + "'"); assertFunction(format("STRPOS(%s, %s)", string, substring), BIGINT, expected); assertFunction(format("POSITION(%s in %s)", substring, string), BIGINT, expected); } @Test public void testSubstring() { assertFunction("SUBSTR('Quadratically', 5)", createVarcharType(13), "ratically"); assertFunction("SUBSTR('Quadratically', 50)", createVarcharType(13), ""); assertFunction("SUBSTR('Quadratically', -5)", createVarcharType(13), "cally"); assertFunction("SUBSTR('Quadratically', -50)", createVarcharType(13), ""); assertFunction("SUBSTR('Quadratically', 0)", createVarcharType(13), ""); assertFunction("SUBSTR('Quadratically', 5, 6)", createVarcharType(13), "ratica"); assertFunction("SUBSTR('Quadratically', 5, 10)", createVarcharType(13), "ratically"); assertFunction("SUBSTR('Quadratically', 5, 50)", createVarcharType(13), "ratically"); assertFunction("SUBSTR('Quadratically', 50, 10)", createVarcharType(13), ""); assertFunction("SUBSTR('Quadratically', -5, 4)", createVarcharType(13), "call"); assertFunction("SUBSTR('Quadratically', -5, 40)", createVarcharType(13), "cally"); assertFunction("SUBSTR('Quadratically', -50, 4)", createVarcharType(13), ""); assertFunction("SUBSTR('Quadratically', 0, 4)", createVarcharType(13), ""); assertFunction("SUBSTR('Quadratically', 5, 0)", createVarcharType(13), ""); assertFunction("SUBSTRING('Quadratically' FROM 5)", createVarcharType(13), "ratically"); assertFunction("SUBSTRING('Quadratically' FROM 50)", createVarcharType(13), ""); assertFunction("SUBSTRING('Quadratically' FROM -5)", createVarcharType(13), "cally"); assertFunction("SUBSTRING('Quadratically' FROM -50)", createVarcharType(13), ""); assertFunction("SUBSTRING('Quadratically' FROM 0)", createVarcharType(13), ""); assertFunction("SUBSTRING('Quadratically' FROM 5 FOR 6)", createVarcharType(13), "ratica"); assertFunction("SUBSTRING('Quadratically' FROM 5 FOR 50)", createVarcharType(13), "ratically"); // // Test SUBSTRING for non-ASCII assertFunction("SUBSTRING('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' FROM 1 FOR 1)", createVarcharType(7), "\u4FE1"); assertFunction("SUBSTRING('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' FROM 3 FOR 5)", createVarcharType(7), ",\u7231,\u5E0C\u671B"); assertFunction("SUBSTRING('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' FROM 4)", createVarcharType(7), "\u7231,\u5E0C\u671B"); assertFunction("SUBSTRING('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' FROM -2)", createVarcharType(7), "\u5E0C\u671B"); assertFunction("SUBSTRING('\uD801\uDC2Dend' FROM 1 FOR 1)", createVarcharType(4), "\uD801\uDC2D"); assertFunction("SUBSTRING('\uD801\uDC2Dend' FROM 2 FOR 3)", createVarcharType(4), "end"); } @Test public void testCharSubstring() { assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), 5)", createCharType(13), padRight("ratically", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), 50)", createCharType(13), padRight("", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), -5)", createCharType(13), padRight("cally", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), -50)", createCharType(13), padRight("", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), 0)", createCharType(13), padRight("", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), 5, 6)", createCharType(13), padRight("ratica", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), 5, 10)", createCharType(13), padRight("ratically", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), 5, 50)", createCharType(13), padRight("ratically", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), 50, 10)", createCharType(13), padRight("", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), -5, 4)", createCharType(13), padRight("call", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), -5, 40)", createCharType(13), padRight("cally", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), -50, 4)", createCharType(13), padRight("", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), 0, 4)", createCharType(13), padRight("", 13)); assertFunction("SUBSTR(CAST('Quadratically' AS CHAR(13)), 5, 0)", createCharType(13), padRight("", 13)); assertFunction("SUBSTRING(CAST('Quadratically' AS CHAR(13)) FROM 5)", createCharType(13), padRight("ratically", 13)); assertFunction("SUBSTRING(CAST('Quadratically' AS CHAR(13)) FROM 50)", createCharType(13), padRight("", 13)); assertFunction("SUBSTRING(CAST('Quadratically' AS CHAR(13)) FROM -5)", createCharType(13), padRight("cally", 13)); assertFunction("SUBSTRING(CAST('Quadratically' AS CHAR(13)) FROM -50)", createCharType(13), padRight("", 13)); assertFunction("SUBSTRING(CAST('Quadratically' AS CHAR(13)) FROM 0)", createCharType(13), padRight("", 13)); assertFunction("SUBSTRING(CAST('Quadratically' AS CHAR(13)) FROM 5 FOR 6)", createCharType(13), padRight("ratica", 13)); assertFunction("SUBSTRING(CAST('Quadratically' AS CHAR(13)) FROM 5 FOR 50)", createCharType(13), padRight("ratically", 13)); // // Test SUBSTRING for non-ASCII assertFunction("SUBSTRING(CAST('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' AS CHAR(7)) FROM 1 FOR 1)", createCharType(7), padRight("\u4FE1", 7)); assertFunction("SUBSTRING(CAST('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' AS CHAR(7)) FROM 3 FOR 5)", createCharType(7), padRight(",\u7231,\u5E0C\u671B", 7)); assertFunction("SUBSTRING(CAST('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' AS CHAR(7)) FROM 4)", createCharType(7), padRight("\u7231,\u5E0C\u671B", 7)); assertFunction("SUBSTRING(CAST('\u4FE1\u5FF5,\u7231,\u5E0C\u671B' AS CHAR(7)) FROM -2)", createCharType(7), padRight("\u5E0C\u671B", 7)); assertFunction("SUBSTRING(CAST('\uD801\uDC2Dend' AS CHAR(4)) FROM 1 FOR 1)", createCharType(4), padRight("\uD801\uDC2D", 4)); assertFunction("SUBSTRING(CAST('\uD801\uDC2Dend' AS CHAR(4)) FROM 2 FOR 3)", createCharType(4), padRight("end", 4)); assertFunction("SUBSTRING(CAST('\uD801\uDC2Dend' AS CHAR(40)) FROM 2 FOR 3)", createCharType(40), padRight("end", 40)); } @Test public void testSplit() { assertFunction("SPLIT('a.b.c', '.')", new ArrayType(createVarcharType(5)), ImmutableList.of("a", "b", "c")); assertFunction("SPLIT('ab', '.', 1)", new ArrayType(createVarcharType(2)), ImmutableList.of("ab")); assertFunction("SPLIT('a.b', '.', 1)", new ArrayType(createVarcharType(3)), ImmutableList.of("a.b")); assertFunction("SPLIT('a.b.c', '.')", new ArrayType(createVarcharType(5)), ImmutableList.of("a", "b", "c")); assertFunction("SPLIT('a..b..c', '..')", new ArrayType(createVarcharType(7)), ImmutableList.of("a", "b", "c")); assertFunction("SPLIT('a.b.c', '.', 2)", new ArrayType(createVarcharType(5)), ImmutableList.of("a", "b.c")); assertFunction("SPLIT('a.b.c', '.', 3)", new ArrayType(createVarcharType(5)), ImmutableList.of("a", "b", "c")); assertFunction("SPLIT('a.b.c', '.', 4)", new ArrayType(createVarcharType(5)), ImmutableList.of("a", "b", "c")); assertFunction("SPLIT('a.b.c.', '.', 4)", new ArrayType(createVarcharType(6)), ImmutableList.of("a", "b", "c", "")); assertFunction("SPLIT('a.b.c.', '.', 3)", new ArrayType(createVarcharType(6)), ImmutableList.of("a", "b", "c.")); assertFunction("SPLIT('...', '.')", new ArrayType(createVarcharType(3)), ImmutableList.of("", "", "", "")); assertFunction("SPLIT('..a...a..', '.')", new ArrayType(createVarcharType(9)), ImmutableList.of("", "", "a", "", "", "a", "", "")); // Test SPLIT for non-ASCII assertFunction("SPLIT('\u4FE1\u5FF5,\u7231,\u5E0C\u671B', ',', 3)", new ArrayType(createVarcharType(7)), ImmutableList.of("\u4FE1\u5FF5", "\u7231", "\u5E0C\u671B")); assertFunction("SPLIT('\u8B49\u8BC1\u8A3C', '\u8BC1', 2)", new ArrayType(createVarcharType(3)), ImmutableList.of("\u8B49", "\u8A3C")); assertFunction("SPLIT('.a.b.c', '.', 4)", new ArrayType(createVarcharType(6)), ImmutableList.of("", "a", "b", "c")); assertFunction("SPLIT('.a.b.c', '.', 3)", new ArrayType(createVarcharType(6)), ImmutableList.of("", "a", "b.c")); assertFunction("SPLIT('.a.b.c', '.', 2)", new ArrayType(createVarcharType(6)), ImmutableList.of("", "a.b.c")); assertFunction("SPLIT('a..b..c', '.', 3)", new ArrayType(createVarcharType(7)), ImmutableList.of("a", "", "b..c")); assertFunction("SPLIT('a.b..', '.', 3)", new ArrayType(createVarcharType(5)), ImmutableList.of("a", "b", ".")); assertInvalidFunction("SPLIT('a.b.c', '', 1)", "The delimiter may not be the empty string"); assertInvalidFunction("SPLIT('a.b.c', '.', 0)", "Limit must be positive"); assertInvalidFunction("SPLIT('a.b.c', '.', -1)", "Limit must be positive"); assertInvalidFunction("SPLIT('a.b.c', '.', 2147483648)", "Limit is too large"); } @Test public void testSplitToMap() { MapType expectedType = mapType(VARCHAR, VARCHAR); assertFunction("SPLIT_TO_MAP('', ',', '=')", expectedType, ImmutableMap.of()); assertFunction("SPLIT_TO_MAP('a=123,b=.4,c=,=d', ',', '=')", expectedType, ImmutableMap.of("a", "123", "b", ".4", "c", "", "", "d")); assertFunction("SPLIT_TO_MAP('=', ',', '=')", expectedType, ImmutableMap.of("", "")); assertFunction("SPLIT_TO_MAP('key=>value', ',', '=>')", expectedType, ImmutableMap.of("key", "value")); assertFunction("SPLIT_TO_MAP('key => value', ',', '=>')", expectedType, ImmutableMap.of("key ", " value")); // Test SPLIT_TO_MAP for non-ASCII assertFunction("SPLIT_TO_MAP('\u4EA0\u4EFF\u4EA1', '\u4E00', '\u4EFF')", expectedType, ImmutableMap.of("\u4EA0", "\u4EA1")); // If corresponding value is not found, then ""(empty string) is its value assertFunction("SPLIT_TO_MAP('\u4EC0\u4EFF', '\u4E00', '\u4EFF')", expectedType, ImmutableMap.of("\u4EC0", "")); // If corresponding key is not found, then ""(empty string) is its key assertFunction("SPLIT_TO_MAP('\u4EFF\u4EC1', '\u4E00', '\u4EFF')", expectedType, ImmutableMap.of("", "\u4EC1")); // Entry delimiter and key-value delimiter must not be the same. assertInvalidFunction("SPLIT_TO_MAP('', '\u4EFF', '\u4EFF')", "entryDelimiter and keyValueDelimiter must not be the same"); assertInvalidFunction("SPLIT_TO_MAP('a=123,b=.4,c=', '=', '=')", "entryDelimiter and keyValueDelimiter must not be the same"); // Duplicate keys are not allowed to exist. assertInvalidFunction("SPLIT_TO_MAP('a=123,a=.4', ',', '=')", "Duplicate keys (a) are not allowed"); assertInvalidFunction("SPLIT_TO_MAP('\u4EA0\u4EFF\u4EA1\u4E00\u4EA0\u4EFF\u4EB1', '\u4E00', '\u4EFF')", "Duplicate keys (\u4EA0) are not allowed"); // Key-value delimiter must appear exactly once in each entry. assertInvalidFunction("SPLIT_TO_MAP('key', ',', '=')", "Key-value delimiter must appear exactly once in each entry. Bad input: 'key'"); assertInvalidFunction("SPLIT_TO_MAP('key==value', ',', '=')", "Key-value delimiter must appear exactly once in each entry. Bad input: 'key==value'"); assertInvalidFunction("SPLIT_TO_MAP('key=va=lue', ',', '=')", "Key-value delimiter must appear exactly once in each entry. Bad input: 'key=va=lue'"); } @Test public void testSplitPart() { assertFunction("SPLIT_PART('abc-@-def-@-ghi', '-@-', 1)", createVarcharType(15), "abc"); assertFunction("SPLIT_PART('abc-@-def-@-ghi', '-@-', 2)", createVarcharType(15), "def"); assertFunction("SPLIT_PART('abc-@-def-@-ghi', '-@-', 3)", createVarcharType(15), "ghi"); assertFunction("SPLIT_PART('abc-@-def-@-ghi', '-@-', 4)", createVarcharType(15), null); assertFunction("SPLIT_PART('abc-@-def-@-ghi', '-@-', 99)", createVarcharType(15), null); assertFunction("SPLIT_PART('abc', 'abc', 1)", createVarcharType(3), ""); assertFunction("SPLIT_PART('abc', 'abc', 2)", createVarcharType(3), ""); assertFunction("SPLIT_PART('abc', 'abc', 3)", createVarcharType(3), null); assertFunction("SPLIT_PART('abc', '-@-', 1)", createVarcharType(3), "abc"); assertFunction("SPLIT_PART('abc', '-@-', 2)", createVarcharType(3), null); assertFunction("SPLIT_PART('', 'abc', 1)", createVarcharType(0), ""); assertFunction("SPLIT_PART('', '', 1)", createVarcharType(0), null); assertFunction("SPLIT_PART('abc', '', 1)", createVarcharType(3), "a"); assertFunction("SPLIT_PART('abc', '', 2)", createVarcharType(3), "b"); assertFunction("SPLIT_PART('abc', '', 3)", createVarcharType(3), "c"); assertFunction("SPLIT_PART('abc', '', 4)", createVarcharType(3), null); assertFunction("SPLIT_PART('abc', '', 99)", createVarcharType(3), null); assertFunction("SPLIT_PART('abc', 'abcd', 1)", createVarcharType(3), "abc"); assertFunction("SPLIT_PART('abc', 'abcd', 2)", createVarcharType(3), null); assertFunction("SPLIT_PART('abc--@--def', '-@-', 1)", createVarcharType(11), "abc-"); assertFunction("SPLIT_PART('abc--@--def', '-@-', 2)", createVarcharType(11), "-def"); assertFunction("SPLIT_PART('abc-@-@-@-def', '-@-', 1)", createVarcharType(13), "abc"); assertFunction("SPLIT_PART('abc-@-@-@-def', '-@-', 2)", createVarcharType(13), "@"); assertFunction("SPLIT_PART('abc-@-@-@-def', '-@-', 3)", createVarcharType(13), "def"); assertFunction("SPLIT_PART(' ', ' ', 1)", createVarcharType(1), ""); assertFunction("SPLIT_PART('abcdddddef', 'dd', 1)", createVarcharType(10), "abc"); assertFunction("SPLIT_PART('abcdddddef', 'dd', 2)", createVarcharType(10), ""); assertFunction("SPLIT_PART('abcdddddef', 'dd', 3)", createVarcharType(10), "def"); assertFunction("SPLIT_PART('a/b/c', '/', 4)", createVarcharType(5), null); assertFunction("SPLIT_PART('a/b/c/', '/', 4)", createVarcharType(6), ""); // // Test SPLIT_PART for non-ASCII assertFunction("SPLIT_PART('\u4FE1\u5FF5,\u7231,\u5E0C\u671B', ',', 1)", createVarcharType(7), "\u4FE1\u5FF5"); assertFunction("SPLIT_PART('\u4FE1\u5FF5,\u7231,\u5E0C\u671B', ',', 2)", createVarcharType(7), "\u7231"); assertFunction("SPLIT_PART('\u4FE1\u5FF5,\u7231,\u5E0C\u671B', ',', 3)", createVarcharType(7), "\u5E0C\u671B"); assertFunction("SPLIT_PART('\u4FE1\u5FF5,\u7231,\u5E0C\u671B', ',', 4)", createVarcharType(7), null); assertFunction("SPLIT_PART('\u8B49\u8BC1\u8A3C', '\u8BC1', 1)", createVarcharType(3), "\u8B49"); assertFunction("SPLIT_PART('\u8B49\u8BC1\u8A3C', '\u8BC1', 2)", createVarcharType(3), "\u8A3C"); assertFunction("SPLIT_PART('\u8B49\u8BC1\u8A3C', '\u8BC1', 3)", createVarcharType(3), null); assertInvalidFunction("SPLIT_PART('abc', '', 0)", "Index must be greater than zero"); assertInvalidFunction("SPLIT_PART('abc', '', -1)", "Index must be greater than zero"); assertInvalidFunction("SPLIT_PART(utf8(from_hex('CE')), '', 1)", "Invalid UTF-8 encoding"); } @Test(expectedExceptions = RuntimeException.class) public void testSplitPartInvalid() { assertFunction("SPLIT_PART('abc-@-def-@-ghi', '-@-', 0)", VARCHAR, ""); } @Test public void testLeftTrim() { assertFunction("LTRIM('')", createVarcharType(0), ""); assertFunction("LTRIM(' ')", createVarcharType(3), ""); assertFunction("LTRIM(' hello ')", createVarcharType(9), "hello "); assertFunction("LTRIM(' hello')", createVarcharType(7), "hello"); assertFunction("LTRIM('hello ')", createVarcharType(7), "hello "); assertFunction("LTRIM(' hello world ')", createVarcharType(13), "hello world "); assertFunction("LTRIM('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ')", createVarcharType(9), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B "); assertFunction("LTRIM(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B ')", createVarcharType(9), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B "); assertFunction("LTRIM(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B')", createVarcharType(9), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B"); assertFunction("LTRIM(' \u2028 \u4FE1\u5FF5 \u7231 \u5E0C\u671B')", createVarcharType(10), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B"); } @Test public void testCharLeftTrim() { assertFunction("LTRIM(CAST('' AS CHAR(20)))", createCharType(20), padRight("", 20)); assertFunction("LTRIM(CAST(' hello ' AS CHAR(9)))", createCharType(9), padRight("hello", 9)); assertFunction("LTRIM(CAST(' hello' AS CHAR(7)))", createCharType(7), padRight("hello", 7)); assertFunction("LTRIM(CAST('hello ' AS CHAR(7)))", createCharType(7), padRight("hello", 7)); assertFunction("LTRIM(CAST(' hello world ' AS CHAR(13)))", createCharType(13), padRight("hello world", 13)); assertFunction("LTRIM(CAST('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ' AS CHAR(9)))", createCharType(9), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 9)); assertFunction("LTRIM(CAST(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B ' AS CHAR(9)))", createCharType(9), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 9)); assertFunction("LTRIM(CAST(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B' AS CHAR(9)))", createCharType(9), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 9)); assertFunction("LTRIM(CAST(' \u2028 \u4FE1\u5FF5 \u7231 \u5E0C\u671B' AS CHAR(10)))", createCharType(10), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 10)); } @Test public void testRightTrim() { assertFunction("RTRIM('')", createVarcharType(0), ""); assertFunction("RTRIM(' ')", createVarcharType(3), ""); assertFunction("RTRIM(' hello ')", createVarcharType(9), " hello"); assertFunction("RTRIM(' hello')", createVarcharType(7), " hello"); assertFunction("RTRIM('hello ')", createVarcharType(7), "hello"); assertFunction("RTRIM(' hello world ')", createVarcharType(13), " hello world"); assertFunction("RTRIM('\u4FE1\u5FF5 \u7231 \u5E0C\u671B \u2028 ')", createVarcharType(10), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B"); assertFunction("RTRIM('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ')", createVarcharType(9), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B"); assertFunction("RTRIM(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B ')", createVarcharType(9), " \u4FE1\u5FF5 \u7231 \u5E0C\u671B"); assertFunction("RTRIM(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B')", createVarcharType(9), " \u4FE1\u5FF5 \u7231 \u5E0C\u671B"); } @Test public void testCharRightTrim() { assertFunction("RTRIM(CAST('' AS CHAR(20)))", createCharType(20), padRight("", 20)); assertFunction("RTRIM(CAST(' hello ' AS CHAR(9)))", createCharType(9), padRight(" hello", 9)); assertFunction("RTRIM(CAST(' hello' AS CHAR(7)))", createCharType(7), padRight(" hello", 7)); assertFunction("RTRIM(CAST('hello ' AS CHAR(7)))", createCharType(7), padRight("hello", 7)); assertFunction("RTRIM(CAST(' hello world ' AS CHAR(13)))", createCharType(13), padRight(" hello world", 13)); assertFunction("RTRIM(CAST('\u4FE1\u5FF5 \u7231 \u5E0C\u671B \u2028 ' AS CHAR(10)))", createCharType(10), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 10)); assertFunction("RTRIM(CAST('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ' AS CHAR(9)))", createCharType(9), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 9)); assertFunction("RTRIM(CAST(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B ' AS CHAR(9)))", createCharType(9), padRight(" \u4FE1\u5FF5 \u7231 \u5E0C\u671B", 9)); assertFunction("RTRIM(CAST(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B' AS CHAR(9)))", createCharType(9), padRight(" \u4FE1\u5FF5 \u7231 \u5E0C\u671B", 9)); } @Test public void testTrim() { assertFunction("TRIM('')", createVarcharType(0), ""); assertFunction("TRIM(' ')", createVarcharType(3), ""); assertFunction("TRIM(' hello ')", createVarcharType(9), "hello"); assertFunction("TRIM(' hello')", createVarcharType(7), "hello"); assertFunction("TRIM('hello ')", createVarcharType(7), "hello"); assertFunction("TRIM(' hello world ')", createVarcharType(13), "hello world"); assertFunction("TRIM('\u4FE1\u5FF5 \u7231 \u5E0C\u671B \u2028 ')", createVarcharType(10), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B"); assertFunction("TRIM('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ')", createVarcharType(9), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B"); assertFunction("TRIM(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B ')", createVarcharType(9), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B"); assertFunction("TRIM(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B')", createVarcharType(9), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B"); assertFunction("TRIM(' \u2028 \u4FE1\u5FF5 \u7231 \u5E0C\u671B')", createVarcharType(10), "\u4FE1\u5FF5 \u7231 \u5E0C\u671B"); } @Test public void testCharTrim() { assertFunction("TRIM(CAST('' AS CHAR(20)))", createCharType(20), padRight("", 20)); assertFunction("TRIM(CAST(' hello ' AS CHAR(9)))", createCharType(9), padRight("hello", 9)); assertFunction("TRIM(CAST(' hello' AS CHAR(7)))", createCharType(7), padRight("hello", 7)); assertFunction("TRIM(CAST('hello ' AS CHAR(7)))", createCharType(7), padRight("hello", 7)); assertFunction("TRIM(CAST(' hello world ' AS CHAR(13)))", createCharType(13), padRight("hello world", 13)); assertFunction("TRIM(CAST('\u4FE1\u5FF5 \u7231 \u5E0C\u671B \u2028 ' AS CHAR(10)))", createCharType(10), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 10)); assertFunction("TRIM(CAST('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ' AS CHAR(9)))", createCharType(9), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 9)); assertFunction("TRIM(CAST(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B ' AS CHAR(9)))", createCharType(9), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 9)); assertFunction("TRIM(CAST(' \u4FE1\u5FF5 \u7231 \u5E0C\u671B' AS CHAR(9)))", createCharType(9), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 9)); assertFunction("TRIM(CAST(' \u2028 \u4FE1\u5FF5 \u7231 \u5E0C\u671B' AS CHAR(10)))", createCharType(10), padRight("\u4FE1\u5FF5 \u7231 \u5E0C\u671B", 10)); } @Test public void testLeftTrimParametrized() { assertFunction("LTRIM('', '')", createVarcharType(0), ""); assertFunction("LTRIM(' ', '')", createVarcharType(3), " "); assertFunction("LTRIM(' hello ', '')", createVarcharType(9), " hello "); assertFunction("LTRIM(' hello ', ' ')", createVarcharType(9), "hello "); assertFunction("LTRIM(' hello ', CHAR ' ')", createVarcharType(9), "hello "); assertFunction("LTRIM(' hello ', 'he ')", createVarcharType(9), "llo "); assertFunction("LTRIM(' hello', ' ')", createVarcharType(7), "hello"); assertFunction("LTRIM(' hello', 'e h')", createVarcharType(7), "llo"); assertFunction("LTRIM('hello ', 'l')", createVarcharType(7), "hello "); assertFunction("LTRIM(' hello world ', ' ')", createVarcharType(13), "hello world "); assertFunction("LTRIM(' hello world ', ' eh')", createVarcharType(13), "llo world "); assertFunction("LTRIM(' hello world ', ' ehlowrd')", createVarcharType(13), ""); assertFunction("LTRIM(' hello world ', ' x')", createVarcharType(13), "hello world "); // non latin characters assertFunction("LTRIM('\u017a\u00f3\u0142\u0107', '\u00f3\u017a')", createVarcharType(4), "\u0142\u0107"); // invalid utf-8 characters assertFunction("CAST(LTRIM(utf8(from_hex('81')), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81)); assertFunction("CAST(LTRIM(CONCAT(utf8(from_hex('81')), ' '), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81, ' ')); assertFunction("CAST(LTRIM(CONCAT(' ', utf8(from_hex('81'))), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81)); assertFunction("CAST(LTRIM(CONCAT(' ', utf8(from_hex('81')), ' '), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81, ' ')); assertInvalidFunction("LTRIM('hello world', utf8(from_hex('81')))", "Invalid UTF-8 encoding in characters: �"); assertInvalidFunction("LTRIM('hello wolrd', utf8(from_hex('3281')))", "Invalid UTF-8 encoding in characters: 2�"); } @Test public void testCharLeftTrimParametrized() { assertFunction("LTRIM(CAST('' AS CHAR(1)), '')", createCharType(1), padRight("", 1)); assertFunction("LTRIM(CAST(' ' AS CHAR(3)), '')", createCharType(3), padRight("", 3)); assertFunction("LTRIM(CAST(' hello ' AS CHAR(9)), '')", createCharType(9), padRight(" hello", 9)); assertFunction("LTRIM(CAST(' hello ' AS CHAR(9)), ' ')", createCharType(9), padRight("hello", 9)); assertFunction("LTRIM(CAST(' hello ' AS CHAR(9)), 'he ')", createCharType(9), padRight("llo", 9)); assertFunction("LTRIM(CAST(' hello' AS CHAR(7)), ' ')", createCharType(7), padRight("hello", 7)); assertFunction("LTRIM(CAST(' hello' AS CHAR(7)), 'e h')", createCharType(7), padRight("llo", 7)); assertFunction("LTRIM(CAST('hello ' AS CHAR(7)), 'l')", createCharType(7), padRight("hello", 7)); assertFunction("LTRIM(CAST(' hello world ' AS CHAR(13)), ' ')", createCharType(13), padRight("hello world", 13)); assertFunction("LTRIM(CAST(' hello world ' AS CHAR(13)), ' eh')", createCharType(13), padRight("llo world", 13)); assertFunction("LTRIM(CAST(' hello world ' AS CHAR(13)), ' ehlowrd')", createCharType(13), padRight("", 13)); assertFunction("LTRIM(CAST(' hello world ' AS CHAR(13)), ' x')", createCharType(13), padRight("hello world", 13)); // non latin characters assertFunction("LTRIM(CAST('\u017a\u00f3\u0142\u0107' AS CHAR(4)), '\u00f3\u017a')", createCharType(4), padRight("\u0142\u0107", 4)); } private static SqlVarbinary varbinary(int... bytesAsInts) { byte[] bytes = new byte[bytesAsInts.length]; for (int i = 0; i < bytes.length; i++) { bytes[i] = (byte) bytesAsInts[i]; } return new SqlVarbinary(bytes); } @Test public void testRightTrimParametrized() { assertFunction("RTRIM('', '')", createVarcharType(0), ""); assertFunction("RTRIM(' ', '')", createVarcharType(3), " "); assertFunction("RTRIM(' hello ', '')", createVarcharType(9), " hello "); assertFunction("RTRIM(' hello ', ' ')", createVarcharType(9), " hello"); assertFunction("RTRIM(' hello ', 'lo ')", createVarcharType(9), " he"); assertFunction("RTRIM('hello ', ' ')", createVarcharType(7), "hello"); assertFunction("RTRIM('hello ', 'l o')", createVarcharType(7), "he"); assertFunction("RTRIM('hello ', 'l')", createVarcharType(7), "hello "); assertFunction("RTRIM(' hello world ', ' ')", createVarcharType(13), " hello world"); assertFunction("RTRIM(' hello world ', ' ld')", createVarcharType(13), " hello wor"); assertFunction("RTRIM(' hello world ', ' ehlowrd')", createVarcharType(13), ""); assertFunction("RTRIM(' hello world ', ' x')", createVarcharType(13), " hello world"); // non latin characters assertFunction("RTRIM('\u017a\u00f3\u0142\u0107', '\u0107\u0142')", createVarcharType(4), "\u017a\u00f3"); // invalid utf-8 characters assertFunction("CAST(RTRIM(utf8(from_hex('81')), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81)); assertFunction("CAST(RTRIM(CONCAT(utf8(from_hex('81')), ' '), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81)); assertFunction("CAST(RTRIM(CONCAT(' ', utf8(from_hex('81'))), ' ') AS VARBINARY)", VARBINARY, varbinary(' ', 0x81)); assertFunction("CAST(RTRIM(CONCAT(' ', utf8(from_hex('81')), ' '), ' ') AS VARBINARY)", VARBINARY, varbinary(' ', 0x81)); assertInvalidFunction("RTRIM('hello world', utf8(from_hex('81')))", "Invalid UTF-8 encoding in characters: �"); assertInvalidFunction("RTRIM('hello world', utf8(from_hex('3281')))", "Invalid UTF-8 encoding in characters: 2�"); } @Test public void testCharRightTrimParametrized() { assertFunction("RTRIM(CAST('' AS CHAR(1)), '')", createCharType(1), padRight("", 1)); assertFunction("RTRIM(CAST(' ' AS CHAR(3)), '')", createCharType(3), padRight("", 3)); assertFunction("RTRIM(CAST(' hello ' AS CHAR(9)), '')", createCharType(9), padRight(" hello", 9)); assertFunction("RTRIM(CAST(' hello ' AS CHAR(9)), ' ')", createCharType(9), padRight(" hello", 9)); assertFunction("RTRIM(CAST(' hello ' AS CHAR(9)), 'he ')", createCharType(9), padRight(" hello", 9)); assertFunction("RTRIM(CAST(' hello' AS CHAR(7)), ' ')", createCharType(7), padRight(" hello", 7)); assertFunction("RTRIM(CAST(' hello' AS CHAR(7)), 'e h')", createCharType(7), padRight(" hello", 7)); assertFunction("RTRIM(CAST('hello ' AS CHAR(7)), 'l')", createCharType(7), padRight("hello", 7)); assertFunction("RTRIM(CAST(' hello world ' AS CHAR(13)), ' ')", createCharType(13), padRight(" hello world", 13)); assertFunction("RTRIM(CAST(' hello world ' AS CHAR(13)), ' eh')", createCharType(13), padRight(" hello world", 13)); assertFunction("RTRIM(CAST(' hello world ' AS CHAR(13)), ' ehlowrd')", createCharType(13), padRight("", 13)); assertFunction("RTRIM(CAST(' hello world ' AS CHAR(13)), ' x')", createCharType(13), padRight(" hello world", 13)); // non latin characters assertFunction("RTRIM(CAST('\u017a\u00f3\u0142\u0107' AS CHAR(4)), '\u0107\u0142')", createCharType(4), padRight("\u017a\u00f3", 4)); } @Test public void testTrimParametrized() { assertFunction("TRIM('', '')", createVarcharType(0), ""); assertFunction("TRIM(' ', '')", createVarcharType(3), " "); assertFunction("TRIM(' hello ', '')", createVarcharType(9), " hello "); assertFunction("TRIM(' hello ', ' ')", createVarcharType(9), "hello"); assertFunction("TRIM(' hello ', 'he ')", createVarcharType(9), "llo"); assertFunction("TRIM(' hello ', 'lo ')", createVarcharType(9), "he"); assertFunction("TRIM(' hello', ' ')", createVarcharType(7), "hello"); assertFunction("TRIM('hello ', ' ')", createVarcharType(7), "hello"); assertFunction("TRIM('hello ', 'l o')", createVarcharType(7), "he"); assertFunction("TRIM('hello ', 'l')", createVarcharType(7), "hello "); assertFunction("TRIM(' hello world ', ' ')", createVarcharType(13), "hello world"); assertFunction("TRIM(' hello world ', ' ld')", createVarcharType(13), "hello wor"); assertFunction("TRIM(' hello world ', ' eh')", createVarcharType(13), "llo world"); assertFunction("TRIM(' hello world ', ' ehlowrd')", createVarcharType(13), ""); assertFunction("TRIM(' hello world ', ' x')", createVarcharType(13), "hello world"); // non latin characters assertFunction("TRIM('\u017a\u00f3\u0142\u0107', '\u0107\u017a')", createVarcharType(4), "\u00f3\u0142"); // invalid utf-8 characters assertFunction("CAST(TRIM(utf8(from_hex('81')), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81)); assertFunction("CAST(TRIM(CONCAT(utf8(from_hex('81')), ' '), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81)); assertFunction("CAST(TRIM(CONCAT(' ', utf8(from_hex('81'))), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81)); assertFunction("CAST(TRIM(CONCAT(' ', utf8(from_hex('81')), ' '), ' ') AS VARBINARY)", VARBINARY, varbinary(0x81)); assertInvalidFunction("TRIM('hello world', utf8(from_hex('81')))", "Invalid UTF-8 encoding in characters: �"); assertInvalidFunction("TRIM('hello world', utf8(from_hex('3281')))", "Invalid UTF-8 encoding in characters: 2�"); } @Test public void testCharTrimParametrized() { assertFunction("TRIM(CAST('' AS CHAR(1)), '')", createCharType(1), padRight("", 1)); assertFunction("TRIM(CAST(' ' AS CHAR(3)), '')", createCharType(3), padRight("", 3)); assertFunction("TRIM(CAST(' hello ' AS CHAR(9)), '')", createCharType(9), padRight(" hello", 9)); assertFunction("TRIM(CAST(' hello ' AS CHAR(9)), ' ')", createCharType(9), padRight("hello", 9)); assertFunction("TRIM(CAST(' hello ' AS CHAR(9)), 'he ')", createCharType(9), padRight("llo", 9)); assertFunction("TRIM(CAST(' hello' AS CHAR(7)), ' ')", createCharType(7), padRight("hello", 7)); assertFunction("TRIM(CAST(' hello' AS CHAR(7)), 'e h')", createCharType(7), padRight("llo", 7)); assertFunction("TRIM(CAST('hello ' AS CHAR(7)), 'l')", createCharType(7), padRight("hello", 7)); assertFunction("TRIM(CAST(' hello world ' AS CHAR(13)), ' ')", createCharType(13), padRight("hello world", 13)); assertFunction("TRIM(CAST(' hello world ' AS CHAR(13)), ' eh')", createCharType(13), padRight("llo world", 13)); assertFunction("TRIM(CAST(' hello world ' AS CHAR(13)), ' ehlowrd')", createCharType(13), padRight("", 13)); assertFunction("TRIM(CAST(' hello world ' AS CHAR(13)), ' x')", createCharType(13), padRight("hello world", 13)); // non latin characters assertFunction("TRIM(CAST('\u017a\u00f3\u0142\u0107' AS CHAR(4)), '\u017a\u0107\u0142')", createCharType(4), padRight("\u00f3", 4)); } @Test public void testVarcharToVarcharX() { assertFunction("LOWER(CAST('HELLO' AS VARCHAR))", createUnboundedVarcharType(), "hello"); } @Test public void testLower() { assertFunction("LOWER('')", createVarcharType(0), ""); assertFunction("LOWER('Hello World')", createVarcharType(11), "hello world"); assertFunction("LOWER('WHAT!!')", createVarcharType(6), "what!!"); assertFunction("LOWER('\u00D6STERREICH')", createVarcharType(10), lowerByCodePoint("\u00D6sterreich")); assertFunction("LOWER('From\uD801\uDC2DTo')", createVarcharType(7), lowerByCodePoint("from\uD801\uDC2Dto")); assertFunction("CAST(LOWER(utf8(from_hex('CE'))) AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {(byte) 0xCE})); assertFunction("CAST(LOWER('HELLO' || utf8(from_hex('CE'))) AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {'h', 'e', 'l', 'l', 'o', (byte) 0xCE})); assertFunction("CAST(LOWER(utf8(from_hex('CE')) || 'HELLO') AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {(byte) 0xCE, 'h', 'e', 'l', 'l', 'o'})); assertFunction("CAST(LOWER(utf8(from_hex('C8BAFF'))) AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {(byte) 0xE2, (byte) 0xB1, (byte) 0xA5, (byte) 0xFF})); } @Test public void testCharLower() { assertFunction("LOWER(CAST('' AS CHAR(10)))", createCharType(10), padRight("", 10)); assertFunction("LOWER(CAST('Hello World' AS CHAR(11)))", createCharType(11), padRight("hello world", 11)); assertFunction("LOWER(CAST('WHAT!!' AS CHAR(6)))", createCharType(6), padRight("what!!", 6)); assertFunction("LOWER(CAST('\u00D6STERREICH' AS CHAR(10)))", createCharType(10), padRight(lowerByCodePoint("\u00D6sterreich"), 10)); assertFunction("LOWER(CAST('From\uD801\uDC2DTo' AS CHAR(7)))", createCharType(7), padRight(lowerByCodePoint("from\uD801\uDC2Dto"), 7)); } @Test public void testUpper() { assertFunction("UPPER('')", createVarcharType(0), ""); assertFunction("UPPER('Hello World')", createVarcharType(11), "HELLO WORLD"); assertFunction("UPPER('what!!')", createVarcharType(6), "WHAT!!"); assertFunction("UPPER('\u00D6sterreich')", createVarcharType(10), upperByCodePoint("\u00D6") + "STERREICH"); assertFunction("UPPER('From\uD801\uDC2DTo')", createVarcharType(7), "FROM" + upperByCodePoint("\uD801\uDC2D") + "TO"); assertFunction("CAST(UPPER(utf8(from_hex('CE'))) AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {(byte) 0xCE})); assertFunction("CAST(UPPER('hello' || utf8(from_hex('CE'))) AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {'H', 'E', 'L', 'L', 'O', (byte) 0xCE})); assertFunction("CAST(UPPER(utf8(from_hex('CE')) || 'hello') AS VARBINARY)", VARBINARY, new SqlVarbinary(new byte[] {(byte) 0xCE, 'H', 'E', 'L', 'L', 'O'})); } @Test public void testCharUpper() { assertFunction("UPPER(CAST('' AS CHAR(10)))", createCharType(10), padRight("", 10)); assertFunction("UPPER(CAST('Hello World' AS CHAR(11)))", createCharType(11), padRight("HELLO WORLD", 11)); assertFunction("UPPER(CAST('what!!' AS CHAR(6)))", createCharType(6), padRight("WHAT!!", 6)); assertFunction("UPPER(CAST('\u00D6sterreich' AS CHAR(10)))", createCharType(10), padRight(upperByCodePoint("\u00D6") + "STERREICH", 10)); assertFunction("UPPER(CAST('From\uD801\uDC2DTo' AS CHAR(7)))", createCharType(7), padRight("FROM" + upperByCodePoint("\uD801\uDC2D") + "TO", 7)); } @Test public void testLeftPad() { assertFunction("LPAD('text', 5, 'x')", VARCHAR, "xtext"); assertFunction("LPAD('text', 4, 'x')", VARCHAR, "text"); assertFunction("LPAD('text', 6, 'xy')", VARCHAR, "xytext"); assertFunction("LPAD('text', 7, 'xy')", VARCHAR, "xyxtext"); assertFunction("LPAD('text', 9, 'xyz')", VARCHAR, "xyzxytext"); assertFunction("LPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 10, '\u671B')", VARCHAR, "\u671B\u4FE1\u5FF5 \u7231 \u5E0C\u671B "); assertFunction("LPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 11, '\u671B')", VARCHAR, "\u671B\u671B\u4FE1\u5FF5 \u7231 \u5E0C\u671B "); assertFunction("LPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 12, '\u5E0C\u671B')", VARCHAR, "\u5E0C\u671B\u5E0C\u4FE1\u5FF5 \u7231 \u5E0C\u671B "); assertFunction("LPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 13, '\u5E0C\u671B')", VARCHAR, "\u5E0C\u671B\u5E0C\u671B\u4FE1\u5FF5 \u7231 \u5E0C\u671B "); assertFunction("LPAD('', 3, 'a')", VARCHAR, "aaa"); assertFunction("LPAD('abc', 0, 'e')", VARCHAR, ""); // truncation assertFunction("LPAD('text', 3, 'xy')", VARCHAR, "tex"); assertFunction("LPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 5, '\u671B')", VARCHAR, "\u4FE1\u5FF5 \u7231 "); // failure modes assertInvalidFunction("LPAD('abc', 3, '')", "Padding string must not be empty"); // invalid target lengths long maxSize = Integer.MAX_VALUE; assertInvalidFunction("LPAD('abc', -1, 'foo')", "Target length must be in the range [0.." + maxSize + "]"); assertInvalidFunction("LPAD('abc', " + (maxSize + 1) + ", '')", "Target length must be in the range [0.." + maxSize + "]"); } @Test public void testRightPad() { assertFunction("RPAD('text', 5, 'x')", VARCHAR, "textx"); assertFunction("RPAD('text', 4, 'x')", VARCHAR, "text"); assertFunction("RPAD('text', 6, 'xy')", VARCHAR, "textxy"); assertFunction("RPAD('text', 7, 'xy')", VARCHAR, "textxyx"); assertFunction("RPAD('text', 9, 'xyz')", VARCHAR, "textxyzxy"); assertFunction("RPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 10, '\u671B')", VARCHAR, "\u4FE1\u5FF5 \u7231 \u5E0C\u671B \u671B"); assertFunction("RPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 11, '\u671B')", VARCHAR, "\u4FE1\u5FF5 \u7231 \u5E0C\u671B \u671B\u671B"); assertFunction("RPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 12, '\u5E0C\u671B')", VARCHAR, "\u4FE1\u5FF5 \u7231 \u5E0C\u671B \u5E0C\u671B\u5E0C"); assertFunction("RPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 13, '\u5E0C\u671B')", VARCHAR, "\u4FE1\u5FF5 \u7231 \u5E0C\u671B \u5E0C\u671B\u5E0C\u671B"); assertFunction("RPAD('', 3, 'a')", VARCHAR, "aaa"); assertFunction("RPAD('abc', 0, 'e')", VARCHAR, ""); // truncation assertFunction("RPAD('text', 3, 'xy')", VARCHAR, "tex"); assertFunction("RPAD('\u4FE1\u5FF5 \u7231 \u5E0C\u671B ', 5, '\u671B')", VARCHAR, "\u4FE1\u5FF5 \u7231 "); // failure modes assertInvalidFunction("RPAD('abc', 3, '')", "Padding string must not be empty"); // invalid target lengths long maxSize = Integer.MAX_VALUE; assertInvalidFunction("RPAD('abc', -1, 'foo')", "Target length must be in the range [0.." + maxSize + "]"); assertInvalidFunction("RPAD('abc', " + (maxSize + 1) + ", '')", "Target length must be in the range [0.." + maxSize + "]"); } @Test public void testNormalize() { assertFunction("normalize('sch\u00f6n', NFD)", VARCHAR, "scho\u0308n"); assertFunction("normalize('sch\u00f6n')", VARCHAR, "sch\u00f6n"); assertFunction("normalize('sch\u00f6n', NFC)", VARCHAR, "sch\u00f6n"); assertFunction("normalize('sch\u00f6n', NFKD)", VARCHAR, "scho\u0308n"); assertFunction("normalize('sch\u00f6n', NFKC)", VARCHAR, "sch\u00f6n"); assertFunction("normalize('\u3231\u3327\u3326\u2162', NFKC)", VARCHAR, "(\u682a)\u30c8\u30f3\u30c9\u30ebIII"); assertFunction("normalize('\uff8a\uff9d\uff76\uff78\uff76\uff85', NFKC)", VARCHAR, "\u30cf\u30f3\u30ab\u30af\u30ab\u30ca"); } @Test public void testFromLiteralParameter() { assertFunction("vl(cast('aaa' as varchar(3)))", BIGINT, 3L); assertFunction("vl(cast('aaa' as varchar(7)))", BIGINT, 7L); assertFunction("vl('aaaa')", BIGINT, 4L); } // We do not use String toLowerCase or toUpperCase here because they can do multi character transforms // and we want to verify our implementation spec which states we perform case transform code point by // code point private static String lowerByCodePoint(String string) { int[] upperCodePoints = string.codePoints().map(Character::toLowerCase).toArray(); return new String(upperCodePoints, 0, upperCodePoints.length); } private static String upperByCodePoint(String string) { int[] upperCodePoints = string.codePoints().map(Character::toUpperCase).toArray(); return new String(upperCodePoints, 0, upperCodePoints.length); } @Test public void testFromUtf8() { assertFunction("from_utf8(to_utf8('hello'))", VARCHAR, "hello"); assertFunction("from_utf8(from_hex('58BF'))", VARCHAR, "X\uFFFD"); assertFunction("from_utf8(from_hex('58DF'))", VARCHAR, "X\uFFFD"); assertFunction("from_utf8(from_hex('58F7'))", VARCHAR, "X\uFFFD"); assertFunction("from_utf8(from_hex('58BF'), '#')", VARCHAR, "X#"); assertFunction("from_utf8(from_hex('58DF'), 35)", VARCHAR, "X#"); assertFunction("from_utf8(from_hex('58BF'), '')", VARCHAR, "X"); assertInvalidFunction("from_utf8(to_utf8('hello'), 'foo')", INVALID_FUNCTION_ARGUMENT); assertInvalidFunction("from_utf8(to_utf8('hello'), 1114112)", INVALID_FUNCTION_ARGUMENT); } }