/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.type; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.function.LiteralParameters; import com.facebook.presto.spi.function.OperatorType; import com.facebook.presto.spi.function.ScalarFunction; import com.facebook.presto.spi.function.ScalarOperator; import com.facebook.presto.spi.function.SqlType; import com.facebook.presto.spi.type.StandardTypes; import io.airlift.jcodings.specific.NonStrictUTF8Encoding; import io.airlift.joni.Option; import io.airlift.joni.Regex; import io.airlift.joni.Syntax; import io.airlift.slice.Slice; import io.airlift.slice.Slices; import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; import static com.facebook.presto.spi.type.Chars.padSpaces; import static com.facebook.presto.util.Failures.checkCondition; import static io.airlift.joni.constants.MetaChar.INEFFECTIVE_META_CHAR; import static io.airlift.joni.constants.SyntaxProperties.OP_ASTERISK_ZERO_INF; import static io.airlift.joni.constants.SyntaxProperties.OP_DOT_ANYCHAR; import static io.airlift.joni.constants.SyntaxProperties.OP_LINE_ANCHOR; import static java.nio.charset.StandardCharsets.UTF_8; public final class LikeFunctions { private static final Syntax SYNTAX = new Syntax( OP_DOT_ANYCHAR | OP_ASTERISK_ZERO_INF | OP_LINE_ANCHOR, 0, 0, Option.NONE, new Syntax.MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); private LikeFunctions() {} // TODO: this should not be callable from SQL @ScalarFunction(value = "like", hidden = true) @LiteralParameters("x") @SqlType(StandardTypes.BOOLEAN) public static boolean like(@SqlType("varchar(x)") Slice value, @SqlType(LikePatternType.NAME) Regex pattern) { // Joni can infinite loop with UTF8Encoding when invalid UTF-8 is encountered. // NonStrictUTF8Encoding must be used to avoid this issue. byte[] bytes = value.getBytes(); return regexMatches(pattern, bytes); } @ScalarOperator(OperatorType.CAST) @LiteralParameters("x") @SqlType(LikePatternType.NAME) public static Regex castVarcharToLikePattern(@SqlType("varchar(x)") Slice pattern) { return likePattern(pattern); } @ScalarOperator(OperatorType.CAST) @LiteralParameters("x") @SqlType(LikePatternType.NAME) public static Regex castCharToLikePattern(@LiteralParameter("x") Long charLength, @SqlType("char(x)") Slice pattern) { return likePattern(padSpaces(pattern, charLength.intValue())); } public static Regex likePattern(Slice pattern) { return likePattern(pattern.toStringUtf8(), '0', false); } @ScalarFunction @LiteralParameters({"x", "y"}) @SqlType(LikePatternType.NAME) public static Regex likePattern(@SqlType("varchar(x)") Slice pattern, @SqlType("varchar(y)") Slice escape) { return likePattern(pattern.toStringUtf8(), getEscapeChar(escape), true); } public static boolean isLikePattern(Slice pattern, Slice escape) { String stringPattern = pattern.toStringUtf8(); if (escape == null) { return stringPattern.contains("%") || stringPattern.contains("_"); } String stringEscape = escape.toStringUtf8(); checkCondition(stringEscape.length() == 1, INVALID_FUNCTION_ARGUMENT, "Escape string must be a single character"); char escapeChar = stringEscape.charAt(0); boolean escaped = false; boolean isLikePattern = false; for (int currentChar : stringPattern.codePoints().toArray()) { if (!escaped && (currentChar == escapeChar)) { escaped = true; } else if (escaped) { checkEscape(currentChar == '%' || currentChar == '_' || currentChar == escapeChar); escaped = false; } else if ((currentChar == '%') || (currentChar == '_')) { isLikePattern = true; } } checkEscape(!escaped); return isLikePattern; } public static Slice unescapeLiteralLikePattern(Slice pattern, Slice escape) { if (escape == null) { return pattern; } String stringEscape = escape.toStringUtf8(); char escapeChar = stringEscape.charAt(0); String stringPattern = pattern.toStringUtf8(); StringBuilder unescapedPattern = new StringBuilder(stringPattern.length()); boolean escaped = false; for (int currentChar : stringPattern.codePoints().toArray()) { if (!escaped && (currentChar == escapeChar)) { escaped = true; } else { unescapedPattern.append(Character.toChars(currentChar)); escaped = false; } } return Slices.utf8Slice(unescapedPattern.toString()); } private static void checkEscape(boolean condition) { checkCondition(condition, INVALID_FUNCTION_ARGUMENT, "Escape character must be followed by '%%', '_' or the escape character itself"); } private static boolean regexMatches(Regex regex, byte[] bytes) { return regex.matcher(bytes).match(0, bytes.length, Option.NONE) != -1; } @SuppressWarnings("NestedSwitchStatement") private static Regex likePattern(String patternString, char escapeChar, boolean shouldEscape) { StringBuilder regex = new StringBuilder(patternString.length() * 2); regex.append('^'); boolean escaped = false; for (char currentChar : patternString.toCharArray()) { checkEscape(!escaped || currentChar == '%' || currentChar == '_' || currentChar == escapeChar); if (shouldEscape && !escaped && (currentChar == escapeChar)) { escaped = true; } else { switch (currentChar) { case '%': regex.append(escaped ? "%" : ".*"); escaped = false; break; case '_': regex.append(escaped ? "_" : "."); escaped = false; break; default: // escape special regex characters switch (currentChar) { case '\\': case '^': case '$': case '.': case '*': regex.append('\\'); } regex.append(currentChar); escaped = false; } } } checkEscape(!escaped); regex.append('$'); byte[] bytes = regex.toString().getBytes(UTF_8); return new Regex(bytes, 0, bytes.length, Option.MULTILINE, NonStrictUTF8Encoding.INSTANCE, SYNTAX); } @SuppressWarnings("NumericCastThatLosesPrecision") private static char getEscapeChar(Slice escape) { String escapeString = escape.toStringUtf8(); if (escapeString.isEmpty()) { // escaping disabled return (char) -1; // invalid character } if (escapeString.length() == 1) { return escapeString.charAt(0); } throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Escape string must be a single character"); } }