/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.hbase;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.drill.common.expression.FunctionCall;
import org.apache.drill.common.expression.ValueExpressions.QuotedString;

/**
 * Translates the pattern operand of a SQL LIKE operator into an anchored Java
 * regular expression, and extracts the literal prefix that precedes the first
 * wildcard or character range.
 *
 * <p>Typical use: construct, call {@link #parse()}, then read
 * {@link #getRegexString()} and {@link #getPrefixString()}. Both getters return
 * {@code null} until {@link #parse()} has been called.
 */
public class HBaseRegexParser {
  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HBaseRegexParser.class);

  /**
   * Tokenizer for a LIKE operand when no escape character is in effect.
   * Each token is one of three kinds:
   * <ol>
   * <li>a wildcard, {@code %} or {@code _}: first group {@code ([%_])}</li>
   * <li>a character range, {@code [...]} or {@code [^...]}: second group {@code (\[[^]]*\])}</li>
   * <li>a run of literal characters: third group {@code ([^%_\[]+)}</li>
   * </ol>
   */
  private static final Pattern SQL_LIKE_REGEX = Pattern.compile("([%_])|(\\[[^]]*\\])|([^%_\\[]+)");

  /**
   * Format string for the tokenizer used when an escape character is given.
   * Both {@code %s} slots receive the (regex-safe) escape character: the first
   * builds a leading group for the escape sequence, the second excludes the
   * escape character from the literal run.
   */
  private static final String SQL_LIKE_ESCAPE_REGEX_STR = "(%s.?)|([%%_])|(\\[[^]]*\\])|([^%%_\\[%s]+)";

  /** Characters that must be backslash-escaped before being embedded in a Java regex. */
  private static final String JAVA_REGEX_SPECIALS = ".()[]{}<>|^-+=*?!$\\";

  private final String likeString_;

  /** The escape character in regex-safe form, or {@code null} when no escape character is used. */
  private final String escapeChar_;

  private String regexString_ = null;

  private String prefixString_ = null;

  /**
   * Creates a parser from a LIKE function call.
   *
   * @param call the call; argument 1 is read as the quoted pattern and, when
   *             more than two arguments are present, the first character of
   *             argument 2 is used as the escape character
   */
  public HBaseRegexParser(FunctionCall call) {
    this(likeString(call), escapeString(call));
  }

  /**
   * Creates a parser for a LIKE pattern without an escape character.
   *
   * @param likeString the LIKE pattern
   */
  public HBaseRegexParser(String likeString) {
    this(likeString, null);
  }

  /**
   * Creates a parser for a LIKE pattern with an optional escape character.
   *
   * @param likeString the LIKE pattern
   * @param escapeChar the escape character, or {@code null} for none
   */
  public HBaseRegexParser(String likeString, Character escapeChar) {
    likeString_ = likeString;
    if (escapeChar == null) {
      escapeChar_ = null;
    } else {
      // The escape character is spliced into a regex, so neutralize it if it is a metacharacter.
      escapeChar_ = JAVA_REGEX_SPECIALS.indexOf(escapeChar) == -1
          ? String.valueOf(escapeChar)
          : ("\\" + escapeChar);
    }
  }

  /**
   * Converts the LIKE pattern into a regular expression anchored with
   * {@code ^} and {@code $}, and computes the literal prefix. The result is
   * cached: calling this method again returns immediately.
   *
   * <p>Text that the tokenizer cannot classify (in practice a {@code [} that
   * is never closed by a {@code ]}) is treated as a literal instead of being
   * dropped from the result.
   *
   * @return this parser, for call chaining
   * @throws IllegalArgumentException if the pattern ends with a dangling escape character
   */
  public HBaseRegexParser parse() {
    if (regexString_ != null) {
      return this;
    }

    final Matcher matcher;
    if (escapeChar_ == null) {
      matcher = SQL_LIKE_REGEX.matcher(likeString_);
    } else {
      /*
       * When an escape character is specified, add another capturing group
       * with the escape character in the front for the escape sequence and
       * add the escape character to the exclusion list of literals.
       * DOTALL lets the escaped character be a line terminator as well.
       */
      matcher = Pattern.compile(
          String.format(SQL_LIKE_ESCAPE_REGEX_STR, escapeChar_, escapeChar_), Pattern.DOTALL)
          .matcher(likeString_);
    }

    StringBuilder prefixSB = new StringBuilder();
    StringBuilder regexSB = new StringBuilder("^"); // starts with
    boolean literalsSoFar = true;
    int consumed = 0; // end offset of the previous token

    while (matcher.find()) {
      // find() silently skips text that no alternative matches; keep it as a literal.
      if (matcher.start() > consumed) {
        appendLiteral(likeString_.substring(consumed, matcher.start()), regexSB, prefixSB, literalsSoFar);
      }
      consumed = matcher.end();

      if (escapeChar_ != null && matcher.group(1) != null) {
        String fragment = matcher.group(1);
        // A fragment holding only the escape character means nothing followed it.
        if (fragment.length() < 2) {
          throw new IllegalArgumentException("Invalid fragment '" + fragment
              + "' at index " + matcher.start()
              + " in the LIKE operand '" + likeString_ + "'");
        }
        appendLiteral(fragment.substring(1), regexSB, prefixSB, literalsSoFar);
      } else {
        String fragment = matcher.group();
        switch (fragment) {
        case "_": // LIKE('_') => REGEX('.')
          literalsSoFar = false;
          regexSB.append(".");
          break;
        case "%": // LIKE('%') => REGEX('.*')
          literalsSoFar = false;
          regexSB.append(".*");
          break;
        default: // ALL other including character ranges
          if (fragment.startsWith("[") && fragment.endsWith("]")) {
            // Character range: passed through to the regex unchanged.
            literalsSoFar = false;
            regexSB.append(fragment);
          } else {
            // found literal, just quote it.
            appendLiteral(fragment, regexSB, prefixSB, literalsSoFar);
          }
          break;
        }
      }
    }

    // Unmatched text after the last token is a literal too.
    if (consumed < likeString_.length()) {
      appendLiteral(likeString_.substring(consumed), regexSB, prefixSB, literalsSoFar);
    }

    prefixString_ = prefixSB.toString();
    regexString_ = regexSB.append("$") // ends with
        .toString();

    logger.debug("Converted LIKE string '{}' to REGEX string '{}'.", likeString_, regexString_);
    return this;
  }

  /**
   * Appends literal text to the regex in quoted form and, while no wildcard or
   * character range has been seen yet, to the literal prefix as well.
   */
  private static void appendLiteral(String literal, StringBuilder regexSB,
      StringBuilder prefixSB, boolean literalsSoFar) {
    if (literalsSoFar) {
      prefixSB.append(literal);
    }
    regexSB.append(Pattern.quote(literal));
  }

  /**
   * @return the generated regular expression, or {@code null} if {@link #parse()} has not been called
   */
  public String getRegexString() {
    return regexString_;
  }

  /**
   * @return the literal text preceding the first wildcard or character range,
   *         or {@code null} if {@link #parse()} has not been called
   */
  public String getPrefixString() {
    return prefixString_;
  }

  /**
   * @return the LIKE pattern this parser was created with
   */
  public String getLikeString() {
    return likeString_;
  }

  /** Reads argument 1 of the call as the quoted LIKE pattern. */
  private static String likeString(FunctionCall call) {
    return ((QuotedString) call.args.get(1)).value;
  }

  /**
   * Reads the first character of argument 2 as the escape character; returns
   * {@code null} when the call has no such argument. Only the first character
   * of the argument is used.
   */
  private static Character escapeString(FunctionCall call) {
    if (call.args.size() > 2) {
      return ((QuotedString) call.args.get(2)).value.charAt(0);
    }
    return null;
  }
}