/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.directio.hive.syntax;

import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Syntactic utilities about Hive.
 * @see <a href="https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL">Hive DDL</a>
 * @since 0.8.1
 */
public final class HiveSyntax {

    /** The escape character used inside quoted string literals. */
    private static final char LITERAL_ESCAPE = '\\';

    /** The delimiter of quoted identifiers. */
    private static final String IDENTIFIER_QUOTE = "`";

    /** Identifiers matching this pattern can be emitted bare, unless they are keywords. */
    private static final Pattern PATTERN_IDENTIFIER = Pattern.compile("[A-Za-z_][A-Za-z0-9_]*");

    /** The upper-cased keywords which must be quoted when used as identifiers. */
    private static final Set<String> DDL_KEYWORDS;
    static {
        String[] keywords = {
            "ADD", "ADMIN", "AFTER", "ALL", "ALTER", "ANALYZE", "AND", "ARCHIVE", "ARRAY", "AS", "ASC",
            "AUTHORIZATION",
            "BEFORE", "BETWEEN", "BIGINT", "BINARY", "BOOLEAN", "BOTH", "BUCKET", "BUCKETS", "BY",
            "CASCADE", "CASE", "CAST", "CHANGE", "CHAR", "CLUSTER", "CLUSTERED", "CLUSTERSTATUS",
            "COLLECTION", "COLUMN", "COLUMNS", "COMMENT", "COMPACT", "COMPACTIONS", "COMPUTE",
            "CONCATENATE", "CONF", "CONTINUE", "CREATE", "CROSS", "CUBE", "CURRENT", "CURRENT_DATE",
            "CURRENT_TIMESTAMP", "CURSOR",
            "DATA", "DATABASE", "DATABASES", "DATE", "DATETIME", "DAY", "DBPROPERTIES", "DECIMAL",
            "DEFERRED", "DEFINED", "DELETE", "DELIMITED", "DEPENDENCY", "DESC", "DESCRIBE",
            "DIRECTORIES", "DIRECTORY", "DISABLE", "DISTINCT", "DISTRIBUTE", "DOUBLE", "DROP",
            "ELEM_TYPE", "ELSE", "ENABLE", "END", "ESCAPED", "EXCHANGE", "EXCLUSIVE", "EXISTS",
            "EXPLAIN", "EXPORT", "EXTENDED", "EXTERNAL",
            "FALSE", "FETCH", "FIELDS", "FILE", "FILEFORMAT", "FIRST", "FLOAT", "FOLLOWING", "FOR",
            "FORMAT", "FORMATTED", "FROM", "FULL", "FUNCTION", "FUNCTIONS",
            "GRANT", "GROUP", "GROUPING",
            "HAVING", "HOLD_DDLTIME", "HOUR",
            "IDXPROPERTIES", "IF", "IGNORE", "IMPORT", "IN", "INDEX", "INDEXES", "INNER", "INPATH",
            "INPUTDRIVER", "INPUTFORMAT", "INSERT", "INT", "INTERSECT", "INTERVAL", "INTO", "IS",
            "ITEMS",
            "JAR", "JOIN",
            "KEYS", "KEY_TYPE",
            "LATERAL", "LEFT", "LESS", "LIKE", "LIMIT", "LINES", "LOAD", "LOCAL", "LOCATION", "LOCK",
            "LOCKS", "LOGICAL", "LONG",
            "MACRO", "MAP", "MAPJOIN", "MATERIALIZED", "MINUS", "MINUTE", "MONTH", "MORE", "MSCK",
            "NONE", "NOSCAN", "NOT", "NO_DROP", "NULL",
            "OF", "OFFLINE", "ON", "OPTION", "OR", "ORDER", "OUT", "OUTER", "OUTPUTDRIVER",
            "OUTPUTFORMAT", "OVER", "OVERWRITE", "OWNER",
            "PARTIALSCAN", "PARTITION", "PARTITIONED", "PARTITIONS", "PERCENT", "PLUS", "PRECEDING",
            "PRESERVE", "PRETTY", "PRINCIPALS", "PROCEDURE", "PROTECTION", "PURGE",
            "RANGE", "READ", "READONLY", "READS", "REBUILD", "RECORDREADER", "RECORDWRITER", "REDUCE",
            "REGEXP", "RELOAD", "RENAME", "REPAIR", "REPLACE", "RESTRICT", "REVOKE", "REWRITE",
            "RIGHT", "RLIKE", "ROLE", "ROLES", "ROLLUP", "ROW", "ROWS",
            "SCHEMA", "SCHEMAS", "SECOND", "SELECT", "SEMI", "SERDE", "SERDEPROPERTIES", "SERVER",
            "SET", "SETS", "SHARED", "SHOW", "SHOW_DATABASE", "SKEWED", "SMALLINT", "SORT", "SORTED",
            "SSL", "STATISTICS", "STORED", "STREAMTABLE", "STRING", "STRUCT",
            "TABLE", "TABLES", "TABLESAMPLE", "TBLPROPERTIES", "TEMPORARY", "TERMINATED", "THEN",
            "TIMESTAMP", "TINYINT", "TO", "TOUCH", "TRANSACTIONS", "TRANSFORM", "TRIGGER", "TRUE",
            "TRUNCATE",
            "UNARCHIVE", "UNBOUNDED", "UNDO", "UNION", "UNIONTYPE", "UNIQUEJOIN", "UNLOCK", "UNSET",
            "UNSIGNED", "UPDATE", "URI", "USE", "USER", "USING", "UTC", "UTCTIMESTAMP",
            "VALUES", "VALUE_TYPE", "VARCHAR", "VIEW",
            "WHEN", "WHERE", "WHILE", "WINDOW", "WITH",
            "YEAR",
        };
        Set<String> s = new HashSet<>(keywords.length * 2);
        Collections.addAll(s, keywords);
        DDL_KEYWORDS = Collections.unmodifiableSet(s);
    }

    private HiveSyntax() {
        // utility class: never instantiated
    }

    /**
     * Returns whether the identifier is a DDL keyword or not.
     * The comparison is case-insensitive.
     * @param identifier the target identifier
     * @return {@code true} if the identifier is one of the DDL keywords, otherwise {@code false}
     */
    public static boolean isDdlKeyword(String identifier) {
        return DDL_KEYWORDS.contains(identifier.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Returns the valid identifier token in Hive DDL.
     * The identifier is returned as is when it is not a DDL keyword and consists only of ASCII letters,
     * digits and underscores (not starting with a digit). Otherwise it is enclosed in backquotes, and
     * each backquote inside it is doubled so that it does not terminate the quoted token.
     * @param identifier the bare identifier
     * @return the valid identifier token
     */
    public static String quoteIdentifier(String identifier) {
        if (!isDdlKeyword(identifier) && PATTERN_IDENTIFIER.matcher(identifier).matches()) {
            return identifier;
        }
        // see https://issues.apache.org/jira/secure/attachment/12618321/QuotedIdentifier.html
        // the identifier must be a 'RegexComponent', but we do not check it here
        String escaped = identifier.replace(IDENTIFIER_QUOTE, IDENTIFIER_QUOTE + IDENTIFIER_QUOTE);
        return IDENTIFIER_QUOTE + escaped + IDENTIFIER_QUOTE;
    }

    /**
     * Returns the quoted string literal.
     * The quote character and the backslash are escaped with a backslash, and ISO control characters
     * are written as three-digit octal escapes (for example, a line feed becomes {@code \012}).
     * @param quote the quote character
     * @param string the target string
     * @return the quoted string literal
     */
    public static String quoteLiteral(char quote, String string) {
        int length = string.length();
        StringBuilder buf = new StringBuilder(length + 2);
        buf.append(quote);
        for (int i = 0; i < length; i++) {
            char c = string.charAt(i);
            if (c == quote || c == LITERAL_ESCAPE) {
                buf.append(LITERAL_ESCAPE).append(c);
            } else if (Character.isISOControl(c)) {
                buf.append(String.format("\\%03o", (int) c)); //$NON-NLS-1$
            } else {
                buf.append(c);
            }
        }
        buf.append(quote);
        return buf.toString();
    }
}