/*
* Copyright 2011-2015 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.data.hadoop.hive;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
import org.apache.hadoop.hive.metastore.api.IndexAlreadyExistsException;
import org.apache.hadoop.hive.service.HiveServerException;
import org.apache.hadoop.io.IOUtils;
import org.apache.thrift.TBase;
import org.apache.thrift.TException;
import org.springframework.dao.DataAccessException;
import org.springframework.dao.DataAccessResourceFailureException;
import org.springframework.dao.DataIntegrityViolationException;
import org.springframework.dao.DataRetrievalFailureException;
import org.springframework.dao.InvalidDataAccessResourceUsageException;
import org.springframework.dao.NonTransientDataAccessResourceException;
import org.springframework.dao.PermissionDeniedDataAccessException;
import org.springframework.dao.TransientDataAccessResourceException;
import org.springframework.jdbc.BadSqlGrammarException;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;
/**
 * Utility for executing Hive scripts through a {@link HiveClient} and for translating
 * Hive/Thrift exceptions into Spring's <tt>org.springframework.dao</tt> hierarchy.
 * The main reason for this class is to handle the parsing of the script content
 * before submitting it to the {@link HiveClient}.
 *
 * @author Costin Leau
 * @author Thomas Risberg
 */
abstract class HiveUtils {

	/**
	 * Translates the given exception into Spring's {@link DataAccessException}
	 * hierarchy. Runtime exceptions are treated as programming errors and are
	 * rethrown as-is rather than wrapped.
	 *
	 * @param ex exception to translate (may be {@code null})
	 * @return the translated exception, or {@code null} if {@code ex} was {@code null}
	 */
	static DataAccessException convert(Exception ex) {
		if (ex == null) {
			return null;
		}
		if (ex instanceof RuntimeException) {
			throw (RuntimeException) ex;
		}

		// Thrift client exception - carries Hive error code/SQL state
		if (ex instanceof HiveServerException) {
			return convert((HiveServerException) ex);
		}
		// plain transport/protocol failure
		if (ex instanceof TException) {
			return new DataAccessResourceFailureException(ex.getMessage(), ex);
		}

		// HiveClient MetaStore Thrift API exceptions
		if (ex instanceof TBase) {
			// meta exceptions
			if (ex instanceof AlreadyExistsException || ex instanceof IndexAlreadyExistsException) {
				return new DataIntegrityViolationException(ex.toString(), ex);
			}
			if (ex instanceof ConfigValSecurityException) {
				return new PermissionDeniedDataAccessException(ex.toString(), ex);
			}
			// fallback for the remaining metastore exceptions
			return new InvalidDataAccessResourceUsageException(ex.toString(), ex);
		}

		// unknown
		return new NonTransientDataAccessResourceException("Unknown exception", ex);
	}

	/**
	 * Translates a {@link HiveServerException} based on its error code and SQL state.
	 * Pre-0.10 codes are described in HIVE-2661, the 0.10+ code ranges in HIVE-3001.
	 *
	 * @param ex server-side exception to translate (never {@code null})
	 * @return the translated exception
	 */
	private static DataAccessException convert(HiveServerException ex) {
		int err = ex.getErrorCode();
		String sqlState = ex.getSQLState();
		// prefer the root-cause message when one is available
		String cause = (ex.getCause() != null ? ex.getCause().getMessage() : ex.getMessage());

		// see https://issues.apache.org/jira/browse/HIVE-2661
		switch (err) {
		// 10 - semantic analysis
		case 10:
			return new DataRetrievalFailureException(cause, ex);
		// 11 - parse error
		case 11:
			return new BadSqlGrammarException("Hive query", "", new SQLException(cause, sqlState));
		// 12 - internal error; -10000 - another internal error (same translation)
		case 12:
		case -10000:
			return new NonTransientDataAccessResourceException(cause, ex);
		}

		// look at the SQL state
		if ("08S01".equals(sqlState)) {
			// internal error
			return new NonTransientDataAccessResourceException(cause, ex);
		}
		// generic syntax error
		else if ("42000".equals(sqlState)) {
			return new BadSqlGrammarException("Hive query", "", new SQLException(cause, sqlState));
		}
		// not found/already exists
		else if ("42S02".equals(sqlState)) {
			return new InvalidDataAccessResourceUsageException(cause, ex);
		}
		// invalid argument
		else if ("21000".equals(sqlState)) {
			return new BadSqlGrammarException("Hive query", "", new SQLException(cause, sqlState));
		}

		// use the new Hive 0.10 codes
		// https://issues.apache.org/jira/browse/HIVE-3001
		// 10000-19999 - semantic analysis
		if (err >= 10000 && err <= 19999) {
			return new InvalidDataAccessResourceUsageException(cause, ex);
		}
		// 20000-29999 - non transient runtime errors
		else if (err >= 20000 && err <= 29999) {
			return new DataRetrievalFailureException(cause, ex);
		}
		// 30000-39999 - transient error, the operation may be retried
		else if (err >= 30000 && err <= 39999) {
			return new TransientDataAccessResourceException(cause, ex);
		}
		// 40000-49999 - internal errors; anything else is unknown - same translation
		return new NonTransientDataAccessResourceException(cause, ex);
	}

	/**
	 * Runs the given scripts in iteration order, aggregating their results.
	 *
	 * @param hive hive client
	 * @param scripts scripts to run (must not be {@code null})
	 * @return the combined results of all scripts
	 * @throws Exception if opening, reading or executing any script fails
	 */
	static List<String> run(HiveClient hive, Iterable<HiveScript> scripts) throws Exception {
		Assert.notNull(scripts, "at least one script is required");

		List<String> results = new ArrayList<String>();
		for (HiveScript hiveScript : scripts) {
			results.addAll(run(hive, hiveScript));
		}
		return results;
	}

	/**
	 * Runs the given scripts, translating any failure through {@link #convert(Exception)}
	 * and optionally shutting down the client afterwards. A failure while closing the
	 * client never masks a script failure: the script exception always wins.
	 *
	 * @param hive hive client
	 * @param scripts scripts to run
	 * @param closeHive whether to shut the client down once the scripts have run
	 * @return the combined results of all scripts
	 * @throws DataAccessException in case of script or (scripts succeeded) shutdown failure
	 */
	static List<String> runWithConversion(HiveClient hive, Iterable<HiveScript> scripts, boolean closeHive) throws DataAccessException {
		DataAccessException primary = null;
		try {
			return run(hive, scripts);
		} catch (Exception ex) {
			primary = convert(ex);
			throw primary;
		} finally {
			if (closeHive) {
				try {
					hive.shutdown();
				} catch (Exception ex) {
					// surface the close failure only when the scripts themselves succeeded;
					// otherwise the original (more relevant) exception propagates unmasked
					if (primary == null) {
						throw new InvalidDataAccessResourceUsageException("Error while closing client connection", ex);
					}
				}
			}
		}
	}

	/**
	 * Runs (or executes) the given script with the given parameters. Note that in order to support the given
	 * parameters, the utility will execute extra commands (hence the returned result will reflect that).
	 * As these are client variables, they are bound to the hiveconf namespace. That means other scripts do not see them
	 * and they need to be accessed using the ${hiveconf:XXX} syntax.
	 *
	 * @param hive hive client
	 * @param script script to run
	 * @return the script results
	 * @throws Exception in case of script reading or execution errors
	 */
	private static List<String> run(HiveClient hive, HiveScript script) throws Exception {
		BufferedReader reader;
		InputStream stream;
		try {
			stream = script.getResource().getInputStream();
			// NOTE(review): reads with the platform default charset - confirm scripts
			// are encoded accordingly before adding an explicit charset here
			reader = new BufferedReader(new InputStreamReader(stream));
		} catch (Exception ex) {
			throw new IllegalArgumentException("Cannot open script [" + script.getResource() + "]", ex);
		}

		List<String> results = new ArrayList<String>();

		// bind the script arguments as hiveconf variables before running the script itself
		Map<String, String> params = script.getArguments();
		if (params != null) {
			for (Map.Entry<String, String> entry : params.entrySet()) {
				results.addAll(runCommand(hive, "SET hiveconf:" + entry.getKey() + "=" + entry.getValue()));
			}
		}

		String line = null;
		try {
			// a statement may span lines; tokens accumulate until a ';' terminator is seen
			StringBuilder command = new StringBuilder();
			while ((line = reader.readLine()) != null) {
				// strip whitespace
				line = line.trim();
				// ignore full-line comments
				if (!line.startsWith("--")) {
					// number of statements terminated on this line
					int nrCmds = StringUtils.countOccurrencesOf(line, ";");
					for (String token : line.split(";")) {
						token = token.trim();
						// skip empty tokens
						if (StringUtils.hasText(token)) {
							command.append(token).append(" ");
							if (nrCmds > 0) {
								// a ';' followed this token - the statement is complete
								results.addAll(runCommand(hive, command.toString()));
								nrCmds--;
								command.setLength(0);
							}
						}
					}
				}
			}
			// make sure to flush any command left (w/o ;)
			if (StringUtils.hasText(command.toString())) {
				results.addAll(runCommand(hive, command.toString()));
			}
		} catch (IOException ex) {
			throw new IllegalArgumentException("Cannot read scripts", ex);
		} finally {
			// closing the reader also closes the wrapped stream
			IOUtils.closeStream(reader);
		}
		return results;
	}

	/**
	 * Executes a single statement against the given client.
	 *
	 * @param hive hive client
	 * @param command statement to execute
	 * @return the statement results
	 * @throws Exception in case of execution errors
	 */
	private static List<String> runCommand(HiveClient hive, String command) throws Exception {
		return hive.execute(command);
	}
}