/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.hive;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.dbcp.hive.HiveDBCPService;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractSessionFactoryProcessor;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.stream.io.StreamUtils;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.sql.SQLDataException;
import java.sql.Time;
import java.sql.Timestamp;
import java.sql.Date;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Types;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* An abstract base class for HiveQL processors to share common data, methods, etc.
*/
public abstract class AbstractHiveQLProcessor extends AbstractSessionFactoryProcessor {
protected static final Pattern HIVEQL_TYPE_ATTRIBUTE_PATTERN = Pattern.compile("hiveql\\.args\\.(\\d+)\\.type");
protected static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+");
public static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder()
.name("Hive Database Connection Pooling Service")
.description("The Hive Controller Service that is used to obtain connection(s) to the Hive database")
.required(true)
.identifiesControllerService(HiveDBCPService.class)
.build();
public static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder()
.name("hive-charset")
.displayName("Character Set")
.description("Specifies the character set of the record data.")
.required(true)
.defaultValue("UTF-8")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.build();
/**
* Determines the HiveQL statement that should be executed for the given FlowFile
*
* @param session the session that can be used to access the given FlowFile
* @param flowFile the FlowFile whose HiveQL statement should be executed
* @return the HiveQL that is associated with the given FlowFile
*/
protected String getHiveQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
// Read the HiveQL from the FlowFile's content
final byte[] buffer = new byte[(int) flowFile.getSize()];
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream in) throws IOException {
StreamUtils.fillBuffer(in, buffer);
}
});
// Create the PreparedStatement to use for this FlowFile.
return new String(buffer, charset);
}
private class ParameterHolder {
String attributeName;
int jdbcType;
String value;
}
/**
* Sets all of the appropriate parameters on the given PreparedStatement, based on the given FlowFile attributes.
*
* @param stmt the statement to set the parameters on
* @param attributes the attributes from which to derive parameter indices, values, and types
* @throws SQLException if the PreparedStatement throws a SQLException when the appropriate setter is called
*/
protected int setParameters(int base, final PreparedStatement stmt, int paramCount, final Map<String, String> attributes) throws SQLException {
Map<Integer, ParameterHolder> parmMap = new TreeMap<Integer, ParameterHolder>();
for (final Map.Entry<String, String> entry : attributes.entrySet()) {
final String key = entry.getKey();
final Matcher matcher = HIVEQL_TYPE_ATTRIBUTE_PATTERN.matcher(key);
if (matcher.matches()) {
final int parameterIndex = Integer.parseInt(matcher.group(1));
if (parameterIndex >= base && parameterIndex < base + paramCount) {
final boolean isNumeric = NUMBER_PATTERN.matcher(entry.getValue()).matches();
if (!isNumeric) {
throw new SQLDataException("Value of the " + key + " attribute is '" + entry.getValue() + "', which is not a valid JDBC numeral jdbcType");
}
final String valueAttrName = "hiveql.args." + parameterIndex + ".value";
ParameterHolder ph = new ParameterHolder();
int realIndexLoc = parameterIndex - base +1;
ph.jdbcType = Integer.parseInt(entry.getValue());
ph.value = attributes.get(valueAttrName);
ph.attributeName = valueAttrName;
parmMap.put(realIndexLoc, ph);
}
}
}
// Now that's we've retrieved the correct number of parameters and it's sorted, let's set them.
for (final Map.Entry<Integer, ParameterHolder> entry : parmMap.entrySet()) {
final Integer index = entry.getKey();
final ParameterHolder ph = entry.getValue();
try {
setParameter(stmt, ph.attributeName, index, ph.value, ph.jdbcType);
} catch (final NumberFormatException nfe) {
throw new SQLDataException("The value of the " + ph.attributeName + " is '" + ph.value + "', which cannot be converted into the necessary data jdbcType", nfe);
}
}
return base + paramCount;
}
/**
* Determines how to map the given value to the appropriate JDBC data jdbcType and sets the parameter on the
* provided PreparedStatement
*
* @param stmt the PreparedStatement to set the parameter on
* @param attrName the name of the attribute that the parameter is coming from - for logging purposes
* @param parameterIndex the index of the HiveQL parameter to set
* @param parameterValue the value of the HiveQL parameter to set
* @param jdbcType the JDBC Type of the HiveQL parameter to set
* @throws SQLException if the PreparedStatement throws a SQLException when calling the appropriate setter
*/
protected void setParameter(final PreparedStatement stmt, final String attrName, final int parameterIndex, final String parameterValue, final int jdbcType) throws SQLException {
if (parameterValue == null) {
stmt.setNull(parameterIndex, jdbcType);
} else {
try {
switch (jdbcType) {
case Types.BIT:
case Types.BOOLEAN:
stmt.setBoolean(parameterIndex, Boolean.parseBoolean(parameterValue));
break;
case Types.TINYINT:
stmt.setByte(parameterIndex, Byte.parseByte(parameterValue));
break;
case Types.SMALLINT:
stmt.setShort(parameterIndex, Short.parseShort(parameterValue));
break;
case Types.INTEGER:
stmt.setInt(parameterIndex, Integer.parseInt(parameterValue));
break;
case Types.BIGINT:
stmt.setLong(parameterIndex, Long.parseLong(parameterValue));
break;
case Types.REAL:
stmt.setFloat(parameterIndex, Float.parseFloat(parameterValue));
break;
case Types.FLOAT:
case Types.DOUBLE:
stmt.setDouble(parameterIndex, Double.parseDouble(parameterValue));
break;
case Types.DECIMAL:
case Types.NUMERIC:
stmt.setBigDecimal(parameterIndex, new BigDecimal(parameterValue));
break;
case Types.DATE:
stmt.setDate(parameterIndex, new Date(Long.parseLong(parameterValue)));
break;
case Types.TIME:
stmt.setTime(parameterIndex, new Time(Long.parseLong(parameterValue)));
break;
case Types.TIMESTAMP:
stmt.setTimestamp(parameterIndex, new Timestamp(Long.parseLong(parameterValue)));
break;
case Types.CHAR:
case Types.VARCHAR:
case Types.LONGNVARCHAR:
case Types.LONGVARCHAR:
stmt.setString(parameterIndex, parameterValue);
break;
default:
stmt.setObject(parameterIndex, parameterValue, jdbcType);
break;
}
} catch (SQLException e) {
// Log which attribute/parameter had an error, then rethrow to be handled at the top level
getLogger().error("Error setting parameter {} to value from {} ({})", new Object[]{parameterIndex, attrName, parameterValue}, e);
throw e;
}
}
}
}