/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.accumulo.mr;
import java.io.IOException;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.mapred.AccumuloOutputFormat;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Mutation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters;
import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding;
import org.apache.hadoop.hive.accumulo.serde.AccumuloIndexParameters;
import org.apache.hadoop.hive.accumulo.HiveAccumuloHelper;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
/**
 * {@link AccumuloIndexedOutputFormat} that configures the underlying
 * {@link AccumuloOutputFormat} from the Hive job configuration before
 * delegating to the parent class: the Accumulo instance (mock or
 * ZooKeeper-backed), the connector credentials (a delegation token under
 * SASL, a password otherwise), the destination table name, and the optional
 * index table settings.
 *
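 * <p>
 * A minimal sketch of the job configuration this class expects; the table
 * names here are hypothetical, and the index settings are optional:
 * </p>
 * <pre>
 * JobConf job = new JobConf();
 * // Required: the destination Accumulo table
 * job.set(AccumuloSerDeParameters.TABLE_NAME, "example_table");
 * // Optional: maintain an index table alongside the data table
 * job.set(AccumuloIndexParameters.INDEXTABLE_NAME, "example_table_idx");
 * </pre>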
*/
public class HiveAccumuloTableOutputFormat extends AccumuloIndexedOutputFormat {
private static final Logger log = LoggerFactory.getLogger(HiveAccumuloTableOutputFormat.class);

protected final HiveAccumuloHelper helper = new HiveAccumuloHelper();
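/**
 * Configures the Accumulo output settings on the job before letting the
 * parent class validate the output specification.
 */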
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
configureAccumuloOutputFormat(job);
super.checkOutputSpecs(ignored, job);
}
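/**
 * Configures the Accumulo output settings on the job before asking the
 * parent class for the {@link RecordWriter}.
 */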
@Override
public RecordWriter<Text, Mutation> getRecordWriter(FileSystem ignored, JobConf job, String name,
Progressable progress) throws IOException {
configureAccumuloOutputFormat(job);
return super.getRecordWriter(ignored, job, name, progress);
}
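/**
 * Copies the Hive-side connection parameters from the job configuration into
 * the configuration that {@link AccumuloOutputFormat} reads: the Accumulo
 * instance, the connector credentials, the destination table name, and the
 * optional index table settings.
 *
 * @throws IOException if the Accumulo credentials cannot be established
 */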
protected void configureAccumuloOutputFormat(JobConf job) throws IOException {
AccumuloConnectionParameters cnxnParams = getConnectionParams(job);
final String tableName = job.get(AccumuloSerDeParameters.TABLE_NAME);
// Make sure we actually got the table name
Preconditions.checkNotNull(tableName,
"Expected Accumulo table name to be provided in job configuration");
// Set the necessary Accumulo information
try {
if (cnxnParams.useMockInstance()) {
setMockInstanceWithErrorChecking(job, cnxnParams.getAccumuloInstanceName());
} else {
// Accumulo instance name with ZK quorum
setZooKeeperInstanceWithErrorChecking(job, cnxnParams.getAccumuloInstanceName(),
cnxnParams.getZooKeepers(), cnxnParams.useSasl());
}
// Extract the delegation token from the UGI and add it to the job;
// the AccumuloOutputFormat will look for it there.
if (cnxnParams.useSasl()) {
UserGroupInformation ugi = getCurrentUser();
if (!hasKerberosCredentials(ugi)) {
getHelper().addTokenFromUserToJobConf(ugi, job);
} else {
// Still in the local JVM, can use Kerberos credentials
try {
Connector connector = cnxnParams.getConnector();
AuthenticationToken token = getHelper().getDelegationToken(connector);
// Send the DelegationToken down to the Configuration for Accumulo to use
setConnectorInfoWithErrorChecking(job, cnxnParams.getAccumuloUserName(), token);
// Convert the Accumulo token into a Hadoop token
Token<? extends TokenIdentifier> accumuloToken = getHelper().getHadoopToken(token);
log.info("Adding Hadoop Token for Accumulo to Job's Credentials");
// Add the Hadoop token to the JobConf
getHelper().mergeTokenIntoJobConf(job, accumuloToken);
// Make sure the UGI contains the token too for good measure
if (!ugi.addToken(accumuloToken)) {
throw new IOException("Failed to add Accumulo Token to UGI");
}
} catch (AccumuloException | AccumuloSecurityException e) {
throw new IOException("Failed to acquire Accumulo DelegationToken", e);
}
}
} else {
setConnectorInfoWithErrorChecking(job, cnxnParams.getAccumuloUserName(),
new PasswordToken(cnxnParams.getAccumuloPassword()));
}
// Set the table where we're writing this data
setDefaultAccumuloTableName(job, tableName);
// Set the index table information
final String indexTableName = job.get(AccumuloIndexParameters.INDEXTABLE_NAME);
final String indexedColumns = job.get(AccumuloIndexParameters.INDEXED_COLUMNS);
final String columnTypes = job.get(serdeConstants.LIST_COLUMN_TYPES);
final boolean binaryEncoding = ColumnEncoding.BINARY.getName()
.equalsIgnoreCase(job.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE));
setAccumuloIndexTableName(job, indexTableName);
setAccumuloIndexColumns(job, indexedColumns);
setAccumuloStringEncoding(job, !binaryEncoding);
} catch (AccumuloSecurityException e) {
log.error("Could not connect to Accumulo with provided credentials", e);
throw new IOException(e);
}
}
// Non-static methods to wrap the static AccumuloOutputFormat methods to enable testing
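/**
 * Sets the connector info on the job, ignoring the IllegalStateException
 * that {@link AccumuloOutputFormat} throws when the info was already set.
 */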
protected void setConnectorInfoWithErrorChecking(JobConf conf, String username,
AuthenticationToken token) throws AccumuloSecurityException {
try {
AccumuloIndexedOutputFormat.setConnectorInfo(conf, username, token);
} catch (IllegalStateException e) {
// AccumuloOutputFormat complains if you re-set an already set value. We just don't care.
log.debug("Ignoring exception setting Accumulo Connector instance for user " + username, e);
}
}
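/**
 * Sets the ZooKeeper instance on the job. With SASL enabled this goes
 * through {@link HiveAccumuloHelper} reflection so that Accumulo 1.5, which
 * lacks the SASL API, remains supported.
 */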
@SuppressWarnings("deprecation")
protected void setZooKeeperInstanceWithErrorChecking(JobConf conf, String instanceName,
String zookeepers, boolean isSasl) throws IOException {
try {
if (isSasl) {
// Use reflection to support Accumulo 1.5; remove when 1.5 support is dropped.
// Accumulo 1.6 works with the deprecated 1.5 method, but the 1.7-only
// SASL support must be reached via reflection.
getHelper().setZooKeeperInstance(conf, AccumuloOutputFormat.class, zookeepers, instanceName,
isSasl);
} else {
AccumuloIndexedOutputFormat.setZooKeeperInstance(conf, instanceName, zookeepers);
}
} catch (IllegalStateException ise) {
// AccumuloOutputFormat complains if you re-set an already set value. We just don't care.
log.debug("Ignoring exception setting ZooKeeper instance of " + instanceName + " at "
+ zookeepers, ise);
}
}
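/**
 * Sets the mock Accumulo instance on the job, ignoring the
 * IllegalStateException thrown when the instance was already set.
 */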
protected void setMockInstanceWithErrorChecking(JobConf conf, String instanceName) {
try {
AccumuloIndexedOutputFormat.setMockInstance(conf, instanceName);
} catch (IllegalStateException e) {
// AccumuloOutputFormat complains if you re-set an already set value. We just don't care.
log.debug("Ignoring exception setting mock instance of " + instanceName, e);
}
}
protected void setDefaultAccumuloTableName(JobConf conf, String tableName) {
AccumuloIndexedOutputFormat.setDefaultTableName(conf, tableName);
}
protected void setAccumuloIndexTableName(JobConf conf, String indexTableName) {
AccumuloIndexedOutputFormat.setIndexTableName(conf, indexTableName);
}
protected void setAccumuloIndexColumns(JobConf conf, String indexColumns) {
AccumuloIndexedOutputFormat.setIndexColumns(conf, indexColumns);
}
protected void setAccumuloStringEncoding(JobConf conf, Boolean isStringEncoded) {
AccumuloIndexedOutputFormat.setStringEncoding(conf, isStringEncoded);
}
HiveAccumuloHelper getHelper() {
// Allows mocking in testing.
return helper;
}
AccumuloConnectionParameters getConnectionParams(JobConf conf) {
// Allows mocking in testing.
return new AccumuloConnectionParameters(conf);
}
boolean hasKerberosCredentials(UserGroupInformation ugi) {
// Allows mocking in testing.
return ugi.hasKerberosCredentials();
}
UserGroupInformation getCurrentUser() throws IOException {
// Allows mocking in testing.
return UserGroupInformation.getCurrentUser();
}
}