/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.accumulo.mr;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MultiTableBatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.mapred.AccumuloOutputFormat;
import org.apache.accumulo.core.data.ColumnUpdate;
import org.apache.accumulo.core.data.KeyExtent;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.accumulo.AccumuloIndexLexicoder;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Extension of AccumuloOutputFormat to support indexing.
*/
public class AccumuloIndexedOutputFormat extends AccumuloOutputFormat {
private static final Logger LOG = Logger.getLogger(AccumuloIndexedOutputFormat.class);
private static final Class<?> CLASS = AccumuloOutputFormat.class;
private static final byte[] EMPTY_BYTES = new byte[0];
public static void setIndexTableName(JobConf job, String tableName) {
IndexOutputConfigurator.setIndexTableName(CLASS, job, tableName);
}
protected static String getIndexTableName(JobConf job) {
return IndexOutputConfigurator.getIndexTableName(CLASS, job);
}
public static void setIndexColumns(JobConf job, String fields) {
IndexOutputConfigurator.setIndexColumns(CLASS, job, fields);
}
protected static String getIndexColumns(JobConf job) {
return IndexOutputConfigurator.getIndexColumns(CLASS, job);
}
public static void setStringEncoding(JobConf job, Boolean isStringEncoding) {
IndexOutputConfigurator.setRecordEncoding(CLASS, job, isStringEncoding);
}
protected static Boolean getStringEncoding(JobConf job) {
return IndexOutputConfigurator.getRecordEncoding(CLASS, job);
}
public RecordWriter<Text, Mutation> getRecordWriter(FileSystem ignored, JobConf job,
String name, Progressable progress) throws IOException {
try {
return new AccumuloIndexedOutputFormat.AccumuloRecordWriter(job);
} catch (Exception e) {
throw new IOException(e);
}
}
protected static class AccumuloRecordWriter implements RecordWriter<Text, Mutation> {
private MultiTableBatchWriter mtbw = null;
private Map<Text, BatchWriter> bws = null;
private Text defaultTableName = null;
private Text indexTableName = null;
private boolean simulate = false;
private boolean createTables = false;
private boolean isStringEncoded = true;
private long mutCount = 0L;
private long valCount = 0L;
private Connector conn;
private AccumuloIndexDefinition indexDef = null;
protected AccumuloRecordWriter(JobConf job)
throws AccumuloException, AccumuloSecurityException, IOException {
Level l = AccumuloIndexedOutputFormat.getLogLevel(job);
if (l != null) {
LOG.setLevel(AccumuloIndexedOutputFormat.getLogLevel(job));
}
this.isStringEncoded = AccumuloIndexedOutputFormat.getStringEncoding(job).booleanValue();
this.simulate = AccumuloIndexedOutputFormat.getSimulationMode(job).booleanValue();
this.createTables = AccumuloIndexedOutputFormat.canCreateTables(job).booleanValue();
if (this.simulate) {
LOG.info("Simulating output only. No writes to tables will occur");
}
this.bws = new HashMap();
String tname = AccumuloIndexedOutputFormat.getDefaultTableName(job);
this.defaultTableName = tname == null ? null : new Text(tname);
String iname = AccumuloIndexedOutputFormat.getIndexTableName(job);
if (iname != null) {
LOG.info("Index Table = " + iname);
this.indexTableName = new Text(iname);
this.indexDef = createIndexDefinition(job, tname, iname);
}
if (!this.simulate) {
this.conn = AccumuloIndexedOutputFormat.getInstance(job)
.getConnector(AccumuloIndexedOutputFormat.getPrincipal(job),
AccumuloIndexedOutputFormat.getAuthenticationToken(job));
this.mtbw = this.conn.createMultiTableBatchWriter(
AccumuloIndexedOutputFormat.getBatchWriterOptions(job));
}
}
AccumuloIndexDefinition createIndexDefinition(JobConf job, String tname, String iname) {
AccumuloIndexDefinition def = new AccumuloIndexDefinition(tname, iname);
String cols = AccumuloIndexedOutputFormat.getIndexColumns(job);
LOG.info("Index Cols = " + cols);
def.setColumnTuples(cols);
return def;
}
public void write(Text table, Mutation mutation) throws IOException {
if(table == null || table.toString().isEmpty()) {
table = this.defaultTableName;
}
if(!this.simulate && table == null) {
throw new IOException("No table or default table specified. Try simulation mode next time");
} else {
++this.mutCount;
this.valCount += (long)mutation.size();
this.printMutation(table, mutation);
if(!this.simulate) {
if(!this.bws.containsKey(table)) {
try {
this.addTable(table);
} catch (Exception var5) {
LOG.error(var5);
throw new IOException(var5);
}
}
if(indexTableName != null && !this.bws.containsKey(indexTableName)) {
try {
this.addTable(indexTableName);
} catch (Exception var6) {
LOG.error(var6);
throw new IOException(var6);
}
}
try {
((BatchWriter)this.bws.get(table)).addMutation(mutation);
} catch (MutationsRejectedException var4) {
throw new IOException(var4);
}
// if this table has an associated index table then attempt to build
// index mutations
if (indexTableName != null) {
List<Mutation> idxMuts = getIndexMutations(mutation);
if (!idxMuts.isEmpty()) {
try {
BatchWriter writer = this.bws.get(indexTableName);
for (Mutation m : idxMuts) {
writer.addMutation(m);
}
} catch (MutationsRejectedException var4) {
throw new IOException(var4);
}
}
}
}
}
}
public void addTable(Text tableName) throws AccumuloException, AccumuloSecurityException {
if(this.simulate) {
LOG.info("Simulating adding table: " + tableName);
} else {
LOG.debug("Adding table: " + tableName);
BatchWriter bw = null;
String table = tableName.toString();
if(this.createTables && !this.conn.tableOperations().exists(table)) {
try {
this.conn.tableOperations().create(table);
} catch (AccumuloSecurityException var8) {
LOG.error("Accumulo security violation creating " + table, var8);
throw var8;
} catch (TableExistsException var9) {
LOG.warn("Table Exists " + table, var9);
}
}
try {
bw = this.mtbw.getBatchWriter(table);
} catch (TableNotFoundException var5) {
LOG.error("Accumulo table " + table + " doesn't exist and cannot be created.", var5);
throw new AccumuloException(var5);
}
if(bw != null) {
this.bws.put(tableName, bw);
}
}
}
private int printMutation(Text table, Mutation m) {
if(LOG.isTraceEnabled()) {
LOG.trace(String.format("Table %s row key: %s",
new Object[]{table, this.hexDump(m.getRow())}));
Iterator itr = m.getUpdates().iterator();
while(itr.hasNext()) {
ColumnUpdate cu = (ColumnUpdate)itr.next();
LOG.trace(String.format("Table %s column: %s:%s",
new Object[]{table, this.hexDump(cu.getColumnFamily()),
this.hexDump(cu.getColumnQualifier())}));
LOG.trace(String.format("Table %s security: %s",
new Object[]{table, (new ColumnVisibility(cu.getColumnVisibility())).toString()}));
LOG.trace(String.format("Table %s value: %s",
new Object[]{table, this.hexDump(cu.getValue())}));
}
}
return m.getUpdates().size();
}
private List<Mutation> getIndexMutations(Mutation baseMut) {
List indexMuts = new ArrayList<Mutation>();
// nothing to do if there is not a index definition for this table
if (null != indexDef) {
byte[] rowId = baseMut.getRow();
for (ColumnUpdate cu : baseMut.getUpdates()) {
String cf = new String(cu.getColumnFamily());
String cq = new String(cu.getColumnQualifier());
// if this columnFamily/columnQualifier pair is defined in the index build a new mutation
// so key=value, cf=columnFamily_columnQualifer, cq=rowKey, cv=columnVisibility value=[]
String colType = indexDef.getColType(cf, cq);
if (colType != null) {
LOG.trace(String.format("Building index for column %s:%s", new Object[]{cf, cq}));
Mutation m = new Mutation(AccumuloIndexLexicoder.encodeValue(cu.getValue(), colType,
isStringEncoded));
String colFam = cf + "_" + cq;
m.put(colFam.getBytes(), rowId, new ColumnVisibility(cu.getColumnVisibility()),
EMPTY_BYTES);
indexMuts.add(m);
}
}
}
return indexMuts;
}
private String hexDump(byte[] ba) {
StringBuilder sb = new StringBuilder();
byte[] arr = ba;
int len = ba.length;
for(int i = 0; i < len; ++i) {
byte b = arr[i];
if(b > 32 && b < 126) {
sb.append((char)b);
} else {
sb.append(String.format("x%02x", new Object[]{Byte.valueOf(b)}));
}
}
return sb.toString();
}
public void close(Reporter reporter) throws IOException {
LOG.debug("mutations written: " + this.mutCount + ", values written: " + this.valCount);
if(!this.simulate) {
try {
this.mtbw.close();
} catch (MutationsRejectedException var7) {
if(var7.getAuthorizationFailuresMap().size() >= 0) {
Map tables = new HashMap();
Map.Entry ke;
Object secCodes;
for(Iterator itr = var7.getAuthorizationFailuresMap().entrySet().iterator();
itr.hasNext(); ((Set)secCodes).addAll((Collection)ke.getValue())) {
ke = (Map.Entry)itr.next();
secCodes = (Set)tables.get(((KeyExtent)ke.getKey()).getTableId().toString());
if(secCodes == null) {
secCodes = new HashSet();
tables.put(((KeyExtent)ke.getKey()).getTableId().toString(), secCodes);
}
}
LOG.error("Not authorized to write to tables : " + tables);
}
if(var7.getConstraintViolationSummaries().size() > 0) {
LOG.error("Constraint violations : " + var7.getConstraintViolationSummaries().size());
}
throw new IOException(var7);
}
}
}
}
}