/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.blur.hive;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.blur.manager.BlurPartitioner;
import org.apache.blur.mapreduce.lib.BlurColumn;
import org.apache.blur.mapreduce.lib.BlurOutputFormat;
import org.apache.blur.mapreduce.lib.BlurRecord;
import org.apache.blur.thirdparty.thrift_0_9_0.TException;
import org.apache.blur.thrift.BlurClient;
import org.apache.blur.thrift.generated.Blur.Iface;
import org.apache.blur.thrift.generated.BlurException;
import org.apache.blur.thrift.generated.Column;
import org.apache.blur.thrift.generated.Record;
import org.apache.blur.thrift.generated.RecordMutation;
import org.apache.blur.thrift.generated.RecordMutationType;
import org.apache.blur.thrift.generated.RowMutation;
import org.apache.blur.thrift.generated.RowMutationType;
import org.apache.blur.thrift.generated.TableDescriptor;
import org.apache.blur.utils.BlurConstants;
import org.apache.blur.utils.ShardUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Progressable;

/**
 * {@link HiveOutputFormat} that writes Hive rows into an Apache Blur table,
 * either by staging {@link BlurRecord}s in a MapReduce working path for a
 * follow-on bulk load, or by streaming {@link RowMutation}s directly to the
 * shard servers as part of a bulk mutate.
 */
public class BlurHiveOutputFormat implements HiveOutputFormat<Text, BlurRecord> {

  private static final String BLUR_USER_PROXY = "blur.user.proxy";
  private static final String BLUR = "blur";
  private static final String BLUR_USER_NAME = "blur.user.name";
  private static final String BLUR_BULK_MUTATE_ID = "blur.bulk.mutate.id";

  /**
   * Gets the id of the bulk mutate that this job's writers add mutations to.
   */
  public static String getBulkId(Configuration conf) {
    return conf.get(BLUR_BULK_MUTATE_ID);
  }

  /**
   * Sets the id of the bulk mutate for this job.
   */
  public static void setBulkId(Configuration conf, String bulkId) {
    conf.set(BLUR_BULK_MUTATE_ID, bulkId);
  }
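
  // A minimal sketch of driver-side wiring for the bulk id, assuming the
  // controller exposes bulkMutateStart/bulkMutateFinish alongside the
  // bulkMutateAddMultiple call used below; the connection string is
  // illustrative. The writers created by this format only add mutations to
  // the bulk mutate, so the driver is responsible for starting and
  // finishing it.
  //
  //   Configuration conf = new Configuration();
  //   String bulkId = UUID.randomUUID().toString();
  //   BlurHiveOutputFormat.setBulkId(conf, bulkId);
  //   Iface controller = BlurClient.getClient("controller1:40010");
  //   controller.bulkMutateStart(bulkId); // open the bulk mutate
  //   // ... run the Hive job that uses this output format ...
  //   controller.bulkMutateFinish(bulkId, true, true); // apply and block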

  @Override
  public void checkOutputSpecs(FileSystem fileSystem, JobConf jobConf) throws IOException {
    // Nothing to validate up front; the output target is resolved at write time.
  }

  @Override
  public RecordWriter<Text, BlurRecord> getRecordWriter(FileSystem fileSystem, JobConf jobConf, String name,
      Progressable progressable) throws IOException {
    // Hive only calls getHiveRecordWriter, never the plain mapred method.
    throw new RuntimeException("Should never be called.");
  }

  @Override
  public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc,
      Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
      Progressable progress) throws IOException {
    if (BlurSerDe.shouldUseMRWorkingPath(jc)) {
      // Stage records in HDFS for a follow-on MapReduce bulk load.
      return getMrWorkingPathWriter(jc);
    }
    // Otherwise stream mutations directly to the shard servers.
    return getBulkRecordWriter(jc);
  }
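
  // A minimal sketch of the configuration the staging branch reads, assuming
  // a driver that has opted into the MR working path. What makes
  // BlurSerDe.shouldUseMRWorkingPath return true is decided in BlurSerDe, not
  // here; the values below are illustrative.
  //
  //   JobConf jc = new JobConf();
  //   jc.set(BlurConstants.BLUR_BULK_UPDATE_WORKING_PATH, "/blur/tables/testtable/working");
  //   jc.set(BlurSerDe.BLUR_MR_LOAD_ID, UUID.randomUUID().toString());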

  private org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getMrWorkingPathWriter(
      final Configuration configuration) throws IOException {
    PrivilegedExceptionAction<org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter> privilegedExceptionAction = new PrivilegedExceptionAction<org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter>() {
      @Override
      public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter run() throws Exception {
        // Stage records as a SequenceFile under <working-path>/tmp/<load-id>/<uuid>,
        // keyed by rowId, for a follow-on bulk-load job to pick up.
        String workingPathStr = configuration.get(BlurConstants.BLUR_BULK_UPDATE_WORKING_PATH);
        Path workingPath = new Path(workingPathStr);
        Path tmpDir = new Path(workingPath, "tmp");
        FileSystem fileSystem = tmpDir.getFileSystem(configuration);
        String loadId = configuration.get(BlurSerDe.BLUR_MR_LOAD_ID);
        Path loadPath = new Path(tmpDir, loadId);
        final Writer writer = new SequenceFile.Writer(fileSystem, configuration, new Path(loadPath, UUID.randomUUID()
            .toString()), Text.class, BlurRecord.class);
        return new org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter() {

          @Override
          public void write(Writable w) throws IOException {
            BlurRecord blurRecord = (BlurRecord) w;
            String rowId = blurRecord.getRowId();
            writer.append(new Text(rowId), blurRecord);
          }

          @Override
          public void close(boolean abort) throws IOException {
            writer.close();
          }
        };
      }
    };
    // Run as the configured Blur user so the staged files have the right owner.
    UserGroupInformation userGroupInformation = getUGI(configuration);
    try {
      return userGroupInformation.doAs(privilegedExceptionAction);
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }
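
  // A minimal sketch of reading the staged output back, for example from a
  // follow-on bulk-load job. The path is illustrative; the real layout is
  // <working-path>/tmp/<load-id>/<uuid> as built above.
  //
  //   Path file = new Path("/blur/tables/testtable/working/tmp/load-1/part-uuid");
  //   FileSystem fs = file.getFileSystem(configuration);
  //   SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, configuration);
  //   Text rowId = new Text();
  //   BlurRecord record = new BlurRecord();
  //   while (reader.next(rowId, record)) {
  //     // process one staged record
  //   }
  //   reader.close();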

  /**
   * Resolves the {@link UserGroupInformation} to act as: the current user if
   * it already matches the configured Blur user, a proxy user if the proxy
   * flag is set, otherwise a simple remote user.
   */
  public static UserGroupInformation getUGI(final Configuration configuration) throws IOException {
    String user = getBlurUser(configuration);
    UserGroupInformation userGroupInformation;
    UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    if (user.equals(currentUser.getUserName())) {
      userGroupInformation = currentUser;
    } else {
      if (BlurHiveOutputFormat.isBlurUserAsProxy(configuration)) {
        userGroupInformation = UserGroupInformation.createProxyUser(user, currentUser);
      } else {
        userGroupInformation = UserGroupInformation.createRemoteUser(user);
      }
    }
    return userGroupInformation;
  }
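
  // A minimal sketch of using the returned UGI: perform the filesystem (or
  // Blur) work inside doAs so it runs as the configured Blur user, mirroring
  // how getMrWorkingPathWriter uses it above. The path is illustrative.
  //
  //   UserGroupInformation ugi = BlurHiveOutputFormat.getUGI(configuration);
  //   FileSystem fs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
  //     @Override
  //     public FileSystem run() throws Exception {
  //       return new Path("/blur").getFileSystem(configuration);
  //     }
  //   });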

  public static boolean isBlurUserAsProxy(Configuration configuration) {
    return configuration.getBoolean(BLUR_USER_PROXY, false);
  }

  public static void setBlurUserAsProxy(Configuration configuration, boolean blurUserProxy) {
    configuration.setBoolean(BLUR_USER_PROXY, blurUserProxy);
  }

  public static String getBlurUser(Configuration configuration) {
    return configuration.get(BLUR_USER_NAME, BLUR);
  }

  public static void setBlurUser(Configuration configuration, String blurUser) {
    configuration.set(BLUR_USER_NAME, blurUser);
  }
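
  // A minimal sketch of the two identity modes these setters select. With the
  // proxy flag set, getUGI impersonates the Blur user via createProxyUser,
  // which typically requires matching hadoop.proxyuser settings for the real
  // user on the cluster; otherwise a simple remote user is created. The user
  // name is illustrative.
  //
  //   Configuration conf = new Configuration();
  //   BlurHiveOutputFormat.setBlurUser(conf, "blur");
  //   BlurHiveOutputFormat.setBlurUserAsProxy(conf, true);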

  private org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getBulkRecordWriter(Configuration configuration)
      throws IOException {
    TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
    String conStr = configuration.get(BlurSerDe.BLUR_CONTROLLER_CONNECTION_STR);
    final Iface controllerClient = BlurClient.getClient(conStr);
    final String table = tableDescriptor.getName();
    final int numberOfShardsInTable = tableDescriptor.getShardCount();
    final String bulkId = getBulkId(configuration);
    return new org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter() {

      private BlurPartitioner _blurPartitioner = new BlurPartitioner();
      private Map<String, List<RowMutation>> _serverBatches = new ConcurrentHashMap<String, List<RowMutation>>();
      private int _capacity = 100;
      private Map<String, String> _shardToServerLayout;

      @Override
      public void write(Writable w) throws IOException {
        // Wrap each record in a RowMutation and batch it by the shard server
        // that owns its row.
        BlurRecord blurRecord = (BlurRecord) w;
        String rowId = blurRecord.getRowId();
        RowMutation rowMutation = new RowMutation();
        rowMutation.setTable(table);
        rowMutation.setRowId(rowId);
        rowMutation.setRowMutationType(RowMutationType.UPDATE_ROW);
        rowMutation.addToRecordMutations(new RecordMutation(RecordMutationType.REPLACE_ENTIRE_RECORD,
            toRecord(blurRecord)));
        try {
          String server = getServer(rowId);
          List<RowMutation> batch = _serverBatches.get(server);
          if (batch == null) {
            _serverBatches.put(server, batch = new ArrayList<RowMutation>(_capacity));
          }
          batch.add(rowMutation);
          // Flush any batch that has reached capacity.
          checkForFlush(_capacity);
        } catch (BlurException e) {
          throw new IOException(e);
        } catch (TException e) {
          throw new IOException(e);
        }
      }

      @Override
      public void close(boolean abort) throws IOException {
        try {
          // Flush every non-empty batch, regardless of abort; the bulk mutate
          // is only applied when the driver finishes it.
          checkForFlush(1);
        } catch (BlurException e) {
          throw new IOException(e);
        } catch (TException e) {
          throw new IOException(e);
        }
      }

      private void checkForFlush(int max) throws BlurException, TException {
        for (Entry<String, List<RowMutation>> e : _serverBatches.entrySet()) {
          String server = e.getKey();
          List<RowMutation> batch = e.getValue();
          if (batch.size() >= max) {
            // Send the batch straight to the owning shard server.
            Iface client = BlurClient.getClient(server);
            client.bulkMutateAddMultiple(bulkId, batch);
            batch.clear();
          }
        }
      }

      private String getServer(String rowId) throws BlurException, TException {
        int shard = _blurPartitioner.getShard(rowId, numberOfShardsInTable);
        String shardId = ShardUtil.getShardName(shard);
        return getServerFromShardId(table, shardId);
      }

      private String getServerFromShardId(String table, String shardId) throws BlurException, TException {
        // The shard-to-server layout is fetched once and cached for the life
        // of this writer.
        if (_shardToServerLayout == null) {
          _shardToServerLayout = controllerClient.shardServerLayout(table);
        }
        return _shardToServerLayout.get(shardId);
      }
    };
  }
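
  // A minimal sketch of the routing the writer above performs, shown
  // standalone: a rowId hashes to a shard via BlurPartitioner, and the shard
  // name maps to its server through the controller's shardServerLayout. The
  // table name and shard count are illustrative.
  //
  //   BlurPartitioner partitioner = new BlurPartitioner();
  //   int shard = partitioner.getShard("row-123", 16);
  //   String shardName = ShardUtil.getShardName(shard);
  //   Map<String, String> layout = controllerClient.shardServerLayout("testtable");
  //   String server = layout.get(shardName);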

  protected Record toRecord(BlurRecord blurRecord) {
    return new Record(blurRecord.getRecordId(), blurRecord.getFamily(), toColumns(blurRecord.getColumns()));
  }

  private List<Column> toColumns(List<BlurColumn> columns) {
    List<Column> result = new ArrayList<Column>();
    for (BlurColumn blurColumn : columns) {
      result.add(new Column(blurColumn.getName(), blurColumn.getValue()));
    }
    return result;
  }
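
  // A minimal sketch of the data model the conversions above bridge: a
  // BlurRecord carrying rowId/recordId/family plus columns becomes a Thrift
  // Record. The setters and the BlurColumn constructor shown here are
  // assumptions about the blur-mapreduce API; field values are illustrative.
  //
  //   BlurRecord blurRecord = new BlurRecord();
  //   blurRecord.setRowId("row-1");
  //   blurRecord.setRecordId("record-1");
  //   blurRecord.setFamily("fam0");
  //   blurRecord.addColumn(new BlurColumn("col0", "value0"));
  //   Record record = toRecord(blurRecord);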
}