/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.llap.io.encoded;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ConcurrentLinkedQueue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.llap.DebugUtils;
import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl;
import org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.CacheWriter;
import org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter;
import org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.EncodingWriter;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
/** The class that writes rows from a text reader to an ORC writer using VectorDeserializeRow. */
/**
 * The class that writes rows from a text reader to an ORC writer using VectorDeserializeRow.
 * <p>
 * Rows are deserialized directly into a {@link VectorizedRowBatch}; full batches are handed to
 * the ORC writer either synchronously, or (when async mode is enabled) via a queue consumed by a
 * dedicated daemon thread running {@link #run()}. In async mode the producer never blocks on ORC
 * encoding; completion is signaled through {@link AsyncCallback}.
 */
class VectorDeserializeOrcWriter extends EncodingWriter implements Runnable {
  private final VectorizedRowBatchCtx vrbCtx;
  private Writer orcWriter;
  private final LazySimpleDeserializeRead deserializeRead;
  private final VectorDeserializeRow<?> vectorDeserializeRow;
  /** The OI describing what is actually written to ORC (projected columns only, if includes). */
  private final StructObjectInspector destinationOi;
  private final boolean usesSourceIncludes;
  private final List<Integer> sourceIncludes;

  private final boolean isAsync;
  private final Thread orcThread;
  /** Producer (reader) -> consumer (ORC thread) queue; unbounded, so offer never fails. */
  private final ConcurrentLinkedQueue<WriteOperation> queue;
  private AsyncCallback completion;

  // Stored here only as async operation context.
  private final boolean[] cacheIncludes;

  /**
   * sourceBatch has all the columns the deserializer populates; destinationBatch is either the
   * same object (no includes) or a "dense" projection that shares the included column vectors.
   */
  private VectorizedRowBatch sourceBatch, destinationBatch;
  /** Batches handed off to the async writer since the last extractCurrentVrbs() call. */
  private List<VectorizedRowBatch> currentBatches;

  // TODO: if more writers are added, separate out an EncodingWriterFactory
  /**
   * Creates the best encoding writer for the given source. Falls back to the slower
   * {@link DeserializerOrcWriter} whenever the vectorized path is disabled or unsupported
   * (non-text input, non-LazySimpleSerDe, missing partition desc, "last column takes rest",
   * or any non-primitive column).
   */
  public static EncodingWriter create(InputFormat<?, ?> sourceIf, Deserializer serDe,
      Map<Path, PartitionDesc> parts, Configuration daemonConf, Configuration jobConf,
      Path splitPath, StructObjectInspector sourceOi, List<Integer> sourceIncludes,
      boolean[] cacheIncludes, int allocSize) throws IOException {
    // Vector SerDe can be disabled both on client and server side.
    if (!HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
        || !HiveConf.getBoolVar(jobConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
        || !(sourceIf instanceof TextInputFormat) || !(serDe instanceof LazySimpleSerDe)) {
      return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
    PartitionDesc partDesc = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        parts, path, null);
    if (partDesc == null) {
      LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: no partition desc for " + path);
      return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Properties tblProps = partDesc.getTableDesc().getProperties();
    if ("true".equalsIgnoreCase(tblProps.getProperty(
        serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST))) {
      LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter due to "
          + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST);
      return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    for (StructField sf : sourceOi.getAllStructFieldRefs()) {
      Category c = sf.getFieldObjectInspector().getCategory();
      if (c != Category.PRIMITIVE) {
        LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: " + c + " is not supported");
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
      }
    }
    LlapIoImpl.LOG.info("Creating VectorDeserializeOrcWriter for " + path);
    return new VectorDeserializeOrcWriter(
        daemonConf, tblProps, sourceOi, sourceIncludes, cacheIncludes, allocSize);
  }

  private VectorDeserializeOrcWriter(Configuration conf, Properties tblProps,
      StructObjectInspector sourceOi, List<Integer> sourceIncludes, boolean[] cacheIncludes,
      int allocSize) throws IOException {
    super(sourceOi, allocSize);
    // See also: the usage of VectorDeserializeType, for binary. For now, we only want text.
    this.vrbCtx = createVrbCtx(sourceOi);
    this.sourceIncludes = sourceIncludes;
    this.cacheIncludes = cacheIncludes;
    this.sourceBatch = vrbCtx.createVectorizedRowBatch();
    deserializeRead = new LazySimpleDeserializeRead(vrbCtx.getRowColumnTypeInfos(),
        /* useExternalBuffer */ true, createSerdeParams(conf, tblProps));
    vectorDeserializeRow = new VectorDeserializeRow<LazySimpleDeserializeRead>(deserializeRead);
    int colCount = vrbCtx.getRowColumnTypeInfos().length;
    boolean[] includes = null;
    this.usesSourceIncludes = sourceIncludes.size() < colCount;
    if (usesSourceIncludes) {
      // VectorDeserializeRow produces "sparse" VRB when includes are used; we need to write the
      // "dense" VRB to ORC. Ideally, we'd use projection columns, but ORC writer doesn't use them.
      // In any case, we would also need to build a new OI for OrcWriter config.
      // This is why OrcWriter is created after this writer, by the way.
      this.destinationBatch = new VectorizedRowBatch(sourceIncludes.size());
      includes = new boolean[colCount];
      int inclBatchIx = 0;
      List<String> childNames = new ArrayList<>(sourceIncludes.size());
      List<ObjectInspector> childOis = new ArrayList<>(sourceIncludes.size());
      List<? extends StructField> sourceFields = sourceOi.getAllStructFieldRefs();
      for (Integer columnId : sourceIncludes) {
        includes[columnId] = true;
        assert inclBatchIx <= columnId;
        // Note that we use the same vectors in both batches. Clever, very clever.
        destinationBatch.cols[inclBatchIx++] = sourceBatch.cols[columnId];
        StructField sourceField = sourceFields.get(columnId);
        childNames.add(sourceField.getFieldName());
        childOis.add(sourceField.getFieldObjectInspector());
      }
      // This is only used by ORC to derive the structure. Most fields are unused.
      destinationOi = new LazySimpleStructObjectInspector(
          childNames, childOis, null, (byte)0, null);
      destinationBatch.setPartitionInfo(sourceIncludes.size(), 0);
      if (LlapIoImpl.LOG.isDebugEnabled()) {
        LlapIoImpl.LOG.debug("Includes for deserializer are " + DebugUtils.toString(includes));
      }
      try {
        vectorDeserializeRow.init(includes);
      } catch (HiveException e) {
        throw new IOException(e);
      }
    } else {
      // No includes - use the standard batch.
      this.destinationBatch = sourceBatch;
      this.destinationOi = sourceOi;
      try {
        vectorDeserializeRow.init();
      } catch (HiveException e) {
        throw new IOException(e);
      }
    }
    this.isAsync = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ASYNC_ENABLED);
    if (isAsync) {
      currentBatches = new LinkedList<>();
      queue = new ConcurrentLinkedQueue<>();
      orcThread = new Thread(this);
      orcThread.setDaemon(true);
      orcThread.setName(Thread.currentThread().getName() + "-OrcEncode");
    } else {
      queue = null;
      orcThread = null;
      currentBatches = null;
    }
  }

  /** Starts the async encoding thread; only valid when async mode is enabled. */
  public void startAsync(AsyncCallback callback) {
    this.completion = callback;
    this.orcThread.start();
  }

  private static VectorizedRowBatchCtx createVrbCtx(StructObjectInspector oi) throws IOException {
    VectorizedRowBatchCtx vrbCtx = new VectorizedRowBatchCtx();
    try {
      vrbCtx.init(oi, new String[0]);
    } catch (HiveException e) {
      throw new IOException(e);
    }
    return vrbCtx;
  }

  private static LazySerDeParameters createSerdeParams(
      Configuration conf, Properties tblProps) throws IOException {
    try {
      return new LazySerDeParameters(conf, tblProps, LazySimpleSerDe.class.getName());
    } catch (SerDeException e) {
      throw new IOException(e);
    }
  }

  @Override
  public void init(CacheWriter cacheWriter, Configuration conf, Path path) throws IOException {
    this.orcWriter = super.createOrcWriter(cacheWriter, conf, path, destinationOi);
    this.cacheWriter = cacheWriter;
  }

  /** Callback invoked on the ORC thread when the async writer has processed the close event. */
  public interface AsyncCallback {
    void onComplete(VectorDeserializeOrcWriter writer);
  }

  /**
   * Async consumer loop: polls the queue with exponential backoff and applies each
   * {@link WriteOperation} until one returns true (the close event) or an error occurs.
   */
  @Override
  public void run() {
    while (true) {
      WriteOperation op = null;
      int fallbackMs = 8;
      while (true) {
        // TODO: we could probably do a simple notification (or a blocking queue) instead.
        op = queue.poll();
        if (op != null) break;
        if (fallbackMs > 262144) { // Arbitrary... we don't expect caller to hang out for 7+ mins.
          LlapIoImpl.LOG.error("ORC encoder timed out waiting for input");
          discardData();
          return;
        }
        try {
          Thread.sleep(fallbackMs);
        } catch (InterruptedException e) {
          LlapIoImpl.LOG.error("ORC encoder interrupted waiting for input");
          discardData();
          // Restore the interrupt status per the standard interruption contract.
          Thread.currentThread().interrupt();
          return;
        }
        fallbackMs <<= 1;
      }
      try {
        if (op.apply(orcWriter, cacheWriter)) {
          LlapIoImpl.LOG.info("ORC encoder received an exit event");
          completion.onComplete(this);
          return;
        }
      } catch (Exception e) {
        LlapIoImpl.LOG.error("ORC encoder failed", e);
        discardData();
        return;
      }
    }
  }

  /** Discards the cached data on error; never throws (best-effort cleanup). */
  private void discardData() {
    try {
      cacheWriter.discardData();
    } catch (Exception ex) {
      LlapIoImpl.LOG.error("Failed to close an async cache writer", ex);
    }
  }

  @Override
  public void writeOneRow(Writable row) throws IOException {
    if (sourceBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
      flushBatch();
    }
    BinaryComparable binComp = (BinaryComparable)row;
    deserializeRead.set(binComp.getBytes(), 0, binComp.getLength());

    // Deserialize and append new row using the current batch size as the index.
    try {
      // Not using ByRef now since it's unsafe for text readers. Might be safe for others.
      vectorDeserializeRow.deserialize(sourceBatch, sourceBatch.size++);
    } catch (Exception e) {
      throw new IOException("DeserializeRead detail: "
          + vectorDeserializeRow.getDetailedReadPositionString(), e);
    }
  }

  /**
   * Hands the current batch to the writer. Sync mode reuses the batch after resetting its
   * vectors; async mode must allocate fresh batches because the old ones are still owned by
   * the ORC thread and the operator pipeline.
   */
  private void flushBatch() throws IOException {
    addBatchToWriter();
    if (!isAsync) {
      for (int c = 0; c < sourceBatch.cols.length; ++c) {
        // This resets vectors in both batches.
        ColumnVector colVector = sourceBatch.cols[c];
        if (colVector != null) {
          colVector.reset();
          colVector.init();
        }
      }
      sourceBatch.selectedInUse = false;
      sourceBatch.size = 0;
      sourceBatch.endOfFile = false;
      propagateSourceBatchFieldsToDest();
    } else {
      // In addBatchToWriter, we have passed the batch to both ORC and operator pipeline
      // (neither ever changes the vectors). We'd need a set of vectors batch to write to.
      // TODO: for now, create this from scratch. Ideally we should return the vectors from ops.
      //       We could also have the ORC thread create it for us in its spare time...
      this.sourceBatch = vrbCtx.createVectorizedRowBatch();
      if (usesSourceIncludes) {
        this.destinationBatch = new VectorizedRowBatch(sourceIncludes.size());
        int inclBatchIx = 0;
        for (Integer columnId : sourceIncludes) {
          destinationBatch.cols[inclBatchIx++] = sourceBatch.cols[columnId];
        }
        destinationBatch.setPartitionInfo(sourceIncludes.size(), 0);
      } else {
        this.destinationBatch = sourceBatch;
      }
    }
  }

  /** Copies batch metadata to the projected batch; vectors are already shared. */
  private void propagateSourceBatchFieldsToDest() {
    if (destinationBatch == sourceBatch) return;
    destinationBatch.selectedInUse = sourceBatch.selectedInUse;
    destinationBatch.size = sourceBatch.size;
    destinationBatch.endOfFile = sourceBatch.endOfFile;
  }

  private void addBatchToWriter() throws IOException {
    propagateSourceBatchFieldsToDest();
    if (!isAsync) {
      orcWriter.addRowBatch(destinationBatch);
    } else {
      // Track the batch for the pipeline (see extractCurrentVrbs) and enqueue for ORC.
      currentBatches.add(destinationBatch);
      addWriteOp(new VrbOperation(destinationBatch));
    }
  }

  @Override
  public void flushIntermediateData() throws IOException {
    if (sourceBatch.size > 0) {
      flushBatch();
    }
  }

  @Override
  public void writeIntermediateFooter() throws IOException {
    if (isAsync) {
      addWriteOp(new IntermediateFooterOperation());
    } else {
      orcWriter.writeIntermediateFooter();
    }
  }

  private void addWriteOp(WriteOperation wo) throws AssertionError {
    if (queue.offer(wo)) return;
    throw new AssertionError("Queue full"); // This should never happen with linked list queue.
  }

  @Override
  public void setCurrentStripeOffsets(long currentKnownTornStart,
      long firstStartOffset, long lastStartOffset, long fileOffset) {
    if (isAsync) {
      addWriteOp(new SetStripeDataOperation(
          currentKnownTornStart, firstStartOffset, lastStartOffset, fileOffset));
    } else {
      cacheWriter.setCurrentStripeOffsets(
          currentKnownTornStart, firstStartOffset, lastStartOffset, fileOffset);
    }
  }

  @Override
  public void close() throws IOException {
    if (sourceBatch.size > 0) {
      addBatchToWriter();
    }
    if (!isAsync) {
      orcWriter.close();
    } else {
      // The CloseOperation makes the ORC thread close the writer and exit.
      addWriteOp(new CloseOperation());
    }
  }

  /** Returns and resets the batches accumulated since the last call; null in sync mode. */
  public List<VectorizedRowBatch> extractCurrentVrbs() {
    if (!isAsync) return null;
    List<VectorizedRowBatch> result = currentBatches;
    currentBatches = new LinkedList<>();
    return result;
  }

  /** A unit of work for the async ORC thread; returns true iff the thread should stop. */
  private static interface WriteOperation {
    boolean apply(Writer writer, CacheWriter cacheWriter) throws IOException;
  }

  private static class VrbOperation implements WriteOperation {
    private VectorizedRowBatch batch;

    public VrbOperation(VectorizedRowBatch batch) {
      // LlapIoImpl.LOG.debug("Adding batch " + batch);
      this.batch = batch;
    }

    @Override
    public boolean apply(Writer writer, CacheWriter cacheWriter) throws IOException {
      // LlapIoImpl.LOG.debug("Writing batch " + batch);
      writer.addRowBatch(batch);
      return false;
    }
  }

  private static class IntermediateFooterOperation implements WriteOperation {
    @Override
    public boolean apply(Writer writer, CacheWriter cacheWriter) throws IOException {
      writer.writeIntermediateFooter();
      return false;
    }
  }

  private static class SetStripeDataOperation implements WriteOperation {
    private final long currentKnownTornStart, firstStartOffset, lastStartOffset, fileOffset;

    public SetStripeDataOperation(long currentKnownTornStart,
        long firstStartOffset, long lastStartOffset, long fileOffset) {
      this.currentKnownTornStart = currentKnownTornStart;
      this.firstStartOffset = firstStartOffset;
      this.lastStartOffset = lastStartOffset;
      this.fileOffset = fileOffset;
    }

    @Override
    public boolean apply(Writer writer, CacheWriter cacheWriter) throws IOException {
      cacheWriter.setCurrentStripeOffsets(
          currentKnownTornStart, firstStartOffset, lastStartOffset, fileOffset);
      return false;
    }
  }

  private static class CloseOperation implements WriteOperation {
    @Override
    public boolean apply(Writer writer, CacheWriter cacheWriter) throws IOException {
      writer.close();
      return true; // The thread should stop after this.
    }
  }

  public boolean[] getOriginalCacheIncludes() {
    return cacheIncludes;
  }

  @Override
  public boolean isOnlyWritingIncludedColumns() {
    return usesSourceIncludes;
  }

  public void interrupt() {
    assert orcThread != null;
    orcThread.interrupt();
  }
}