/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.llap.io.decode;
import java.util.concurrent.Callable;
import org.apache.hadoop.hive.common.Pool;
import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch;
import org.apache.hadoop.hive.llap.ConsumerFeedback;
import org.apache.hadoop.hive.llap.DebugUtils;
import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch;
import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl;
import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics;
import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer;
import org.apache.hive.common.util.FixedSizedObjectPool;
import org.apache.orc.TypeDescription;
public abstract class EncodedDataConsumer<BatchKey, BatchType extends EncodedColumnBatch<BatchKey>>
implements Consumer<BatchType>, ReadPipeline {
private volatile boolean isStopped = false;
private ConsumerFeedback<BatchType> upstreamFeedback;
private final Consumer<ColumnVectorBatch> downstreamConsumer;
private Callable<Void> readCallable;
private final LlapDaemonIOMetrics ioMetrics;
// Note that the pool is per EDC - within EDC, CVBs are expected to have the same schema.
private final static int CVB_POOL_SIZE = 128;
protected final FixedSizedObjectPool<ColumnVectorBatch> cvbPool;
public EncodedDataConsumer(Consumer<ColumnVectorBatch> consumer, final int colCount,
LlapDaemonIOMetrics ioMetrics) {
this.downstreamConsumer = consumer;
this.ioMetrics = ioMetrics;
cvbPool = new FixedSizedObjectPool<ColumnVectorBatch>(CVB_POOL_SIZE,
new Pool.PoolObjectHelper<ColumnVectorBatch>() {
@Override
public ColumnVectorBatch create() {
return new ColumnVectorBatch(colCount);
}
@Override
public void resetBeforeOffer(ColumnVectorBatch t) {
// Don't reset anything, we are reusing column vectors.
}
});
}
public void init(ConsumerFeedback<BatchType> upstreamFeedback,
Callable<Void> readCallable) {
this.upstreamFeedback = upstreamFeedback;
this.readCallable = readCallable;
}
@Override
public Callable<Void> getReadCallable() {
return readCallable;
}
@Override
public void consumeData(BatchType data) {
if (isStopped) {
returnSourceData(data);
return;
}
long start = System.currentTimeMillis();
try {
decodeBatch(data, downstreamConsumer);
} catch (Throwable ex) {
// This probably should not happen; but it does... at least also stop the consumer.
LlapIoImpl.LOG.error("decodeBatch threw", ex);
downstreamConsumer.setError(ex);
throw ex;
} finally {
long end = System.currentTimeMillis();
ioMetrics.addDecodeBatchTime(end - start);
}
returnSourceData(data);
}
/**
* Returns the ECB to caller for reuse. Only safe to call if the thread is the only owner
* of the ECB in question; or, if ECB is still in pendingData, pendingData must be locked.
*/
private void returnSourceData(BatchType data) {
upstreamFeedback.returnData(data);
}
protected abstract void decodeBatch(BatchType batch,
Consumer<ColumnVectorBatch> downstreamConsumer);
@Override
public void setDone() {
downstreamConsumer.setDone();
}
@Override
public void setError(Throwable t) {
downstreamConsumer.setError(t);
}
@Override
public void returnData(ColumnVectorBatch data) {
cvbPool.offer(data);
}
@Override
public void stop() {
upstreamFeedback.stop();
this.isStopped = true;
}
@Override
public void pause() {
// We are just a relay; send pause to encoded data producer.
upstreamFeedback.pause();
}
@Override
public void unpause() {
// We are just a relay; send unpause to encoded data producer.
upstreamFeedback.unpause();
}
}