/* (c) 2014 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package com.linkedin.cubert.block; import java.io.IOException; import org.apache.hadoop.io.LongWritable; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import com.linkedin.cubert.operator.TupleOperator; import com.linkedin.cubert.plan.physical.PerfProfiler; import com.linkedin.cubert.utils.TupleUtils; /** * Similar to TupleOperatorBlock, but buffer the output tuples from operator for * performance profile. */ public class BufferedTupleOperatorBlock extends TupleOperatorBlock { private static final int BUFFER_SIZE = 1000; private static final int NUM_BATCH_TO_REPORT = 100; protected Tuple[] outputBuffer; private int bufferSize; private int bufferPointer; private boolean lastBatch; private boolean shallowCopy; private boolean isOutputBlock; private int numBatchSinceLastReport; // Only valid for output block. private PerfProfiler profiler; // Only valid for output block. private LongWritable cumulativeElapsedTime; public BufferedTupleOperatorBlock(TupleOperator operator, BlockProperties props, LongWritable cumulativeElapsedTime) { super(operator, props); outputBuffer = new Tuple[BUFFER_SIZE]; bufferSize = 0; bufferPointer = 0; lastBatch = false; this.cumulativeElapsedTime = cumulativeElapsedTime; shallowCopy = props.getSchema().allFieldsAllowShallowCopy(); isOutputBlock = false; numBatchSinceLastReport = 0; profiler = null; // Pre-allocate the space for each tuple. int numCols = props.getSchema().getNumColumns(); for (int i = 0; i < BUFFER_SIZE; i++) outputBuffer[i] = TupleFactory.getInstance().newTuple(numCols); } public void reset(TupleOperator operator) { this.operator = operator; bufferSize = 0; bufferPointer = 0; lastBatch = false; } public void setAsOutputBlock(PerfProfiler profiler) { isOutputBlock = true; this.profiler = profiler; } public void updatePerformanceCounter() { profiler.updatePerformanceCounter(); numBatchSinceLastReport = 0; } @Override public Tuple next() throws IOException, InterruptedException { if (bufferPointer < bufferSize) { // Output buffer is not exhausted, directly return the buffered tuple. Tuple nextTuple = outputBuffer[bufferPointer]; bufferPointer++; return nextTuple; } // Avoid read operator.next() again. if (lastBatch) return null; // Output buffer reset. bufferPointer = 0; bufferSize = 0; long startNanoTime = System.nanoTime(); // Fill up the buffer. for (int i = 0; i < BUFFER_SIZE; i++) { Tuple nextTuple = operator.next(); if (nextTuple == null) { lastBatch = true; break; } if (shallowCopy) TupleUtils.copy(nextTuple, outputBuffer[i]); else TupleUtils.deepCopy(nextTuple, outputBuffer[i]); bufferSize++; } long endNanoTime = System.nanoTime(); cumulativeElapsedTime.set(cumulativeElapsedTime.get() + endNanoTime - startNanoTime); if (isOutputBlock) { numBatchSinceLastReport++; if (numBatchSinceLastReport == NUM_BATCH_TO_REPORT) updatePerformanceCounter(); } // End of block. if (bufferSize == 0) return null; // Return the first tuple in the buffer. bufferPointer++; return outputBuffer[0]; } }