/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import com.google.common.base.Preconditions;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
/**
 * Vectorized filter that implements a logical OR over its child filter expressions.
 *
 * <p>Short-circuit evaluation is applied: the first child is evaluated over the incoming
 * rows, and every subsequent child is evaluated only over the rows that no earlier child
 * has selected. The rows selected by any child are then written back, in their original
 * order, into the batch's selected array.
 *
 * <p>The scratch buffers below are sized on demand inside {@link #evaluate}, so the filter
 * is not limited to batches of {@code VectorizedRowBatch.DEFAULT_SIZE} rows.
 * NOTE(review): the buffers are transient; allocating them on demand also covers instances
 * created without running field initializers (e.g. by deserialization) -- confirm which
 * serialization mechanism is used for vector expressions.
 */
public class FilterExprOrExpr extends VectorExpression {
  private static final long serialVersionUID = 1L;

  // Scratch buffers; (re)allocated by evaluate() whenever they are missing or too small.

  // Copy of the row indices that were selected when evaluate() was entered.
  private transient int[] initialSelected;
  // Row indices not yet selected by any child; handed to the children as batch.selected.
  private transient int[] unselected;
  // Snapshot of the unselected rows taken before a non-last child filters them in place.
  private transient int[] unselectedCopy;
  // Output of subtract(): rows still unselected after the current child.
  private transient int[] difference;
  // Per-row flags indexed by row index: 1 if some child selected the row, 0 otherwise.
  private transient int[] tmp;

  public FilterExprOrExpr() {
    super();
  }

  /**
   * Returns a buffer that can hold at least {@code required} ints.
   *
   * <p>The given buffer is reused when it is non-null and large enough; otherwise a new
   * array is allocated. Old contents are not copied because every buffer is fully
   * rewritten by each {@link #evaluate} call before it is read.
   *
   * @param buffer
   *          The current buffer; may be null.
   * @param required
   *          The minimum number of elements needed.
   * @return
   *          A buffer of length at least
   *          {@code max(required, VectorizedRowBatch.DEFAULT_SIZE)}.
   */
  private static int[] ensureCapacity(int[] buffer, int required) {
    if (buffer != null && buffer.length >= required) {
      return buffer;
    }
    return new int[Math.max(required, VectorizedRowBatch.DEFAULT_SIZE)];
  }

  /**
   * Remove (subtract) members from an array and produce the results into
   * a difference array.
   *
   * <p>Both inputs must be strictly increasing and {@code remove} must be a subset of
   * {@code all}; the subtraction is done in a single merge-style pass that relies on this.
   *
   * @param all
   *          The selected array containing all members.
   * @param allSize
   *          The size of all.
   * @param remove
   *          The indices to remove.  They must all be present in input selected array.
   * @param removeSize
   *          The size of remove.
   * @param difference
   *          The resulting difference -- the all array indices not in the
   *          remove array.
   * @return
   *          The resulting size of the difference array.
   * @throws IllegalStateException
   *          (from Preconditions.checkState) if any two of the three arrays are the
   *          same object.
   * @throws RuntimeException
   *          if all or remove is not strictly increasing, or if some member of remove
   *          was not matched in all.
   */
  private int subtract(int[] all, int allSize,
      int[] remove, int removeSize, int[] difference) {

    // UNDONE: Copied from VectorMapJoinOuterGenerateResultOperator.

    Preconditions.checkState((all != remove) && (remove != difference) && (difference != all));

    // Comment out these checks when we are happy..
    if (!verifyMonotonicallyIncreasing(all, allSize)) {
      throw new RuntimeException("all is not in sort order and unique");
    }
    if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
      throw new RuntimeException("remove is not in sort order and unique");
    }

    int differenceCount = 0;

    // Determine which rows are left.  Because both inputs are sorted, a single cursor
    // into remove is enough: it only advances when the current member of all matches.
    int removeIndex = 0;
    for (int i = 0; i < allSize; i++) {
      int candidateIndex = all[i];
      if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
        removeIndex++;
      } else {
        difference[differenceCount++] = candidateIndex;
      }
    }

    if (removeIndex != removeSize) {
      throw new RuntimeException("Not all batch indices removed");
    }

    // No ordering check is needed on the result: difference is an in-order subsequence
    // of all, which was verified above to be strictly increasing, and difference is a
    // distinct array so writing it cannot have modified all.

    return differenceCount;
  }

  /**
   * Checks that the first {@code size} entries of an array are strictly increasing,
   * i.e. sorted ascending with no duplicates.
   *
   * @param selected
   *          The array of row indices to check.
   * @param size
   *          The number of leading entries to check.
   * @return
   *          true if size is 0 or every checked entry is greater than its predecessor;
   *          false otherwise.
   */
  public boolean verifyMonotonicallyIncreasing(int[] selected, int size) {

    if (size == 0) {
      return true;
    }
    int prevBatchIndex = selected[0];

    for (int i = 1; i < size; i++) {
      int batchIndex = selected[i];
      if (batchIndex <= prevBatchIndex) {
        return false;
      }
      prevBatchIndex = batchIndex;
    }
    return true;
  }

  /**
   * Filters the batch in place so that it keeps exactly the rows selected by at least
   * one child expression.
   *
   * <p>On return {@code batch.size} is the number of surviving rows and
   * {@code batch.selected} lists them in their original order. If every incoming row
   * survives, {@code batch.selectedInUse} is restored to the value it had on entry;
   * otherwise it is left true. An empty batch is returned untouched.
   *
   * @param batch
   *          The batch to filter.
   */
  @Override
  public void evaluate(VectorizedRowBatch batch) {

    int n = batch.size;
    if (n <= 0) {
      return;
    }

    VectorExpression childExpr1 = this.childExpressions[0];

    boolean prevSelectInUse = batch.selectedInUse;

    // Each of the index buffers holds at most n row indices.
    initialSelected = ensureCapacity(initialSelected, n);
    unselected = ensureCapacity(unselected, n);
    unselectedCopy = ensureCapacity(unselectedCopy, n);
    difference = ensureCapacity(difference, n);

    // Save the original selected vector, tracking the largest row index so that the
    // flag array (which is indexed by row index, not by position) can be sized for it.
    int[] sel = batch.selected;
    int maxRowIndex;
    if (batch.selectedInUse) {
      System.arraycopy(sel, 0, initialSelected, 0, n);
      maxRowIndex = -1;
      for (int j = 0; j < n; j++) {
        if (sel[j] > maxRowIndex) {
          maxRowIndex = sel[j];
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        initialSelected[i] = i;
        sel[i] = i;
      }
      batch.selectedInUse = true;
      maxRowIndex = n - 1;
    }
    tmp = ensureCapacity(tmp, maxRowIndex + 1);

    childExpr1.evaluate(batch);

    // Preserve the selected reference and size values generated
    // after the first child is evaluated.
    int sizeAfterFirstChild = batch.size;
    int[] selectedAfterFirstChild = batch.selected;

    // Calculate unselected ones in last evaluate: clear the flag of every incoming row,
    // set the flag of every row the first child kept, then collect the rows still clear.
    for (int j = 0; j < n; j++) {
      tmp[initialSelected[j]] = 0;
    }
    for (int j = 0; j < batch.size; j++) {
      tmp[selectedAfterFirstChild[j]] = 1;
    }
    int unselectedSize = 0;
    for (int j = 0; j < n; j++) {
      int i = initialSelected[j];
      if (tmp[i] == 0) {
        unselected[unselectedSize++] = i;
      }
    }

    int newSize = sizeAfterFirstChild;

    // Point the batch at the rows that are still unselected for the remaining children.
    batch.selected = unselected;
    batch.size = unselectedSize;

    if (unselectedSize > 0) {

      // Evaluate subsequent child expression over unselected ones only.

      final int childrenCount = this.childExpressions.length;
      int childIndex = 1;
      while (true) {

        boolean isLastChild = (childIndex + 1 >= childrenCount);

        // When we have yet another child beyond the current one... save unselected.
        if (!isLastChild) {
          System.arraycopy(batch.selected, 0, unselectedCopy, 0, unselectedSize);
        }

        VectorExpression childExpr = this.childExpressions[childIndex];

        childExpr.evaluate(batch);

        // Merge the result of last evaluate to previous evaluate.
        newSize += batch.size;
        for (int i = 0; i < batch.size; i++) {
          tmp[batch.selected[i]] = 1;
        }

        if (isLastChild) {
          break;
        }

        // Rows still unselected = rows given to this child minus the rows it kept.
        unselectedSize = subtract(unselectedCopy, unselectedSize, batch.selected, batch.size,
            difference);
        if (unselectedSize == 0) {
          break;
        }
        System.arraycopy(difference, 0, batch.selected, 0, unselectedSize);
        batch.size = unselectedSize;

        childIndex++;
      }
    }

    // Important: Restore the batch's selected array.
    batch.selected = selectedAfterFirstChild;

    // Rebuild the selection by walking the incoming rows in order and keeping the
    // flagged ones, so the output preserves the original row order.
    int k = 0;
    for (int j = 0; j < n; j++) {
      int i = initialSelected[j];
      if (tmp[i] == 1) {
        batch.selected[k++] = i;
      }
    }

    batch.size = newSize;
    if (newSize == n) {
      // Filter didn't do anything
      batch.selectedInUse = prevSelectInUse;
    }
  }

  /**
   * Returns -1; this expression reports no output column.
   */
  @Override
  public int getOutputColumn() {
    return -1;
  }

  /**
   * Returns the output type name, which is always "boolean".
   */
  @Override
  public String getOutputType() {
    return "boolean";
  }

  /**
   * Returns null: this expression has no parameters of its own beyond its children.
   */
  @Override
  public String vectorExpressionParameters() {
    // The children are input.
    return null;
  }

  /**
   * Builds the descriptor for this expression: FILTER mode with two INT_FAMILY
   * arguments, both given as COLUMN input expressions.
   */
  @Override
  public VectorExpressionDescriptor.Descriptor getDescriptor() {

    // IMPORTANT NOTE: For Multi-OR, the VectorizationContext class will catch cases with 3 or
    //                 more parameters...

    return (new VectorExpressionDescriptor.Builder())
        .setMode(
            VectorExpressionDescriptor.Mode.FILTER)
        .setNumArguments(2)
        .setArgumentTypes(
            VectorExpressionDescriptor.ArgumentType.INT_FAMILY,
            VectorExpressionDescriptor.ArgumentType.INT_FAMILY)
        .setInputExpressionTypes(
            VectorExpressionDescriptor.InputExpressionType.COLUMN,
            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
  }
}