package edu.washington.escience.myria.operator;
import java.util.BitSet;
import java.util.Objects;
import javax.annotation.Nullable;
import com.google.common.collect.ImmutableMap;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.storage.TupleUtils;
/**
* A Duplicate Elimination Operator that works on ordered input.
*/
public final class OrderedDupElim extends UnaryOperator {
/** Required for Java serialization. */
private static final long serialVersionUID = 1L;
/** The tuple batch containing the last emitted tuple. */
private TupleBatch lastTupleBatch = null;
/** The row of the last emitted tuple in the last emitted tuple batch. */
private int lastTupleRow;
/**
* The order in which columns of the input tuple are scanned. So named because we want to scan in the reverse order
* that columns were sorted to find differences as quickly as possible.
*/
private int[] invSortColumns;
/**
* A duplicate elimination operator that works on ordered input. This constructor assumes that the child columns were
* sorted in the same order in which they were input.
*
* @param child the source of the tuples.
*/
public OrderedDupElim(final Operator child) {
this(child, null);
}
/**
* A duplicate elimination operator that works on ordered input. If present, <code>sortColumns</code> specifies the
* order in which the columns of the input data were sorted, and will look for differences from the last column to the
* first.
*
* @param child the source of the tuples.
* @param sortColumns the order in which the columns of the input tuples are sorted.
*/
public OrderedDupElim(final Operator child, @Nullable final int[] sortColumns) {
super(child);
invSortColumns = reverseArrayOrNull(sortColumns);
}
/**
* Utility function to reverse an int array.
*
* @param input the array to be flipped, which may be null.
* @return a copy of input with the elements reversed, or null.
*/
private int[] reverseArrayOrNull(@Nullable final int[] input) {
if (input == null) {
return null;
}
int[] output = new int[input.length];
for (int i = 0; i < input.length; ++i) {
output[i] = input[input.length - i - 1];
}
return output;
}
@Override
protected void init(final ImmutableMap<String, Object> execEnvVars) throws Exception {
Schema schema = Objects.requireNonNull(getSchema());
/* Assume the columns are sorted in order by default. */
if (invSortColumns == null) {
int numColumns = schema.numColumns();
invSortColumns = new int[numColumns];
for (int i = 0; i < numColumns; ++i) {
invSortColumns[i] = numColumns - i - 1;
}
}
};
@Override
protected TupleBatch fetchNextReady() throws Exception {
TupleBatch tb = getChild().nextReady();
if (tb == null) {
return null;
}
BitSet output = new BitSet(tb.numTuples());
for (int row = 0; row < tb.numTuples(); ++row) {
if (lastTupleBatch == null
|| !TupleUtils.tupleEquals(
tb, invSortColumns, row, lastTupleBatch, invSortColumns, lastTupleRow)) {
output.set(row);
lastTupleBatch = tb;
lastTupleRow = row;
}
}
return tb.filter(output);
}
@Override
protected Schema generateSchema() {
Operator child = getChild();
if (child == null) {
return null;
}
return child.getSchema();
}
}