package edu.washington.escience.myria.operator.network.distribute; import java.util.BitSet; import javax.annotation.Nonnull; import com.google.common.base.Preconditions; import edu.washington.escience.myria.storage.TupleBatch; import edu.washington.escience.myria.util.HashUtils; import edu.washington.escience.myria.util.MyriaArrayUtils; /** * The partition of a tuple is decided by the hash code of a group of fields of the tuple. */ public final class HashPartitionFunction extends PartitionFunction { /** Required for Java serialization. */ private static final long serialVersionUID = 1L; /** The indices used for partitioning. */ private final int[] indexes; /** The index of the chosen hashcode in <code>HashUtils</code>. */ private final int seedIndex; /** * @param indexes the indices used for partitioning. */ public HashPartitionFunction(final int[] indexes) { this(indexes, 0); } /** * @param indexes the indices used for partitioning. * @param seedIndex the index of chosen hash seed. */ public HashPartitionFunction(final int[] indexes, final int seedIndex) { Preconditions.checkArgument( indexes.length > 0, "HashPartitionFunction requires at least 1 field to hash"); for (int i = 0; i < indexes.length; ++i) { Preconditions.checkArgument( indexes[i] >= 0, "HashPartitionFunction field index %s cannot take negative value %s", i, indexes[i]); } MyriaArrayUtils.checkSet(indexes); this.indexes = indexes; this.seedIndex = seedIndex % HashUtils.NUM_OF_HASHFUNCTIONS; } /** * @return the field indexes on which tuples will be hash partitioned. */ public int[] getIndexes() { return indexes; } @Override public TupleBatch[] partition(@Nonnull final TupleBatch tb) { BitSet[] partitions = new BitSet[numPartitions()]; for (int i = 0; i < partitions.length; ++i) { partitions[i] = new BitSet(); } for (int i = 0; i < tb.numTuples(); i++) { int p = Math.floorMod(HashUtils.hashSubRow(tb, indexes, i, seedIndex), numPartitions()); partitions[p].set(i); } TupleBatch[] tbs = new TupleBatch[numPartitions()]; for (int i = 0; i < tbs.length; ++i) { tbs[i] = tb.filter(partitions[i]); } return tbs; } }