HashPartitionFunction.java example

Explorer
myria-master
package edu.washington.escience.myria.operator.network.distribute;

import java.util.BitSet;

import javax.annotation.Nonnull;

import com.google.common.base.Preconditions;

import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.util.HashUtils;
import edu.washington.escience.myria.util.MyriaArrayUtils;

/**
 * The partition of a tuple is decided by the hash code of a group of fields of the tuple.
 */
public final class HashPartitionFunction extends PartitionFunction {

  /** Required for Java serialization. */
  private static final long serialVersionUID = 1L;

  /** The indices used for partitioning. */
  private final int[] indexes;

  /** The index of the chosen hashcode in <code>HashUtils</code>. */
  private final int seedIndex;

  /**
   * @param indexes the indices used for partitioning.
   */
  public HashPartitionFunction(final int[] indexes) {
    this(indexes, 0);
  }

  /**
   * @param indexes the indices used for partitioning.
   * @param seedIndex the index of chosen hash seed.
   */
  public HashPartitionFunction(final int[] indexes, final int seedIndex) {
    Preconditions.checkArgument(
        indexes.length > 0, "HashPartitionFunction requires at least 1 field to hash");
    for (int i = 0; i < indexes.length; ++i) {
      Preconditions.checkArgument(
          indexes[i] >= 0,
          "HashPartitionFunction field index %s cannot take negative value %s",
          i,
          indexes[i]);
    }
    MyriaArrayUtils.checkSet(indexes);
    this.indexes = indexes;
    this.seedIndex = seedIndex % HashUtils.NUM_OF_HASHFUNCTIONS;
  }

  /**
   * @return the field indexes on which tuples will be hash partitioned.
   */
  public int[] getIndexes() {
    return indexes;
  }

  @Override
  public TupleBatch[] partition(@Nonnull final TupleBatch tb) {
    BitSet[] partitions = new BitSet[numPartitions()];
    for (int i = 0; i < partitions.length; ++i) {
      partitions[i] = new BitSet();
    }
    for (int i = 0; i < tb.numTuples(); i++) {
      int p = Math.floorMod(HashUtils.hashSubRow(tb, indexes, i, seedIndex), numPartitions());
      partitions[p].set(i);
    }
    TupleBatch[] tbs = new TupleBatch[numPartitions()];
    for (int i = 0; i < tbs.length; ++i) {
      tbs[i] = tb.filter(partitions[i]);
    }
    return tbs;
  }
}