package edu.washington.escience.myria.operator.network.distribute;

import java.util.BitSet;

import com.fasterxml.jackson.annotation.JsonProperty;

import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.util.HashUtils;

/**
 * Multiple field multiple dimension hash partition function for HyperCubeJoin.
 *
 * <p>Each hashed column is mapped to one hypercube dimension. For every tuple, the hash of each
 * hashed column is reduced modulo the size of its mapped dimension to obtain a coordinate, and the
 * coordinates are combined into a single partition index using a mixed-radix encoding.
 */
public final class HyperCubePartitionFunction extends PartitionFunction {

  /** Required for Java serialization. */
  private static final long serialVersionUID = 1L;

  /** Indices of the columns that are hashed. */
  @JsonProperty private final int[] hashedColumns;

  /**
   * For each entry of {@link #hashedColumns}, the index into {@link #hyperCubeDimensions} of the
   * hypercube dimension that column is mapped to.
   */
  @JsonProperty private final int[] mappedHCDimensions;

  /** The size of each dimension of the hypercube. */
  @JsonProperty private final int[] hyperCubeDimensions;

  /**
   * @param hyperCubeDimensions the sizes of each dimension of the hypercube.
   * @param hashedColumns which fields are hashed.
   * @param mappedHCDimensions mapped hypercube dimensions of hashed columns.
   */
  public HyperCubePartitionFunction(
      final int[] hyperCubeDimensions, final int[] hashedColumns, final int[] mappedHCDimensions) {
    super();
    this.hashedColumns = hashedColumns;
    this.hyperCubeDimensions = hyperCubeDimensions;
    this.mappedHCDimensions = mappedHCDimensions;
  }

  /**
   * Splits a tuple batch into one batch per partition.
   *
   * <p>The partition index of a tuple is the mixed-radix number whose digits are the per-column
   * coordinates {@code floorMod(hash(column j), size of mapped dimension j)}, taken in the order of
   * {@code hashedColumns}. The resulting index lies in {@code [0, product of the mapped dimension
   * sizes)}.
   *
   * @param tb the tuple batch to partition.
   * @return an array with {@code numPartitions()} entries; entry {@code k} holds the tuples of
   *     {@code tb} assigned to partition {@code k}.
   */
  @Override
  public TupleBatch[] partition(final TupleBatch tb) {
    // NOTE(review): numPartitions() is defined outside this file; it is assumed to be at least the
    // product of the mapped dimension sizes so that every computed index is in range -- confirm.
    final BitSet[] partitions = new BitSet[numPartitions()];
    for (int k = 0; k < partitions.length; ++k) {
      partitions[k] = new BitSet();
    }

    final int numTuples = tb.numTuples();
    for (int row = 0; row < numTuples; row++) {
      int p = 0;
      for (int j = 0; j < hashedColumns.length; j++) {
        final int dimension = mappedHCDimensions[j];
        final int dimensionSize = hyperCubeDimensions[dimension];
        final int coordinate =
            Math.floorMod(
                HashUtils.hashSubRow(tb, new int[] {hashedColumns[j]}, row, dimension),
                dimensionSize);
        // Horner-style mixed-radix accumulation: shift the digits gathered so far by the radix of
        // the digit being appended. Scaling only happens between columns (never after the last
        // one), which keeps p strictly below the product of the mapped dimension sizes.
        p = p * dimensionSize + coordinate;
      }
      partitions[p].set(row);
    }

    final TupleBatch[] tbs = new TupleBatch[partitions.length];
    for (int k = 0; k < tbs.length; ++k) {
      tbs[k] = tb.filter(partitions[k]);
    }
    return tbs;
  }
}