package edu.washington.escience.myria.operator.network.distribute; import java.io.Serializable; import javax.annotation.Nonnull; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonSubTypes.Type; import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.google.common.base.Preconditions; import edu.washington.escience.myria.storage.TupleBatch; /** The ShuffleProducer class uses an instance of the PartitionFunction class to decide which worker a tuple should be * routed to. Typically, the ShuffleProducer class invokes {@link #partition(Tuple, Schema) partition} on every tuple it * generates. */ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type") @JsonSubTypes({ @Type(value = RoundRobinPartitionFunction.class, name = "RoundRobin"), @Type(value = IdentityPartitionFunction.class, name = "Identity"), @Type(value = HyperCubePartitionFunction.class, name = "HyperCube"), @Type(value = HashPartitionFunction.class, name = "Hash"), @Type(value = SinglePartitionFunction.class, name = "Single") }) public abstract class PartitionFunction implements Serializable { /** Required for Java serialization. */ private static final long serialVersionUID = 1L; /** number of partitions. */ private Integer numPartitions = null; /** @return the number of partitions. */ public final int numPartitions() { Preconditions.checkState(numPartitions != null, "numPartitions has not been set"); return numPartitions; } /** partition the tuple batch into TupleBatch[], each element is one partition. * * @param data the data to be partitioned. * @return an array of partitions. */ public abstract TupleBatch[] partition(@Nonnull final TupleBatch data); /** @param numPartitions the number of partitions. */ public final void setNumPartitions(final int numPartitions) { Preconditions.checkArgument(numPartitions > 0, "numPartitions must be > 0"); this.numPartitions = numPartitions; } }