/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.api.common.operators.base;
import java.util.List;
import com.google.common.base.Preconditions;
import eu.stratosphere.api.common.distributions.DataDistribution;
import eu.stratosphere.api.common.io.OutputFormat;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.Ordering;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.util.UserCodeObjectWrapper;
import eu.stratosphere.api.common.operators.util.UserCodeWrapper;
import eu.stratosphere.types.Nothing;
import eu.stratosphere.util.Visitor;
/**
 * Operator for nodes that act as data sinks, storing the data they receive.
 * The way the data is stored is handled by the {@link eu.stratosphere.api.common.io.OutputFormat}.
 *
 * @param <IN> The type of the records consumed by this sink.
 */
public class GenericDataSinkBase<IN> extends Operator<Nothing> {

    /** Wrapper around the output format that writes the records. Never null. */
    protected final UserCodeWrapper<? extends OutputFormat<IN>> formatWrapper;

    /** The operator producing this sink's input; null until an input has been set. */
    protected Operator<IN> input = null;

    /** The ordering within each output fragment; null until set. */
    private Ordering localOrdering;

    /** The ordering used for range partitioning; null until set. */
    private Ordering partitionOrdering;

    /** The data distribution used for range partitioning; null until set. */
    private DataDistribution distribution;

    // --------------------------------------------------------------------------------------------

    /**
     * Creates a GenericDataSink with the provided {@link eu.stratosphere.api.common.io.OutputFormat}
     * implementation and the given name. The format instance is wrapped in a
     * {@link UserCodeObjectWrapper}.
     *
     * @param f The {@link eu.stratosphere.api.common.io.OutputFormat} implementation used to sink the data.
     * @param operatorInfo The operator information, forwarded to the {@link Operator} base class.
     * @param name The given name for the sink, used in plans, logs and progress messages.
     * @throws NullPointerException If {@code f} is null.
     */
    public GenericDataSinkBase(OutputFormat<IN> f, UnaryOperatorInformation<IN, Nothing> operatorInfo, String name) {
        super(operatorInfo, name);
        Preconditions.checkNotNull(f, "The OutputFormat may not be null.");
        this.formatWrapper = new UserCodeObjectWrapper<OutputFormat<IN>>(f);
    }

    /**
     * Creates a GenericDataSink with the provided wrapper around an
     * {@link eu.stratosphere.api.common.io.OutputFormat} and the given name.
     *
     * @param f The wrapper around the {@link eu.stratosphere.api.common.io.OutputFormat} used to sink the data.
     * @param operatorInfo The operator information, forwarded to the {@link Operator} base class.
     * @param name The given name for the sink, used in plans, logs and progress messages.
     * @throws NullPointerException If {@code f} is null.
     */
    public GenericDataSinkBase(UserCodeWrapper<? extends OutputFormat<IN>> f, UnaryOperatorInformation<IN, Nothing> operatorInfo, String name) {
        super(operatorInfo, name);
        Preconditions.checkNotNull(f, "The OutputFormat class may not be null.");
        this.formatWrapper = f;
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Returns this operator's input operator.
     *
     * @return This operator's input, or null if no input has been set yet.
     */
    public Operator<IN> getInput() {
        return this.input;
    }

    /**
     * Sets the given operator as the input to this operator.
     *
     * @param input The operator to use as the input.
     * @throws NullPointerException If {@code input} is null.
     */
    public void setInput(Operator<IN> input) {
        Preconditions.checkNotNull(input, "The input may not be null.");
        this.input = input;
    }

    /**
     * Sets the input to the union of the given operators.
     *
     * @param inputs The operator(s) that form the input.
     * @throws NullPointerException If {@code inputs} is null.
     * @deprecated This method will be removed in future versions. Use the {@link eu.stratosphere.api.common.operators.Union} operator instead.
     */
    @Deprecated
    public void setInputs(Operator<IN>... inputs) {
        Preconditions.checkNotNull(inputs, "The inputs may not be null.");
        this.input = Operator.createUnionCascade(inputs);
    }

    /**
     * Sets the input to the union of the given operators.
     *
     * @param inputs The operator(s) that form the input.
     * @throws NullPointerException If {@code inputs} is null.
     * @deprecated This method will be removed in future versions. Use the {@link eu.stratosphere.api.common.operators.Union} operator instead.
     */
    @Deprecated
    public void setInputs(List<Operator<IN>> inputs) {
        Preconditions.checkNotNull(inputs, "The inputs may not be null.");
        this.input = Operator.createUnionCascade(inputs);
    }

    /**
     * Adds to the input the union of the given operators.
     *
     * @param inputs The operator(s) to be unioned with the input.
     * @throws NullPointerException If {@code inputs} is null.
     * @deprecated This method will be removed in future versions. Use the {@link eu.stratosphere.api.common.operators.Union} operator instead.
     */
    @Deprecated
    public void addInput(Operator<IN>... inputs) {
        Preconditions.checkNotNull(inputs, "The input may not be null.");
        this.input = Operator.createUnionCascade(this.input, inputs);
    }

    /**
     * Adds to the input the union of the given operators.
     *
     * @param inputs The operator(s) to be unioned with the input.
     * @throws NullPointerException If {@code inputs} is null.
     * @deprecated This method will be removed in future versions. Use the {@link eu.stratosphere.api.common.operators.Union} operator instead.
     */
    @SuppressWarnings("unchecked")
    @Deprecated
    public void addInputs(List<? extends Operator<IN>> inputs) {
        Preconditions.checkNotNull(inputs, "The inputs may not be null.");
        // Generic arrays cannot be created directly, hence the raw array and the unchecked cast.
        final Operator<IN>[] asArray = (Operator<IN>[]) inputs.toArray(new Operator[inputs.size()]);
        this.input = Operator.createUnionCascade(this.input, asArray);
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Sets the order in which the sink must write its data. For any value other than <tt>NONE</tt>,
     * this will cause the system to perform a global sort, or try to reuse an order from a
     * previous operation.
     * <p>
     * This delegates to {@link #setRangePartitioned(Ordering)}, which in this class always throws an
     * {@link UnsupportedOperationException}. The local ordering is only updated after the
     * range partitioning has been accepted, so a failed call leaves the sink unchanged.
     *
     * @param globalOrder The order to write the data in.
     * @throws UnsupportedOperationException If range partitioning without a data distribution is not supported.
     */
    public void setGlobalOrder(Ordering globalOrder) {
        // Partition first: if this fails, the local ordering must not have been touched.
        setRangePartitioned(globalOrder);
        this.localOrdering = globalOrder;
    }

    /**
     * Sets the order in which the sink must write its data. For any value other than <tt>NONE</tt>,
     * this will cause the system to perform a global sort, or try to reuse an order from a
     * previous operation.
     * <p>
     * The local ordering is only updated after the range partitioning has been validated and
     * accepted, so a failed call leaves the sink unchanged.
     *
     * @param globalOrder The order to write the data in.
     * @param distribution The distribution to use for the range partitioning.
     * @throws NullPointerException If {@code globalOrder} or {@code distribution} is null.
     * @throws IllegalArgumentException If the number of fields in the ordering and the distribution differ.
     */
    public void setGlobalOrder(Ordering globalOrder, DataDistribution distribution) {
        // Validate and apply the partitioning first: if this fails, the local ordering must not have been touched.
        setRangePartitioned(globalOrder, distribution);
        this.localOrdering = globalOrder;
    }

    /**
     * Gets the order in which the data sink writes its data locally. Local order means that
     * within each fragment of the file inside the distributed file system, the data is ordered,
     * but not across file fragments.
     *
     * @return The local ordering, or null if no local (or global) order has been set.
     */
    public Ordering getLocalOrder() {
        return this.localOrdering;
    }

    /**
     * Sets the order in which the sink must write its data within each fragment in the distributed
     * file system. For any value other than <tt>NONE</tt>, this will cause the system to perform a
     * local sort, or try to reuse an order from a previous operation.
     *
     * @param localOrder The local order to write the data in.
     */
    public void setLocalOrder(Ordering localOrder) {
        this.localOrdering = localOrder;
    }

    /**
     * Gets the record ordering over which the sink partitions in ranges.
     *
     * @return The record ordering over which to partition in ranges, or null if none has been set.
     */
    public Ordering getPartitionOrdering() {
        return this.partitionOrdering;
    }

    /**
     * Sets the sink to partition the records into ranges over the given ordering.
     * <p>
     * This variant is not supported in this class and always throws.
     *
     * @param partitionOrdering The record ordering over which to partition in ranges.
     * @throws UnsupportedOperationException Always, because a user supplied data distribution is required.
     */
    public void setRangePartitioned(Ordering partitionOrdering) {
        throw new UnsupportedOperationException(
            "Range partitioning is currently only supported with a user supplied data distribution.");
    }

    /**
     * Sets the sink to partition the records into ranges over the given ordering.
     * The bucket boundaries are determined using the given data distribution.
     *
     * @param partitionOrdering The record ordering over which to partition in ranges.
     * @param distribution The distribution to use for the range partitioning.
     * @throws NullPointerException If {@code partitionOrdering} or {@code distribution} is null.
     * @throws IllegalArgumentException If the number of fields in the ordering and the distribution differ.
     */
    public void setRangePartitioned(Ordering partitionOrdering, DataDistribution distribution) {
        Preconditions.checkNotNull(partitionOrdering, "The partition ordering may not be null.");
        Preconditions.checkNotNull(distribution, "The data distribution may not be null.");

        if (partitionOrdering.getNumberOfFields() != distribution.getNumberOfFields()) {
            throw new IllegalArgumentException("The number of keys in the distribution must match number of ordered fields.");
        }

        // TODO: check compatibility of distribution and ordering (number and order of keys, key types, etc.)
        // TODO: adapt partition ordering to data distribution (use prefix of ordering)

        // Both fields are assigned only after validation, so they always change together.
        this.partitionOrdering = partitionOrdering;
        this.distribution = distribution;
    }

    /**
     * Gets the distribution to use for the range partitioning.
     *
     * @return The distribution to use for the range partitioning, or null if none has been set.
     */
    public DataDistribution getDataDistribution() {
        return this.distribution;
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Gets the wrapper describing this sink's output format.
     *
     * @return The output format wrapper.
     */
    public UserCodeWrapper<? extends OutputFormat<IN>> getFormatWrapper() {
        return this.formatWrapper;
    }

    /**
     * Gets the wrapper describing the output format.
     * <p>
     * This method returns the same object as {@link #getFormatWrapper()}.
     *
     * @return The wrapper describing the output format.
     *
     * @see eu.stratosphere.api.common.operators.Operator#getUserCodeWrapper()
     */
    @Override
    public UserCodeWrapper<? extends OutputFormat<IN>> getUserCodeWrapper() {
        return this.formatWrapper;
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Accepts the visitor and applies it to this instance. This method applies the visitor in a
     * depth-first traversal. The visitor's pre-visit method is called and, if it returns
     * <tt>true</tt>, the visitor is recursively applied on the single input. After the recursion
     * returned, the post-visit method is called. If the pre-visit method returns <tt>false</tt>,
     * neither the recursion nor the post-visit takes place.
     * <p>
     * An input must have been set before the sink is traversed; otherwise the descent fails with a
     * {@link NullPointerException}.
     *
     * @param visitor The visitor.
     *
     * @see eu.stratosphere.util.Visitable#accept(eu.stratosphere.util.Visitor)
     */
    @Override
    public void accept(Visitor<Operator<?>> visitor) {
        boolean descend = visitor.preVisit(this);
        if (descend) {
            this.input.accept(visitor);
            visitor.postVisit(this);
        }
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Returns the name of this sink.
     *
     * @return The operator name.
     */
    @Override
    public String toString() {
        return this.name;
    }
}