package eu.stratosphere.sopremo.operator;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import javolution.text.TypeFormat;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.serializers.FieldSerializer;
import eu.stratosphere.pact.common.plan.PactModule;
import eu.stratosphere.sopremo.AbstractSopremoType;
import eu.stratosphere.sopremo.ISopremoType;
import eu.stratosphere.sopremo.expressions.EvaluationExpression;
import eu.stratosphere.util.CollectionUtil;
import eu.stratosphere.util.reflect.ReflectUtil;
/**
* Base class for all Sopremo operators. Every operator consumes and produces a specific number of {@link JsonStream}s.
* The operator groups input json objects accordingly to its semantics and transforms the partitioned objects to one or
* more outputs.<br>
* Each Sopremo operator may be converted to a {@link PactModule} with the {@link #asElementaryOperators()} and
* {@link ElementarySopremoModule#asPactModule()} method.<br>
* Implementations of an operator should either extend {@link ElementaryOperator} or {@link CompositeOperator}.
*/
// @DefaultSerializer(Operator.OperatorSerializer.class)
public abstract class Operator<Self extends Operator<Self>> extends ConfigurableSopremoType implements
ISopremoType, JsonStream, Cloneable {
public final static List<? extends EvaluationExpression> ALL_KEYS =
Collections.singletonList(EvaluationExpression.VALUE);
public final static int STANDARD_DEGREE_OF_PARALLELISM = -1;
private final List<JsonStream> inputs = new ArrayList<JsonStream>();
private String name;
private List<JsonStream> outputs = new ArrayList<JsonStream>();
private int minInputs, maxInputs, minOutputs, maxOutputs;
private int degreeOfParallelism = STANDARD_DEGREE_OF_PARALLELISM;
private final boolean fixedDegreeOfParallelism;
/**
* Initializes the Operator with the annotations.
*/
public Operator() {
final InputCardinality inputs = ReflectUtil.getAnnotation(this.getClass(), InputCardinality.class);
if (inputs == null)
throw new IllegalStateException("No InputCardinality annotation found @ " + this.getClass());
final OutputCardinality outputs = ReflectUtil.getAnnotation(this.getClass(), OutputCardinality.class);
if (outputs == null)
throw new IllegalStateException("No OutputCardinality annotation found @ " + this.getClass());
this.setNumberOfInputs(inputs.value() != -1 ? inputs.value() : inputs.min(),
inputs.value() != -1 ? inputs.value() : inputs.max());
this.setNumberOfOutputs(outputs.value() != -1 ? outputs.value() : outputs.min(),
outputs.value() != -1 ? outputs.value() : outputs.max());
final DegreeOfParallelism degreeOfParallelism =
ReflectUtil.getAnnotation(this.getClass(), DegreeOfParallelism.class);
if (degreeOfParallelism == null)
this.fixedDegreeOfParallelism = false;
else {
this.fixedDegreeOfParallelism = true;
this.degreeOfParallelism = degreeOfParallelism.value();
}
}
/**
* Initializes the Operator with the given number of inputs and outputs.
*
* @param minInputs
* the minimum number of inputs
* @param maxInputs
* the maximum number of inputs
* @param minOutputs
* the minimum number of outputs
* @param maxOutputs
* the maximum number of outputs
*/
public Operator(final int minInputs, final int maxInputs, final int minOutputs, final int maxOutputs) {
this.setNumberOfInputs(minInputs, maxInputs);
this.setNumberOfOutputs(minOutputs, maxOutputs);
final DegreeOfParallelism degreeOfParallelism =
ReflectUtil.getAnnotation(this.getClass(), DegreeOfParallelism.class);
if (degreeOfParallelism == null)
this.fixedDegreeOfParallelism = false;
else {
this.fixedDegreeOfParallelism = true;
this.degreeOfParallelism = degreeOfParallelism.value();
}
}
/*
* (non-Javadoc)
* @see eu.stratosphere.sopremo.SopremoType#toString(java.lang.StringBuilder)
*/
@Override
public void appendAsString(final Appendable appendable) throws IOException {
appendable.append(this.getName());
}
public abstract ElementarySopremoModule asElementaryOperators();
@SuppressWarnings("unchecked")
@Override
public Operator<Self> clone() {
return (Operator<Self>) super.clone();
}
@Override
public boolean equals(final Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (this.getClass() != obj.getClass())
return false;
final Operator<?> other = (Operator<?>) obj;
return this.degreeOfParallelism == other.degreeOfParallelism;
}
/**
* Returns the degreeOfParallelism.
*
* @return the degreeOfParallelism
*/
public int getDegreeOfParallelism() {
return this.degreeOfParallelism;
}
/**
* Returns the output of an operator producing the {@link JsonStream} that is the input to this operator at the
* given position.
*
* @param index
* the index of the output
* @return the output that produces the input of this operator at the given position
*/
public JsonStream getInput(final int index) {
if (index < this.inputs.size())
return this.inputs.get(index);
if (index >= this.maxInputs)
throw new IndexOutOfBoundsException(String.format("index %s >= max %s", index, this.maxInputs));
return null;
}
/**
* Returns a list of operators producing the {@link JsonStream}s that are the inputs to this operator.<br>
* If multiple outputs of an operator are used as inputs for this operator, the operator appears several times.
*
* @return a list of operators that produce the input of this operator
*/
public List<Operator<?>> getInputOperators() {
return new AbstractList<Operator<?>>() {
@Override
public Operator<?> get(final int index) {
return Operator.this.inputs.get(index) == null ? null
: Operator.this.inputs.get(index).getSource().getOperator();
}
@Override
public int indexOf(final Object o) {
final ListIterator<JsonStream> e = Operator.this.inputs.listIterator();
while (e.hasNext())
if (o == e.next())
return e.previousIndex();
return -1;
}
@Override
public int size() {
return Operator.this.inputs.size();
}
};
}
/**
* Returns a list of outputs of operators producing the {@link JsonStream}s that are the inputs to this operator.<br>
* If an output is used multiple times as inputs for this operator, the output appears several times (for example in
* a self-join).
*
* @return a list of outputs that produce the input of this operator
*/
public List<JsonStream> getInputs() {
return new ArrayList<JsonStream>(this.inputs);
}
/**
* Returns the maxInputs.
*
* @return the maxInputs
*/
public int getMaxInputs() {
return this.maxInputs;
}
/**
* Returns the maxOutputs.
*
* @return the maxOutputs
*/
public int getMaxOutputs() {
return this.maxOutputs;
}
/**
* Returns the minInputs.
*
* @return the minInputs
*/
public int getMinInputs() {
return this.minInputs;
}
/**
* Returns the minOutputs.
*
* @return the minOutputs
*/
public int getMinOutputs() {
return this.minOutputs;
}
/**
* The name of this operator, which is the class name by default.
*
* @return the name of this operator.
* @see #setName(String)
*/
public String getName() {
if (this.name == null)
return this.getDefaultName();
return this.name;
}
/**
* Returns the number of inputs.
*
* @return the number of inputs
*/
public int getNumInputs() {
int numInputs = this.getMinInputs();
for (int index = numInputs; index < this.getMaxInputs() && index < this.inputs.size(); index++)
if (this.inputs.get(index) != null)
numInputs++;
return numInputs;
}
/**
* Returns the number of outputs.
*
* @return the number of outputs
*/
public int getNumOutputs() {
int numOutputs = this.getMinOutputs();
for (int index = numOutputs; index < this.getMaxOutputs() && index < this.outputs.size(); index++)
if (this.outputs.get(index) != null)
numOutputs++;
return numOutputs;
}
/**
* Returns the output at the specified index.
*
* @param index
* the index to lookup
* @return the output at the given position
*/
public JsonStream getOutput(final int index) {
this.checkSize(index, this.maxOutputs, this.outputs);
JsonStream output = this.outputs.get(index);
if (output == null)
this.outputs.set(index, output = new Output(this, index));
return output;
}
/**
* Returns all outputs of this operator.
*
* @return all outputs of this operator
*/
public List<JsonStream> getOutputs() {
final ArrayList<JsonStream> outputs = new ArrayList<JsonStream>(this.minInputs);
for (int index = 0; index < this.outputs.size(); index++)
outputs.add(this.getOutput(index));
return outputs;
}
/**
* Returns the first output of this operator.
*/
@Override
public Output getSource() {
return (Output) this.getOutput(0);
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + this.degreeOfParallelism;
return result;
}
@Property
@Name(adjective = "parallel")
public void setDegreeOfParallelism(final int degree) {
if (this.degreeOfParallelism == degree)
return;
if (degree < 1)
throw new RuntimeException("Degree of Parallelism cannot be set below 1");
if (this.fixedDegreeOfParallelism)
throw new RuntimeException("This operator has a fixed degree of parallelism of " + this.degreeOfParallelism);
this.degreeOfParallelism = degree;
}
/**
* Replaces the input at the given location with the given {@link JsonStream}s.
*
* @param index
* the index of the input
* @param input
* the new input
*/
public void setInput(final int index, final JsonStream input) {
this.checkSize(index, this.maxInputs, this.inputs);
this.checkInput(input);
this.inputs.set(index, input == null ? null : input.getSource());
}
/**
* Replaces the current list of inputs with the given list of {@link JsonStream}s.
*
* @param inputs
* the new inputs
*/
public void setInputs(final JsonStream... inputs) {
this.setInputs(Arrays.asList(inputs));
}
/**
* Replaces the current list of inputs with the given list of {@link JsonStream}s.
*
* @param inputs
* the new inputs
*/
public void setInputs(final List<? extends JsonStream> inputs) {
if (inputs == null)
throw new NullPointerException("inputs must not be null");
if (this.minInputs > inputs.size() || inputs.size() > this.maxInputs)
throw new IndexOutOfBoundsException();
this.inputs.clear();
for (final JsonStream input : inputs) {
this.checkInput(input);
this.inputs.add(input == null ? null : input.getSource());
}
}
/**
* Sets the name of this operator.
*
* @param name
* the new name of this operator
*/
public void setName(final String name) {
if (name == null)
throw new NullPointerException("name must not be null");
this.name = name;
}
public void validate() throws IllegalStateException {
for (int index = 0; index < this.inputs.size(); index++)
if (this.inputs.get(index) == null)
throw new IllegalStateException("unconnected input " + index);
}
/**
* Replaces the current list of inputs with the given list of {@link JsonStream}s.
*
* @param inputs
* the new inputs
* @return this
*/
public Self withInputs(final JsonStream... inputs) {
this.setInputs(inputs);
return this.self();
}
/**
* Replaces the current list of inputs with the given list of {@link JsonStream}s.
*
* @param inputs
* the new inputs
* @return this
*/
public Self withInputs(final List<? extends JsonStream> inputs) {
this.setInputs(inputs);
return this.self();
}
/**
* Sets the name of this operator.
*
* @param name
* the new name of this operator
*/
public Self withName(final String name) {
this.setName(name);
return this.self();
}
protected void checkInput(final JsonStream input) {
// current constraint, may be removed later
if (input != null && input.getSource().getOperator() == this)
throw new IllegalArgumentException("Cyclic reference");
}
protected void checkOutput(final JsonStream input) {
// current constraint, may be removed later
if (input != null && input.getSource().getOperator() == this)
throw new IllegalArgumentException("Cyclic reference");
}
protected String getDefaultName() {
return this.getClass().getSimpleName();
}
protected int getSafeInputIndex(final JsonStream input) {
final int index = this.inputs.indexOf(input);
if (index == -1)
throw new IllegalStateException("unknown input " + input);
return index;
}
@SuppressWarnings("unchecked")
protected final Self self() {
return (Self) this;
}
protected void setNumberOfInputs(final int num) {
this.setNumberOfInputs(num, num);
}
protected void setNumberOfInputs(final int min, final int max) {
if (min > max)
throw new IllegalArgumentException();
if (min < 0 || max < 0)
throw new IllegalArgumentException();
this.minInputs = min;
this.maxInputs = max;
CollectionUtil.ensureSize(this.inputs, this.minInputs);
}
/**
* Sets the number of outputs of this operator retaining all old outputs if possible (increased number of outputs).
*
* @param numberOfOutputs
* the number of outputs
*/
protected final void setNumberOfOutputs(final int numberOfOutputs) {
if (numberOfOutputs < this.outputs.size())
this.outputs.subList(numberOfOutputs, this.outputs.size()).clear();
else
for (int index = this.outputs.size(); index < numberOfOutputs; index++)
this.outputs.add(new Output(this, index));
}
protected void setNumberOfOutputs(final int min, final int max) {
if (min > max)
throw new IllegalArgumentException();
if (min < 0 || max < 0)
throw new IllegalArgumentException();
this.minOutputs = min;
this.maxOutputs = max;
CollectionUtil.ensureSize(this.outputs, this.minOutputs);
}
/**
* Replaces the output at the given location with the given {@link JsonStream}s.
*
* @param index
* the index of the output
* @param output
* the new output
*/
protected void setOutput(final int index, final JsonStream output) {
this.checkSize(index, this.maxOutputs, this.outputs);
this.checkOutput(output);
this.outputs.set(index, output == null ? null : output.getSource());
}
/**
* Replaces the current list of outputs with the given list of {@link JsonStream}s.
*
* @param outputs
* the new outputs
*/
protected void setOutputs(final JsonStream... outputs) {
this.setOutputs(Arrays.asList(outputs));
}
/**
* Replaces the current list of outputs with the given list of {@link JsonStream}s.
*
* @param outputs
* the new outputs
*/
protected void setOutputs(final List<? extends JsonStream> outputs) {
if (outputs == null)
throw new NullPointerException("outputs must not be null");
if (this.minOutputs > outputs.size() || outputs.size() > this.maxOutputs)
throw new IndexOutOfBoundsException();
this.outputs.clear();
for (final JsonStream output : outputs) {
this.checkOutput(output);
this.outputs.add(output == null ? null : output.getSource());
}
}
private void checkSize(final int index, final int max, final List<?> list) {
if (index >= max)
throw new IndexOutOfBoundsException(String.format("index %s >= max %s", index, max));
CollectionUtil.ensureSize(list, index + 1);
}
private void readObject(final ObjectInputStream ois) throws IOException, ClassNotFoundException {
ois.defaultReadObject();
this.outputs = new ArrayList<JsonStream>();
CollectionUtil.ensureSize(this.outputs, this.minOutputs);
}
public static class OperatorOutputSerializer extends com.esotericsoftware.kryo.Serializer<Output> {
/**
* Initializes Operator.OperatorOutputSerializer.
*/
public OperatorOutputSerializer() {
}
/*
* (non-Javadoc)
* @see com.esotericsoftware.kryo.Serializer#copy(com.esotericsoftware.kryo.Kryo, java.lang.Object)
*/
@Override
public Output copy(final Kryo kryo, final Output original) {
return (Output) kryo.copy(original.getOperator()).getOutput(original.getIndex());
}
/*
* (non-Javadoc)
* @see com.esotericsoftware.kryo.Serializer#read(com.esotericsoftware.kryo.Kryo,
* com.esotericsoftware.kryo.io.Input, java.lang.Class)
*/
@Override
public Output read(final Kryo kryo, final Input input, final Class<Output> type) {
final Operator<?> operator = (Operator<?>) kryo.readClassAndObject(input);
final int index = input.readInt(true);
System.out.println(operator + " " + index);
return (Output) operator.getOutput(index);
}
/*
* (non-Javadoc)
* @see com.esotericsoftware.kryo.Serializer#write(com.esotericsoftware.kryo.Kryo,
* com.esotericsoftware.kryo.io.Output, java.lang.Object)
*/
@Override
public void write(final Kryo kryo, final com.esotericsoftware.kryo.io.Output output, final Output object) {
kryo.writeClassAndObject(output, object.getOperator());
System.out.println(object.getOperator() + " " + object.getIndex());
output.writeInt(object.getIndex(), true);
}
}
public static class OperatorSerializer extends com.esotericsoftware.kryo.Serializer<Operator<?>> {
private final FieldSerializer<Operator<?>> fieldSerializer;
private final static ThreadLocal<OperatorSerializationPool> OperatorSerializationStack =
new ThreadLocal<OperatorSerializationPool>() {
@Override
protected OperatorSerializationPool initialValue() {
return new OperatorSerializationPool();
};
};
public OperatorSerializer(final Kryo kryo, final Class<Operator<?>> type) {
this.fieldSerializer = new FieldSerializer<Operator<?>>(kryo, type);
}
/*
* (non-Javadoc)
* @see com.esotericsoftware.kryo.Serializer#copy(com.esotericsoftware.kryo.Kryo, java.lang.Object)
*/
@Override
public Operator<?> copy(final Kryo kryo, final Operator<?> original) {
return this.fieldSerializer.copy(kryo, original);
}
/*
* (non-Javadoc)
* @see com.esotericsoftware.kryo.Serializer#read(com.esotericsoftware.kryo.Kryo,
* com.esotericsoftware.kryo.io.Input, java.lang.Class)
*/
@Override
public Operator<?> read(final Kryo kryo, final Input input, final Class<Operator<?>> type) {
final OperatorSerializationPool stack = OperatorSerializationStack.get();
final List<Operator<?>> operatorDeserializedAt = stack.operatorDeserializedId;
if (input.readBoolean())
return operatorDeserializedAt.get(input.readByteUnsigned());
stack.stackDepth++;
final Operator<?> object = kryo.newInstance(type);
operatorDeserializedAt.add(object);
final FieldSerializer<?>.CachedField<?>[] fields = this.fieldSerializer.getFields();
for (int i = 0, n = fields.length; i < n; i++)
fields[i].read(input, object);
if (--stack.stackDepth == 0)
operatorDeserializedAt.clear();
return object;
}
/*
* (non-Javadoc)
* @see com.esotericsoftware.kryo.Serializer#write(com.esotericsoftware.kryo.Kryo,
* com.esotericsoftware.kryo.io.Output, java.lang.Object)
*/
@Override
public void write(final Kryo kryo, final com.esotericsoftware.kryo.io.Output output, final Operator<?> object) {
final OperatorSerializationPool stack = OperatorSerializationStack.get();
final Map<Operator<?>, Integer> operatorSerializationId = stack.operatorSerializedId;
final Integer serializationId = operatorSerializationId.get(object);
output.writeBoolean(serializationId != null);
if (serializationId != null)
output.writeByte(serializationId);
else {
operatorSerializationId.put(object, operatorSerializationId.size());
stack.stackDepth++;
this.fieldSerializer.write(kryo, output, object);
if (--stack.stackDepth == 0)
operatorSerializationId.clear();
}
}
private static class OperatorSerializationPool {
private final Map<Operator<?>, Integer> operatorSerializedId = new IdentityHashMap<Operator<?>, Integer>();
private final List<Operator<?>> operatorDeserializedId = new ArrayList<Operator<?>>();
// private final Map<Operator<?>, Operator<?>> copies = new IdentityHashMap<Operator<?>, Operator<?>>();
//
private int stackDepth;
}
}
/**
* Represents one output of this {@link Operator}. The output should be connected to another Operator to create a
* directed acyclic graph of Operators.
*/
// @DefaultSerializer(OperatorOutputSerializer.class)
public static class Output extends AbstractSopremoType implements JsonStream {
private final int index;
private final Operator<?> operator;
/**
* Initializes Operator.Output.
*/
Output() {
this.operator = null;
this.index = 0;
}
private Output(final Operator<?> operator, final int index) {
this.operator = operator;
this.index = index;
}
@Override
public void appendAsString(final Appendable appendable) throws IOException {
appendable.append(this.getOperator().toString()).append('@');
TypeFormat.format(this.index, appendable);
}
@Override
public boolean equals(final Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (this.getClass() != obj.getClass())
return false;
final Operator.Output other = (Operator.Output) obj;
return this.index == other.index && this.getOperator() == other.getOperator();
}
/**
* Returns the index of this output in the list of outputs of the associated operator.
*
* @return the index of this output
*/
public int getIndex() {
return this.index;
}
/**
* Returns the associated operator.
*
* @return the associated operator
*/
public Operator<?> getOperator() {
return this.operator;
}
@Override
public Output getSource() {
return this;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + this.index;
result = prime * result + this.getOperator().hashCode();
return result;
}
}
}