/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.dstream.tez;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.stream.Stream;
import org.apache.tez.dag.api.Vertex;
import io.dstream.DStreamConstants;
import io.dstream.SerializableStreamAssets.SerFunction;
import io.dstream.SerializableStreamAssets.SerSupplier;
import io.dstream.support.Classifier;
import io.dstream.support.HashClassifier;
import io.dstream.utils.ReflectionUtils;
/**
 * Mutable description of a single task from which a Tez {@link Vertex} is created.
 * <p>
 * A descriptor carries the task identity (id, name, operation name), a link to the
 * descriptor that precedes it, the stream function to execute, the {@link Classifier}
 * and parallelism, plus optional source information (element type, supplier,
 * input format class) and chains of dependent tasks.
 * <p>
 * This class performs no synchronization of its own.
 */
public class TaskDescriptor {

    private final String name;

    private final int id;

    private final TaskDescriptor previousTaskDescriptor;

    private final String operationName;

    private SerFunction<Stream<?>, Stream<?>> function;

    private Classifier classifier;

    // Used when DStreamConstants.PARALLELISM is not present in the execution configuration.
    private int parallelism = 1;

    private Class<?> sourceElementType;

    private SerSupplier<?> sourceSupplier;

    // Created lazily by addDependentTasksChain(..); stays null until the first chain is added.
    private List<List<TaskDescriptor>> dependentTasksChains;

    private Class<?> inputFormatClass;

    /**
     * Creates a task description from which a Tez {@link Vertex} is created.
     * <p>
     * Parallelism is read from the {@link DStreamConstants#PARALLELISM} property and
     * defaults to 1 when the property is absent. The {@link Classifier} is created
     * from the class name held in the {@link DStreamConstants#CLASSIFIER} property
     * (passing the parallelism as its single {@code int} argument); when that property
     * is absent a {@link HashClassifier} with the same parallelism is used.
     * <p>
     * The resulting classifier and parallelism are both stored on <i>this</i> descriptor.
     * NOTE(review): earlier documentation stated that the classifier is set on the
     * previous descriptor (where Tez invokes the partitioning logic). The code has
     * always assigned it to this descriptor - confirm which one callers expect.
     *
     * @param id numeric identifier of this task
     * @param name name of this task
     * @param operationName name of the operation this task represents
     * @param executionConfig configuration consulted for parallelism and classifier; must not be null
     * @param previousTaskDescriptor descriptor of the preceding task; stored as is (may be null)
     * @throws NumberFormatException if the parallelism property is present but is not an integer
     */
    public TaskDescriptor(int id, String name, String operationName, Properties executionConfig, TaskDescriptor previousTaskDescriptor) {
        this.name = name;
        this.id = id;
        this.operationName = operationName;
        this.previousTaskDescriptor = previousTaskDescriptor;

        String parallelismProp = executionConfig.getProperty(DStreamConstants.PARALLELISM);
        if (parallelismProp != null) {
            this.parallelism = parseParallelism(parallelismProp);
        }

        String classifierProp = executionConfig.getProperty(DStreamConstants.CLASSIFIER);
        Classifier configuredClassifier = classifierProp != null
                ? ReflectionUtils.newInstance(classifierProp, new Class<?>[] {int.class}, new Object[] {this.parallelism})
                : new HashClassifier(this.parallelism);
        // Assign the field directly: setClassifier(..) is overridable and must not be
        // invoked while the instance is still being constructed.
        this.classifier = configuredClassifier;
    }

    /**
     * Returns the descriptor of the task preceding this one.
     *
     * @return the value supplied at construction (may be null)
     */
    public TaskDescriptor getPreviousTaskDescriptor() {
        return this.previousTaskDescriptor;
    }

    /**
     * Returns the parallelism of this task.
     *
     * @return the configured parallelism, or 1 if none was configured
     */
    public int getParallelism() {
        return this.parallelism;
    }

    /**
     * Returns the chains of dependent tasks registered with this descriptor.
     *
     * @return the internal (live) list of chains, or {@code null} if
     *         {@link #addDependentTasksChain(List)} was never called
     */
    public List<List<TaskDescriptor>> getDependentTasksChains() {
        return this.dependentTasksChains;
    }

    /**
     * Returns the name of the operation this task represents.
     *
     * @return the operation name supplied at construction
     */
    public String getOperationName() {
        return this.operationName;
    }

    /**
     * Registers a chain of dependent tasks, creating the backing list on first use.
     *
     * @param dependentTasksChain the chain to append; stored as is, without copying
     */
    public void addDependentTasksChain(List<TaskDescriptor> dependentTasksChain) {
        if (this.dependentTasksChains == null) {
            this.dependentTasksChains = new ArrayList<>();
        }
        this.dependentTasksChains.add(dependentTasksChain);
    }

    /**
     * Returns the input format class associated with this task.
     *
     * @return the input format class, or {@code null} if none was set
     */
    public Class<?> getInputFormatClass() {
        return this.inputFormatClass;
    }

    /**
     * Sets the input format class associated with this task.
     *
     * @param inputFormatClass the input format class
     */
    public void setInputFormatClass(Class<?> inputFormatClass) {
        this.inputFormatClass = inputFormatClass;
    }

    /**
     * Returns the supplier of the source for this task.
     *
     * @return the source supplier, or {@code null} if none was set
     */
    public SerSupplier<?> getSourceSupplier() {
        return this.sourceSupplier;
    }

    /**
     * Returns the numeric identifier of this task.
     *
     * @return the id supplied at construction
     */
    public int getId() {
        return this.id;
    }

    /**
     * Returns the {@link Classifier} currently assigned to this task.
     *
     * @return the classifier created by the constructor, or whatever was last
     *         passed to {@code setClassifier(..)}
     */
    public Classifier getClassifier() {
        return this.classifier;
    }

    /**
     * Returns the function accumulated so far via {@link #compose(SerFunction)}
     * and {@link #andThen(SerFunction)}.
     *
     * @return the current function, or {@code null} if none was supplied yet
     */
    public SerFunction<Stream<?>, Stream<?>> getFunction() {
        return this.function;
    }

    /**
     * Combines the given function with the current one as
     * {@code current.compose(cFunction)}; if there is no current function the
     * given one simply becomes current.
     * <p>
     * Presumably {@code SerFunction.compose} follows
     * {@code java.util.function.Function#compose} semantics, in which case
     * {@code cFunction} runs <i>before</i> the existing function - verify
     * against {@code SerFunction}.
     *
     * @param cFunction the function to combine with the current one
     */
    public void compose(SerFunction<Stream<?>, Stream<?>> cFunction) {
        if (this.function == null) {
            this.function = cFunction;
        }
        else {
            this.function = this.function.compose(cFunction);
        }
    }

    /**
     * Combines the given function with the current one as
     * {@code aFunction.compose(current)}; if there is no current function the
     * given one simply becomes current.
     * <p>
     * Presumably {@code SerFunction.compose} follows
     * {@code java.util.function.Function#compose} semantics, in which case
     * {@code aFunction} runs <i>after</i> the existing function - verify
     * against {@code SerFunction}.
     *
     * @param aFunction the function to combine with the current one
     */
    public void andThen(SerFunction<Stream<?>, Stream<?>> aFunction) {
        if (this.function == null) {
            this.function = aFunction;
        }
        else {
            this.function = aFunction.compose(this.function);
        }
    }

    /**
     * Returns the name of this task.
     *
     * @return the name supplied at construction
     */
    public String getName() {
        return this.name;
    }

    /**
     * Returns the element type of the source for this task.
     *
     * @return the source element type, or {@code null} if none was set
     */
    public Class<?> getSourceElementType() {
        return this.sourceElementType;
    }

    /**
     * Sets the element type of the source for this task.
     *
     * @param sourceElementType the source element type
     */
    void setSourceElementType(Class<?> sourceElementType) {
        this.sourceElementType = sourceElementType;
    }

    /**
     * Replaces the {@link Classifier} assigned to this task.
     *
     * @param classifier the new classifier
     */
    void setClassifier(Classifier classifier) {
        this.classifier = classifier;
    }

    /**
     * Sets the supplier of the source for this task.
     *
     * @param sourceSupplier the source supplier
     */
    void setSourceSupplier(SerSupplier<?> sourceSupplier) {
        this.sourceSupplier = sourceSupplier;
    }

    /**
     * Parses the configured parallelism, tolerating surrounding whitespace
     * (values loaded from .properties files keep trailing whitespace).
     * Failures are re-thrown as the same exception type, but naming the
     * offending property and value.
     */
    private static int parseParallelism(String value) {
        try {
            return Integer.parseInt(value.trim());
        }
        catch (NumberFormatException e) {
            NumberFormatException detailed = new NumberFormatException("Invalid value '" + value
                    + "' for property '" + DStreamConstants.PARALLELISM + "': expected an integer");
            detailed.initCause(e);
            throw detailed;
        }
    }
}