/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.dstream.tez;

import java.io.Serializable;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.util.Map.Entry;
import java.util.stream.Stream;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

import io.dstream.SerializableStreamAssets.SerFunction;
import io.dstream.support.AbstractPartitionedStreamProducingSourceSupplier;
import io.dstream.support.Classifier;

/**
 * Serializable holder for the pieces that make up a single task: its id, its
 * name, its {@link Classifier}, the stream-to-stream function it applies and,
 * optionally, an {@link AbstractPartitionedStreamProducingSourceSupplier}.
 * <p>
 * Instances are created through {@link #build(TaskDescriptor)}; the constructor
 * is private.
 */
final class Task implements Serializable {
	private static final long serialVersionUID = -1800812882885490376L;

	private final SerFunction<Stream<?>, Stream<?>> function;

	private final Classifier classifier;

	private final String name;

	private final int id;

	// Not final: assigned after construction by build(..) via the setter.
	private AbstractPartitionedStreamProducingSourceSupplier<?> streamProducingSourceSupplier;

	/**
	 * Creates a task from its already-resolved parts.
	 *
	 * @param id         the task id
	 * @param name       the task name
	 * @param classifier the classifier associated with this task
	 * @param function   the function this task applies to its input stream;
	 *                   stored as given (may be {@code null} if the caller passes {@code null})
	 */
	private Task(int id, String name, Classifier classifier, SerFunction<Stream<?>, Stream<?>> function) {
		this.id = id;
		this.name = name;
		this.classifier = classifier;
		this.function = function;
	}

	/**
	 * Builds a {@link Task} from the given descriptor.
	 * <p>
	 * The id, name and classifier are copied from the descriptor. The function
	 * is the descriptor's function after being passed through
	 * {@link #adjustTaskFunction(TaskDescriptor)}. If the descriptor's source
	 * supplier is an {@link AbstractPartitionedStreamProducingSourceSupplier},
	 * it is also attached to the returned task.
	 *
	 * @param taskDescriptor the descriptor to build the task from
	 * @return the newly created task
	 * @throws IllegalStateException if the task function cannot be adapted to
	 *         the descriptor's input format (see {@link #adjustTaskFunction(TaskDescriptor)})
	 */
	static Task build(TaskDescriptor taskDescriptor) {
		SerFunction<Stream<?>, Stream<?>> taskFunction = adjustTaskFunction(taskDescriptor);
		Task task = new Task(taskDescriptor.getId(), taskDescriptor.getName(), taskDescriptor.getClassifier(), taskFunction);
		if (taskDescriptor.getSourceSupplier() instanceof AbstractPartitionedStreamProducingSourceSupplier) {
			task.setStreamProducingSourceSupplier((AbstractPartitionedStreamProducingSourceSupplier<?>) taskDescriptor.getSourceSupplier());
		}
		return task;
	}

	/**
	 * Returns the function this task applies to its input stream.
	 *
	 * @return the task function as resolved by {@link #build(TaskDescriptor)}
	 */
	public SerFunction<Stream<?>, Stream<?>> getFunction() {
		return function;
	}

	/**
	 * Returns the classifier associated with this task.
	 *
	 * @return the classifier supplied when the task was built
	 */
	public Classifier getClassifier() {
		return this.classifier;
	}

	/**
	 * Returns the name of this task.
	 *
	 * @return the task name
	 */
	public String getName() {
		return name;
	}

	/**
	 * Returns the id of this task.
	 *
	 * @return the task id
	 */
	public int getId() {
		return id;
	}

	/**
	 * Returns the stream-producing source supplier attached to this task.
	 *
	 * @return the supplier, or {@code null} if none has been set
	 */
	public AbstractPartitionedStreamProducingSourceSupplier<?> getStreamProducingSourceSupplier() {
		return streamProducingSourceSupplier;
	}

	/**
	 * Attaches a stream-producing source supplier to this task.
	 *
	 * @param streamProducingSourceSupplier the supplier to attach
	 */
	void setStreamProducingSourceSupplier(AbstractPartitionedStreamProducingSourceSupplier<?> streamProducingSourceSupplier) {
		this.streamProducingSourceSupplier = streamProducingSourceSupplier;
	}

	/**
	 * Adjusts the task function so that it is compatible with Hadoop KV readers
	 * and with the element type expected by the user. For example, when reading
	 * a text file Tez produces KV pairs (offset, line), while the user only
	 * expects the value.
	 * <p>
	 * The function is adjusted only for the task with id {@code 0} whose source
	 * element type is not itself an {@link Entry}:
	 * <ul>
	 * <li>if the source element type is a {@link Writable}, every incoming
	 *     {@link Entry} is replaced by its value;</li>
	 * <li>otherwise, if the descriptor has an input format class whose second
	 *     generic type argument is {@link Text}, every incoming {@link Entry}
	 *     is replaced by the {@code toString()} of its value;</li>
	 * <li>otherwise, if the descriptor has no input format class, the function
	 *     is returned unchanged.</li>
	 * </ul>
	 * In both adapting cases the extraction step runs <em>before</em> the
	 * descriptor's function; if the descriptor has no function the extraction
	 * step alone is returned.
	 *
	 * @param taskDescriptor the descriptor whose function is to be adjusted
	 * @return the adjusted function, or the descriptor's own function (possibly
	 *         {@code null}) when no adjustment applies
	 * @throws IllegalStateException if an input format class is present but its
	 *         second generic type argument is not {@link Text}
	 */
	@SuppressWarnings("rawtypes")
	private static SerFunction<Stream<?>, Stream<?>> adjustTaskFunction(TaskDescriptor taskDescriptor) {
		SerFunction<Stream<?>, Stream<?>> function = taskDescriptor.getFunction();

		// Only the task with id 0 is adapted (presumably the one reading directly from the source).
		if (taskDescriptor.getId() != 0) {
			return function;
		}

		// The user asked for KV pairs, so the reader output is already in the expected shape.
		if (Entry.class.isAssignableFrom(taskDescriptor.getSourceElementType())) {
			return function;
		}

		if (Writable.class.isAssignableFrom(taskDescriptor.getSourceElementType())) {
			SerFunction<Stream<?>, Stream<?>> valueExtractor = stream -> stream.map(s -> ((Entry) s).getValue());
			// Null-safe: previously this branch dereferenced a possibly absent function.
			return prepend(function, valueExtractor);
		}

		if (taskDescriptor.getInputFormatClass() == null) { // only URI based sources will have Input Format
			return function;
		}

		// Relies on the input format's direct superclass being parameterized;
		// the second type argument is inspected (presumably the value type - confirm).
		ParameterizedType parameterizedType = (ParameterizedType) taskDescriptor.getInputFormatClass().getGenericSuperclass();
		Type type = parameterizedType.getActualTypeArguments()[1];
		if (!Text.class.getName().equals(type.getTypeName())) {
			//TODO need to design some type of extensible converter to support multiple types of Writable
			throw new IllegalStateException("Can't determine modified function");
		}

		SerFunction<Stream<?>, Stream<?>> textValueExtractor = stream -> stream.map(s -> ((Entry) s).getValue().toString());
		return prepend(function, textValueExtractor);
	}

	/**
	 * Returns a function that applies {@code adapter} first and then
	 * {@code function}; when {@code function} is {@code null} the adapter
	 * itself is returned.
	 *
	 * @param function the user function, may be {@code null}
	 * @param adapter  the step to run before {@code function}
	 * @return the composed function, or {@code adapter} if there is no user function
	 */
	private static SerFunction<Stream<?>, Stream<?>> prepend(SerFunction<Stream<?>, Stream<?>> function,
			SerFunction<Stream<?>, Stream<?>> adapter) {
		if (function == null) {
			return adapter;
		}
		return function.compose(adapter);
	}
}