/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.data.batch;
import co.cask.cdap.api.annotation.Beta;
import co.cask.cdap.api.common.RuntimeArguments;
import co.cask.cdap.api.data.format.FormatSpecification;
import co.cask.cdap.api.data.stream.StreamBatchReadable;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.api.stream.StreamEventDecoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
/**
* Defines input to a program, such as MapReduce.
*/
public abstract class Input {

  private final String name;
  // Optional alias; null means no alias has been set, in which case getAlias() falls back to the name.
  private String alias;

  // Private so that only the nested implementations declared in this class can extend Input.
  private Input(String name) {
    this.name = name;
  }

  /**
   * @return The name of the input.
   */
  public String getName() {
    return name;
  }

  /**
   * @return an alias of the input, to be used as the input name instead of the actual name of the input (i.e. dataset
   *         name or stream name). Defaults to the actual name, in the case that no alias was set.
   */
  public String getAlias() {
    return alias == null ? name : alias;
  }

  /**
   * Sets an alias to be used as the input name.
   *
   * @param alias the alias to be set for this Input
   * @return the Input being operated on
   */
  public Input alias(String alias) {
    this.alias = alias;
    return this;
  }

  /**
   * Returns an Input defined by a dataset, using {@link RuntimeArguments#NO_ARGUMENTS} and no explicit splits.
   *
   * @param datasetName the name of the input dataset
   * @return an Input for the given dataset
   */
  public static Input ofDataset(String datasetName) {
    return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS);
  }

  /**
   * Returns an Input defined by a dataset, with no explicit splits.
   *
   * @param datasetName the name of the input dataset
   * @param arguments the arguments to use when instantiating the dataset; must not be null
   * @return an Input for the given dataset
   * @throws NullPointerException if {@code arguments} is null
   */
  public static Input ofDataset(String datasetName, Map<String, String> arguments) {
    return ofDataset(datasetName, arguments, null);
  }

  /**
   * Returns an Input defined by a dataset, using {@link RuntimeArguments#NO_ARGUMENTS}.
   *
   * @param datasetName the name of the input dataset
   * @param splits the data selection splits. If null, will use the splits defined by the dataset.
   * @return an Input for the given dataset
   */
  public static Input ofDataset(String datasetName, @Nullable Iterable<? extends Split> splits) {
    return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS, splits);
  }

  /**
   * Returns an Input defined by a dataset.
   *
   * @param datasetName the name of the input dataset
   * @param arguments the arguments to use when instantiating the dataset; must not be null
   * @param splits the data selection splits. If null, will use the splits defined by the dataset.
   * @return an Input for the given dataset
   * @throws NullPointerException if {@code arguments} is null
   */
  public static Input ofDataset(String datasetName, Map<String, String> arguments,
                                @Nullable Iterable<? extends Split> splits) {
    return new DatasetInput(datasetName, arguments, splits);
  }

  /**
   * Returns an Input defined by an InputFormatProvider.
   *
   * @param inputName the name of the input
   * @param inputFormatProvider the provider that defines the input
   * @return an Input backed by the given provider
   */
  public static Input of(String inputName, InputFormatProvider inputFormatProvider) {
    return new InputFormatProviderInput(inputName, inputFormatProvider);
  }

  /**
   * Returns an Input defined with the given stream name with all time range
   * (start time {@code 0}, end time {@link Long#MAX_VALUE}).
   *
   * @param streamName Name of the stream.
   * @return an Input for the given stream
   */
  public static Input ofStream(String streamName) {
    return ofStream(new StreamBatchReadable(streamName, 0, Long.MAX_VALUE));
  }

  /**
   * Returns an Input defined by a stream with the given properties.
   *
   * @param streamName Name of the stream.
   * @param startTime Start timestamp in milliseconds.
   * @param endTime End timestamp in milliseconds.
   * @return an Input for the given stream
   */
  public static Input ofStream(String streamName, long startTime, long endTime) {
    return ofStream(new StreamBatchReadable(streamName, startTime, endTime));
  }

  /**
   * Returns an Input defined by a stream with the given properties.
   *
   * @param streamName Name of the stream
   * @param startTime Start timestamp in milliseconds (inclusive) of stream events provided to the job
   * @param endTime End timestamp in milliseconds (exclusive) of stream events provided to the job
   * @param decoderType The {@link StreamEventDecoder} class for decoding {@link StreamEvent}
   * @return an Input for the given stream
   */
  public static Input ofStream(String streamName, long startTime,
                               long endTime, Class<? extends StreamEventDecoder> decoderType) {
    return ofStream(new StreamBatchReadable(streamName, startTime, endTime, decoderType));
  }

  /**
   * Returns an Input defined by a stream with the given properties.
   *
   * @param streamName Name of the stream
   * @param startTime Start timestamp in milliseconds (inclusive) of stream events provided to the job
   * @param endTime End timestamp in milliseconds (exclusive) of stream events provided to the job
   * @param bodyFormatSpec The {@link FormatSpecification} for decoding {@link StreamEvent}
   * @return an Input for the given stream
   */
  @Beta
  public static Input ofStream(String streamName, long startTime,
                               long endTime, FormatSpecification bodyFormatSpec) {
    return ofStream(new StreamBatchReadable(streamName, startTime, endTime, bodyFormatSpec));
  }

  /**
   * Returns an Input defined by a stream.
   *
   * @param streamBatchReadable specifies the stream to be used as input
   * @return an Input wrapping the given {@link StreamBatchReadable}
   */
  private static Input ofStream(StreamBatchReadable streamBatchReadable) {
    return new StreamInput(streamBatchReadable);
  }

  /**
   * An implementation of {@link Input}, which defines a {@link co.cask.cdap.api.dataset.Dataset} as an input.
   */
  public static class DatasetInput extends Input {

    private final Map<String, String> arguments;
    // Null when no explicit splits were supplied; otherwise an unmodifiable snapshot.
    private final List<Split> splits;

    private DatasetInput(String name, Map<String, String> arguments, @Nullable Iterable<? extends Split> splits) {
      super(name);
      // Snapshot both inputs so later changes made by the caller cannot affect this Input.
      this.arguments = Collections.unmodifiableMap(new HashMap<>(arguments));
      this.splits = copySplits(splits);
    }

    /**
     * Copies the given splits into an unmodifiable list, preserving iteration order.
     *
     * @param splitsToCopy the splits to copy, or null
     * @return an unmodifiable copy of the splits, or null if {@code splitsToCopy} is null
     */
    @Nullable
    private static List<Split> copySplits(@Nullable Iterable<? extends Split> splitsToCopy) {
      if (splitsToCopy == null) {
        return null;
      }
      List<Split> copiedSplits = new ArrayList<>();
      for (Split split : splitsToCopy) {
        copiedSplits.add(split);
      }
      // Wrapped so callers of getSplits() cannot mutate internal state, matching how arguments is exposed.
      return Collections.unmodifiableList(copiedSplits);
    }

    /**
     * @return an unmodifiable view of the arguments given when this input was created
     */
    public Map<String, String> getArguments() {
      return arguments;
    }

    /**
     * @return an unmodifiable list of the splits given when this input was created,
     *         or null if no splits were given
     */
    @Nullable
    public List<Split> getSplits() {
      return splits;
    }
  }

  /**
   * An implementation of {@link Input}, which defines a {@link co.cask.cdap.api.data.stream.Stream} as an input.
   */
  public static class StreamInput extends Input {

    private final StreamBatchReadable streamBatchReadable;

    private StreamInput(StreamBatchReadable streamBatchReadable) {
      // The input name is the name of the stream being read.
      super(streamBatchReadable.getStreamName());
      this.streamBatchReadable = streamBatchReadable;
    }

    /**
     * @return the {@link StreamBatchReadable} that this input was created with
     */
    @Deprecated
    public StreamBatchReadable getStreamBatchReadable() {
      return streamBatchReadable;
    }
  }

  /**
   * An implementation of {@link Input}, which defines an {@link InputFormatProvider} as an input.
   */
  public static class InputFormatProviderInput extends Input {

    private final InputFormatProvider inputFormatProvider;

    private InputFormatProviderInput(String name, InputFormatProvider inputFormatProvider) {
      super(name);
      this.inputFormatProvider = inputFormatProvider;
    }

    /**
     * @return the {@link InputFormatProvider} that this input was created with
     */
    public InputFormatProvider getInputFormatProvider() {
      return inputFormatProvider;
    }
  }
}