/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.java.operators;
import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.Public;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.NonParallelInput;
import org.apache.flink.api.common.operators.GenericDataSourceBase;
import org.apache.flink.api.common.operators.OperatorInformation;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.SplitDataProperties;
import org.apache.flink.configuration.Configuration;
/**
* An operation that creates a new data set (data source). The operation acts as the
* data set on which to apply further transformations. It encapsulates additional
* configuration parameters, to customize the execution.
*
* @param <OUT> The type of the elements produced by this data source.
*/
@Public
public class DataSource<OUT> extends Operator<OUT, DataSource<OUT>> {
private final InputFormat<OUT, ?> inputFormat;
private final String dataSourceLocationName;
private Configuration parameters;
private SplitDataProperties<OUT> splitDataProperties;
// --------------------------------------------------------------------------------------------
/**
* Creates a new data source.
*
* @param context The environment in which the data source gets executed.
* @param inputFormat The input format that the data source executes.
* @param type The type of the elements produced by this input format.
*/
public DataSource(ExecutionEnvironment context, InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> type, String dataSourceLocationName) {
super(context, type);
this.dataSourceLocationName = dataSourceLocationName;
if (inputFormat == null) {
throw new IllegalArgumentException("The input format may not be null.");
}
this.inputFormat = inputFormat;
if (inputFormat instanceof NonParallelInput) {
this.parallelism = 1;
}
}
/**
* Gets the input format that is executed by this data source.
*
* @return The input format that is executed by this data source.
*/
@Internal
public InputFormat<OUT, ?> getInputFormat() {
return this.inputFormat;
}
/**
* Pass a configuration to the InputFormat
* @param parameters Configuration parameters
*/
public DataSource<OUT> withParameters(Configuration parameters) {
this.parameters = parameters;
return this;
}
/**
* @return Configuration for the InputFormat.
*/
public Configuration getParameters() {
return this.parameters;
}
/**
* Returns the {@link org.apache.flink.api.java.io.SplitDataProperties} for the
* {@link org.apache.flink.core.io.InputSplit}s of this DataSource
* for configurations.
*
* SplitDataProperties can help to generate more efficient execution plans.
* <br>
* <b>
* IMPORTANT: Incorrect configuration of SplitDataProperties can cause wrong results!
* </b>
*
* @return The SplitDataProperties for the InputSplits of this DataSource.
*/
@PublicEvolving
public SplitDataProperties<OUT> getSplitDataProperties() {
if(this.splitDataProperties == null) {
this.splitDataProperties = new SplitDataProperties<OUT>(this);
}
return this.splitDataProperties;
}
// --------------------------------------------------------------------------------------------
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() {
String name = this.name != null ? this.name : "at "+dataSourceLocationName+" ("+inputFormat.getClass().getName()+")";
if (name.length() > 150) {
name = name.substring(0, 150);
}
@SuppressWarnings({ "unchecked", "rawtypes" })
GenericDataSourceBase<OUT, ?> source = new GenericDataSourceBase(this.inputFormat,
new OperatorInformation<OUT>(getType()), name);
source.setParallelism(parallelism);
if(this.parameters != null) {
source.getParameters().addAll(this.parameters);
}
if(this.splitDataProperties != null) {
source.setSplitDataProperties(this.splitDataProperties);
}
return source;
}
}