/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.api.java.record.operators;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import eu.stratosphere.api.common.io.GenericInputFormat;
import eu.stratosphere.api.common.operators.base.GenericDataSourceBase;
import eu.stratosphere.api.java.record.io.CollectionInputFormat;
import eu.stratosphere.types.Record;
/**
* Operator for input nodes which reads data from collection or iterator.
* Use this operator if you want to pass data from the application submitting the
* Stratosphere job to the cluster.
* There are two main ways to use the CollectionDataSource:
* * Using a {@link SerializableIterator}
*
* <pre>
* CollectionDataSource source = new CollectionDataSource(new SerializableIteratorTest(), "IterSource");
* </pre>
*
* * Using a Collection of Java Objects.
*
* <pre>
* CollectionDataSource source2 = new CollectionDataSource(new ArrayList&lt;String&gt;(), "Collection source");
* </pre>
*
* Note that you can pass as many elements as you want to the constructor:
*
* <pre>
* CollectionDataSource source3 = new CollectionDataSource("Varargs String source", "some", "strings", "that", "get", "distributed");
* </pre>
*
* The only limitation is that the elements need to have the same type.
*/
public class CollectionDataSource extends GenericDataSourceBase<Record, GenericInputFormat<Record>> {

    /** Name used by the constructors that do not take an explicit name. */
    private static final String DEFAULT_NAME = "<Unnamed Collection Data Source>";

    private static final String TYPE_MISMATCH = "elements of input list should have the same type";

    private static final String SIZE_MISMATCH = "elements of input list should have the same size";

    // --------------------------------------------------------------------------------------------
    //  Named sources with an explicit input format
    // --------------------------------------------------------------------------------------------

    /**
     * Creates a named source over the given elements, using the given input format.
     * The elements are validated (see {@link #checkFormat(Collection)}) before they are handed to the format.
     *
     * @param f
     *        The {@link CollectionInputFormat} implementation used to read the data.
     * @param name
     *        The given name for the Pact, used in plans, logs and progress messages.
     * @param data
     *        The input elements; each argument becomes one element of the source.
     * @throws RuntimeException
     *         If the elements do not share one type or, for nested elements, one size.
     */
    public CollectionDataSource(CollectionInputFormat f, String name, Object... data) {
        super(f, OperatorInfoHelper.source(), name);
        Collection<Object> elements = asCollection(data);
        checkFormat(elements);
        f.setData(elements);
    }

    /**
     * Creates a named source over a two-dimensional array, using the given input format.
     * Each row of the array becomes one element of the source; rows must agree in length and in the
     * type found at each position.
     *
     * @param f
     *        The {@link CollectionInputFormat} implementation used to read the data.
     * @param name
     *        The given name for the Pact, used in plans, logs and progress messages.
     * @param data
     *        The input rows.
     * @throws RuntimeException
     *         If the rows differ in type, length, or per-position field type.
     */
    public CollectionDataSource(CollectionInputFormat f, String name, Object[][] data) {
        super(f, OperatorInfoHelper.source(), name);
        Collection<Object> elements = asCollection(data);
        checkFormat(elements);
        f.setData(elements);
    }

    /**
     * Creates a named source over the given collection, using the given input format.
     * The collection itself (not a copy) is validated and handed to the format.
     *
     * @param f
     *        The {@link CollectionInputFormat} implementation used to read the data.
     * @param data
     *        The input elements.
     * @param name
     *        The given name for the Pact, used in plans, logs and progress messages.
     * @throws RuntimeException
     *         If the elements do not share one type or, for nested elements, one size.
     */
    public CollectionDataSource(CollectionInputFormat f, Collection<?> data, String name) {
        super(f, OperatorInfoHelper.source(), name);
        checkFormat(data);
        f.setData(data);
    }

    /**
     * Creates a named source that reads from the given iterator, using the given input format.
     * No type check is performed on the elements the iterator produces.
     *
     * NOTE(review): in this signature {@code Serializable} declares a second, unused type parameter; it does
     * not bound {@code T}, so the compiler does not require the iterator to be serializable. The signature is
     * kept as is so that existing callers keep compiling.
     *
     * @param f
     *        The {@link CollectionInputFormat} implementation used to read the data.
     * @param data
     *        The iterator producing the input elements.
     * @param name
     *        The given name for the Pact, used in plans, logs and progress messages.
     */
    public <T extends Iterator<?>, Serializable> CollectionDataSource(CollectionInputFormat f, T data, String name) {
        super(f, OperatorInfoHelper.source(), name);
        f.setIter(data);
    }

    // --------------------------------------------------------------------------------------------
    //  Named sources with a freshly created CollectionInputFormat
    // --------------------------------------------------------------------------------------------

    /**
     * Creates a named source over the given elements, reading them through a new {@link CollectionInputFormat}.
     * The input types will be checked. If the input types don't agree, an exception will occur.
     *
     * @param name
     *        The given name for the Pact, used in plans, logs and progress messages.
     * @param args
     *        The input elements; each argument becomes one element of the source.
     */
    public CollectionDataSource(String name, Object... args) {
        this(new CollectionInputFormat(), name, args);
    }

    /**
     * Creates a named source over a two-dimensional array, reading it through a new
     * {@link CollectionInputFormat}. Each row becomes one element of the source.
     *
     * @param name
     *        The given name for the Pact, used in plans, logs and progress messages.
     * @param args
     *        The input rows.
     */
    public CollectionDataSource(String name, Object[][] args) {
        this(new CollectionInputFormat(), name, args);
    }

    /**
     * Creates a named source over the given collection, reading it through a new {@link CollectionInputFormat}.
     *
     * @param args
     *        The input elements.
     * @param name
     *        The given name for the Pact, used in plans, logs and progress messages.
     */
    public CollectionDataSource(Collection<?> args, String name) {
        this(new CollectionInputFormat(), args, name);
    }

    /**
     * Creates a named source that reads from the given iterator through a new {@link CollectionInputFormat}.
     * As in the format-taking variant, {@code Serializable} is only a type-parameter name here, not a bound.
     *
     * @param args
     *        The iterator producing the input elements.
     * @param name
     *        The given name for the Pact, used in plans, logs and progress messages.
     */
    public <T extends Iterator<?>, Serializable> CollectionDataSource(T args, String name) {
        this(new CollectionInputFormat(), args, name);
    }

    // --------------------------------------------------------------------------------------------
    //  Unnamed sources
    // --------------------------------------------------------------------------------------------

    /**
     * Creates a source with the default name over the given arguments, reading them through a new
     * {@link CollectionInputFormat}.
     *
     * for scala compatible, scala-to-java type conversion always has an object wrapper
     *
     * @param args
     *        Either a single {@link Iterator}, a single {@link Collection}, or the input elements themselves.
     */
    public CollectionDataSource(Object... args) {
        this(new CollectionInputFormat(), args);
    }

    /**
     * Creates a source with the default name, using the given input format.
     * <p>
     * The arguments are interpreted as follows:
     * <ul>
     * <li>exactly one argument that is an {@link Iterator}: the iterator is the data source (no type check),</li>
     * <li>exactly one argument that is a {@link Collection}: its elements are the data,</li>
     * <li>anything else: the arguments themselves are the data.</li>
     * </ul>
     * Exactly one of these cases applies. Previously the first two cases fell through into the third, which
     * replaced the data just registered by a one-element list wrapping the iterator or collection.
     *
     * @param f
     *        The {@link CollectionInputFormat} implementation used to read the data.
     * @param data
     *        A single iterator, a single collection, or the input elements.
     * @throws RuntimeException
     *         If the elements do not share one type or, for nested elements, one size.
     */
    @SuppressWarnings("unchecked")
    public CollectionDataSource(CollectionInputFormat f, Object... data) {
        super(f, OperatorInfoHelper.source(), DEFAULT_NAME);
        if (data.length == 1 && data[0] instanceof Iterator) {
            f.setIter((Iterator<Object>) data[0]);
        }
        else if (data.length == 1 && data[0] instanceof Collection) {
            Collection<Object> elements = (Collection<Object>) data[0];
            checkFormat(elements);
            f.setData(elements);
        }
        else {
            Collection<Object> elements = asCollection(data);
            checkFormat(elements);
            f.setData(elements);
        }
    }

    // --------------------------------------------------------------------------------------------
    //  Validation helpers
    // --------------------------------------------------------------------------------------------

    /**
     * Copies the given array into a new list, preserving order.
     */
    private static Collection<Object> asCollection(Object[] data) {
        Collection<Object> elements = new ArrayList<Object>(data.length);
        for (Object o : data) {
            elements.add(o);
        }
        return elements;
    }

    /**
     * Returns the runtime class of an element, rejecting {@code null} with a descriptive message.
     */
    private static Class<?> classOf(Object o) {
        if (o == null) {
            throw new NullPointerException("elements of input list must not be null");
        }
        return o.getClass();
    }

    /**
     * Checks that all elements of the input have the same type.
     * <p>
     * Every element must have the same runtime class. If the elements are arrays or collections, they must
     * additionally all have the same number of fields and the same runtime class at each field position; the
     * first nested element serves as the template for all following ones (even if it is empty).
     * <p>
     * Arrays are accessed as {@code Object[]}, so arrays of primitives are rejected with a
     * {@link ClassCastException}.
     *
     * @param c
     *        The elements to validate.
     * @throws RuntimeException
     *         If the types or sizes do not agree.
     * @throws NullPointerException
     *         If an element or a nested field is {@code null}.
     */
    private static void checkFormat(Collection<?> c) {
        Class<?> elementType = null;
        // Field types of the first nested element; stays null until a nested element has been seen.
        List<Class<?>> template = null;

        for (Object element : c) {
            // check the input types for 1-dimension
            Class<?> currentType = classOf(element);
            if (elementType != null && !elementType.equals(currentType)) {
                throw new RuntimeException(TYPE_MISMATCH);
            }
            elementType = currentType;

            // check the input types for 2-dimension array / 2-dimension collection
            if (currentType.isArray()) {
                template = checkArrayFields((Object[]) element, template);
            }
            else if (element instanceof Collection) {
                template = checkCollectionFields((Collection<?>) element, template);
            }
        }
    }

    /**
     * Validates one array element against the template, or derives the template from it if none exists yet.
     * The length is compared before the field types.
     *
     * @return the template to use for the following elements
     */
    private static List<Class<?>> checkArrayFields(Object[] fields, List<Class<?>> template) {
        if (template == null) {
            return fieldTypesOf(asCollection(fields));
        }
        if (fields.length != template.size()) {
            throw new RuntimeException(SIZE_MISMATCH);
        }
        int index = 0;
        for (Object field : fields) {
            if (!classOf(field).equals(template.get(index++))) {
                throw new RuntimeException(TYPE_MISMATCH);
            }
        }
        return template;
    }

    /**
     * Validates one collection element against the template, or derives the template from it if none exists
     * yet. Field types are compared position by position; an element with more or fewer fields than the
     * template is reported as a size mismatch.
     *
     * @return the template to use for the following elements
     */
    private static List<Class<?>> checkCollectionFields(Collection<?> fields, List<Class<?>> template) {
        if (template == null) {
            return fieldTypesOf(fields);
        }
        int index = 0;
        for (Object field : fields) {
            // Guard the lookup: a longer element is a size mismatch, not an IndexOutOfBoundsException.
            if (index >= template.size()) {
                throw new RuntimeException(SIZE_MISMATCH);
            }
            if (!classOf(field).equals(template.get(index++))) {
                throw new RuntimeException(TYPE_MISMATCH);
            }
        }
        if (index != template.size()) {
            throw new RuntimeException(SIZE_MISMATCH);
        }
        return template;
    }

    /**
     * Collects the runtime class of every field, in iteration order.
     */
    private static List<Class<?>> fieldTypesOf(Collection<?> fields) {
        List<Class<?>> types = new ArrayList<Class<?>>(fields.size());
        for (Object field : fields) {
            types.add(classOf(field));
        }
        return types;
    }
}