/**
* Copyright 2016-2017 Seznam.cz, a.s.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cz.seznam.euphoria.fluent;
import cz.seznam.euphoria.core.client.dataset.partitioning.Partitioner;
import cz.seznam.euphoria.core.client.functional.UnaryFunction;
import cz.seznam.euphoria.core.client.functional.UnaryFunctor;
import cz.seznam.euphoria.core.client.io.DataSink;
import cz.seznam.euphoria.core.client.operator.Distinct;
import cz.seznam.euphoria.core.client.operator.FlatMap;
import cz.seznam.euphoria.core.client.operator.MapElements;
import cz.seznam.euphoria.core.client.operator.Repartition;
import cz.seznam.euphoria.core.client.operator.Union;
import cz.seznam.euphoria.core.client.operator.Builders.Output;
import cz.seznam.euphoria.core.executor.Executor;
import static java.util.Objects.requireNonNull;
/**
 * Fluent wrapper around a core
 * {@link cz.seznam.euphoria.core.client.dataset.Dataset}.
 *
 * <p>Each transformation method builds the corresponding core operator on the
 * wrapped dataset and returns the operator's output wrapped in a new
 * {@code Dataset}, so calls can be chained.
 *
 * @param <T> type of the elements of the wrapped dataset
 */
public class Dataset<T> {

  /** The wrapped core dataset; never {@code null}. */
  private final cz.seznam.euphoria.core.client.dataset.Dataset<T> wrap;

  /**
   * Wraps the given core dataset.
   *
   * @param wrap the core dataset to wrap
   * @throws NullPointerException if {@code wrap} is {@code null}
   */
  Dataset(cz.seznam.euphoria.core.client.dataset.Dataset<T> wrap) {
    this.wrap = requireNonNull(wrap, "wrap");
  }

  /**
   * Returns the core dataset wrapped by this instance.
   *
   * @return the wrapped core dataset, never {@code null}
   */
  public cz.seznam.euphoria.core.client.dataset.Dataset<T> unwrap() {
    return this.wrap;
  }

  /**
   * Applies a caller-supplied operator builder to the wrapped dataset and
   * wraps the resulting output.
   *
   * @param <S> element type of the resulting dataset
   * @param output function that receives the wrapped core dataset and returns
   *     an {@link Output} builder
   * @return a new fluent dataset wrapping the builder's output
   * @throws NullPointerException if {@code output} is {@code null} or if it
   *     returns {@code null}
   */
  public <S> Dataset<S> apply(
      UnaryFunction<cz.seznam.euphoria.core.client.dataset.Dataset<T>, Output<S>> output) {
    // Validate up front so a null function is reported by name rather than as
    // an anonymous NullPointerException from the dereference below.
    requireNonNull(output, "output");
    Output<S> builder =
        requireNonNull(output.apply(this.wrap), "output function returned null");
    return new Dataset<>(builder.output());
  }

  /**
   * Builds a {@link Repartition} operator over this dataset using the given
   * partitioner.
   *
   * @param partitioner the partitioner handed to the operator
   * @return a new fluent dataset wrapping the operator's output
   * @throws NullPointerException if {@code partitioner} is {@code null}
   */
  public Dataset<T> repartition(Partitioner<T> partitioner) {
    return new Dataset<>(Repartition.of(this.wrap)
        .setPartitioner(requireNonNull(partitioner, "partitioner"))
        .output());
  }

  /**
   * Builds a {@link Repartition} operator over this dataset with the given
   * number of partitions.
   *
   * @param num the partition count, forwarded to the operator unchanged; any
   *     range validation is left to the operator
   * @return a new fluent dataset wrapping the operator's output
   */
  public Dataset<T> repartition(int num) {
    return new Dataset<>(Repartition.of(this.wrap)
        .setNumPartitions(num)
        .output());
  }

  /**
   * Builds a {@link Repartition} operator over this dataset with both a
   * partition count and a partitioner.
   *
   * @param num the partition count, forwarded to the operator unchanged; any
   *     range validation is left to the operator
   * @param partitioner the partitioner handed to the operator
   * @return a new fluent dataset wrapping the operator's output
   * @throws NullPointerException if {@code partitioner} is {@code null}
   */
  public Dataset<T> repartition(int num, Partitioner<T> partitioner) {
    return new Dataset<>(Repartition.of(this.wrap)
        .setNumPartitions(num)
        .setPartitioner(requireNonNull(partitioner, "partitioner"))
        .output());
  }

  /**
   * Builds a {@link MapElements} operator over this dataset using the given
   * function.
   *
   * @param <S> element type of the resulting dataset
   * @param f the function handed to the operator
   * @return a new fluent dataset wrapping the operator's output
   * @throws NullPointerException if {@code f} is {@code null}
   */
  public <S> Dataset<S> mapElements(UnaryFunction<T, S> f) {
    return new Dataset<>(
        MapElements.of(this.wrap).using(requireNonNull(f, "f")).output());
  }

  /**
   * Builds a {@link FlatMap} operator over this dataset using the given
   * functor.
   *
   * @param <S> element type of the resulting dataset
   * @param f the functor handed to the operator
   * @return a new fluent dataset wrapping the operator's output
   * @throws NullPointerException if {@code f} is {@code null}
   */
  public <S> Dataset<S> flatMap(UnaryFunctor<T, S> f) {
    return new Dataset<>(
        FlatMap.of(this.wrap).using(requireNonNull(f, "f")).output());
  }

  /**
   * Builds a {@link Distinct} operator over this dataset.
   *
   * @return a new fluent dataset wrapping the operator's output
   */
  public Dataset<T> distinct() {
    return new Dataset<>(Distinct.of(this.wrap).output());
  }

  /**
   * Builds a {@link Union} operator over this dataset and another one.
   *
   * @param other the fluent dataset whose wrapped dataset is the second input
   * @return a new fluent dataset wrapping the operator's output
   * @throws NullPointerException if {@code other} is {@code null}
   */
  public Dataset<T> union(Dataset<T> other) {
    // Validate up front so the failure names the argument.
    requireNonNull(other, "other");
    return new Dataset<>(Union.of(this.wrap, other.wrap).output());
  }

  /**
   * Forwards the given sink to the wrapped dataset's {@code persist} method.
   *
   * <p>The sink is passed through without validation; how a {@code null} sink
   * is treated is decided by the wrapped dataset.
   *
   * @param <S> concrete sink type
   * @param dst the sink handed to the wrapped dataset
   * @return this instance, to allow further chaining
   */
  public <S extends DataSink<T>> Dataset<T> persist(S dst) {
    this.wrap.persist(dst);
    return this;
  }

  /**
   * Submits the flow of the wrapped dataset to the given executor and calls
   * {@code get()} on the handle returned by {@code submit}.
   *
   * <p>NOTE(review): {@code get()} presumably blocks until the flow has
   * finished - confirm against the {@link Executor} contract.
   *
   * @param exec the executor to submit the flow to
   * @throws NullPointerException if {@code exec} is {@code null}
   * @throws Exception whatever {@code submit} or the subsequent {@code get()}
   *     throws
   */
  public void execute(Executor exec) throws Exception {
    // Validate up front so the failure names the argument.
    requireNonNull(exec, "exec");
    exec.submit(this.wrap.getFlow()).get();
  }
}