/* * Copyright (C) 2015 SoftIndex LLC. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package io.datakernel.datagraph.dataset; import com.google.common.base.Function; import com.google.common.base.Predicate; import io.datakernel.datagraph.dataset.impl.*; import io.datakernel.datagraph.graph.DataGraph; import io.datakernel.datagraph.graph.Partition; import io.datakernel.datagraph.graph.StreamId; import io.datakernel.stream.processor.StreamJoin; import io.datakernel.stream.processor.StreamMap; import io.datakernel.stream.processor.StreamReducers; import java.util.Comparator; import java.util.List; public final class Datasets { private Datasets() { } public static <K, T> SortedDataset<K, T> castToSorted(final Dataset<T> dataset, Class<K> keyType, Function<T, K> keyFunction, Comparator<K> keyComparator) { return new SortedDataset<K, T>(dataset.valueType(), keyComparator, keyType, keyFunction) { @Override public List<StreamId> channels(DataGraph graph) { return dataset.channels(graph); } }; } public static <K, T> SortedDataset<K, T> castToSorted(final LocallySortedDataset<K, T> dataset) { return new SortedDataset<K, T>(dataset.valueType(), dataset.keyComparator(), dataset.keyType(), dataset.keyFunction()) { @Override public List<StreamId> channels(DataGraph graph) { return dataset.channels(graph); } }; } public static <K, L, R, V> SortedDataset<K, V> join(SortedDataset<K, L> left, SortedDataset<K, R> right, StreamJoin.Joiner<K, L, R, V> joiner, Class<V> resultType, Function<V, K> keyFunction) { return new DatasetJoin<>(left, right, joiner, resultType, keyFunction); } public static <I, O> Dataset<O> map(Dataset<I> dataset, StreamMap.Mapper<I, O> mapper, Class<O> resultType) { return new DatasetMap<>(dataset, mapper, resultType); } public static <I, O> Dataset<O> map(Dataset<I> dataset, final Function<I, O> function, Class<O> resultType) { return map(dataset, new StreamMap.MapperProjection<I, O>() { @Override protected O apply(I input) { return function.apply(input); } }, resultType); } public static <T> Dataset<T> map(Dataset<T> dataset, StreamMap.Mapper<T, T> mapper) { return map(dataset, mapper, dataset.valueType()); } public static <T> Dataset<T> map(Dataset<T> dataset, Function<T, T> function) { return map(dataset, function, dataset.valueType()); } public static <T> Dataset<T> filter(Dataset<T> dataset, Predicate<T> predicate) { return new DatasetFilter<>(dataset, predicate, dataset.valueType()); } public static <K, I> LocallySortedDataset<K, I> localSort(Dataset<I> dataset, Class<K> keyType, Function<I, K> keyFunction, Comparator<K> keyComparator) { return new DatasetLocalSort<>(dataset, keyType, keyFunction, keyComparator); } public static <K, I, O> LocallySortedDataset<K, O> localReduce(LocallySortedDataset<K, I> stream, StreamReducers.Reducer<K, I, O, ?> reducer, Class<O> resultType, Function<O, K> resultKeyFunction) { return new DatasetLocalSortReduce<>(stream, reducer, resultType, resultKeyFunction); } public static <K, I, O> Dataset<O> repartition_Reduce(LocallySortedDataset<K, I> dataset, StreamReducers.Reducer<K, I, O, ?> reducer, Class<O> resultType) { return new DatasetRepartitionReduce<>(dataset, reducer, resultType); } public static <K, I, O> Dataset<O> repartition_Reduce(LocallySortedDataset<K, I> dataset, StreamReducers.Reducer<K, I, O, ?> reducer, Class<O> resultType, List<Partition> partitions) { return new DatasetRepartitionReduce<>(dataset, reducer, resultType, partitions); } public static <K, T> SortedDataset<K, T> repartition_Sort(LocallySortedDataset<K, T> dataset) { return new DatasetRepartitionAndSort<>(dataset); } public static <K, T> SortedDataset<K, T> repartition_Sort(LocallySortedDataset<K, T> dataset, List<Partition> partitions) { return new DatasetRepartitionAndSort<>(dataset, partitions); } public static <K, I, O, A> Dataset<O> sort_Reduce_Repartition_Reduce(Dataset<I> dataset, StreamReducers.ReducerToResult<K, I, O, A> reducer, Class<K> keyType, Function<I, K> inputKeyFunction, Comparator<K> keyComparator, Class<A> accumulatorType, Function<A, K> accumulatorKeyFunction, Class<O> outputType) { LocallySortedDataset<K, I> partiallySorted = localSort(dataset, keyType, inputKeyFunction, keyComparator); LocallySortedDataset<K, A> partiallyReduced = localReduce(partiallySorted, reducer.inputToAccumulator(), accumulatorType, accumulatorKeyFunction); return repartition_Reduce(partiallyReduced, reducer.accumulatorToOutput(), outputType); } public static <K, I, A> Dataset<A> sort_Reduce_Repartition_Reduce(Dataset<I> dataset, StreamReducers.ReducerToResult<K, I, A, A> reducer, Class<K> keyType, Function<I, K> inputKeyFunction, Comparator<K> keyComparator, Class<A> accumulatorType, Function<A, K> accumulatorKeyFunction) { return sort_Reduce_Repartition_Reduce(dataset, reducer, keyType, inputKeyFunction, keyComparator, accumulatorType, accumulatorKeyFunction, accumulatorType ); } public static <K, T> Dataset<T> sort_Reduce_Repartition_Reduce(Dataset<T> dataset, StreamReducers.ReducerToResult<K, T, T, T> reducer, Class<K> keyType, Function<T, K> keyFunction, Comparator<K> keyComparator) { return sort_Reduce_Repartition_Reduce(dataset, reducer, keyType, keyFunction, keyComparator, dataset.valueType(), keyFunction, dataset.valueType() ); } public static <T> Dataset<T> datasetOfList(Object dataId, Class<T> resultType) { return new DatasetListProducer<>(dataId, resultType); } public static <K, T> SortedDataset<K, T> sortedDatasetOfList(Object dataId, Class<T> resultType, Class<K> keyType, Function<T, K> keyFunction, Comparator<K> keyComparator) { return castToSorted(datasetOfList(dataId, resultType), keyType, keyFunction, keyComparator); } public static <T> DatasetListConsumer<T> listConsumer(Dataset<T> input, Object listId) { return new DatasetListConsumer<>(input, listId); } }