/*
* Copyright (C) 2015 SoftIndex LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.datakernel.datagraph.dataset.impl;
import com.google.common.base.Function;
import io.datakernel.datagraph.dataset.LocallySortedDataset;
import io.datakernel.datagraph.graph.DataGraph;
import io.datakernel.datagraph.graph.Partition;
import io.datakernel.datagraph.graph.StreamId;
import io.datakernel.datagraph.node.NodeDownload;
import io.datakernel.datagraph.node.NodeReduce;
import io.datakernel.datagraph.node.NodeShard;
import io.datakernel.datagraph.node.NodeUpload;
import io.datakernel.stream.processor.StreamReducers;
import java.util.ArrayList;
import java.util.List;
/**
 * Static helpers that wire repartitioning, reducing and channel-forwarding
 * nodes into a {@link DataGraph}.
 *
 * <p>The class only holds static methods and cannot be instantiated.
 */
public class DatasetUtils {

    private DatasetUtils() {
    }

    /**
     * Re-shards every channel of {@code input} across {@code partitions} and
     * reduces the re-sharded streams on each target partition.
     *
     * <p>Graph wiring performed, in order:
     * <ol>
     *   <li>For each stream returned by {@code input.channels(graph)}, a
     *       {@link NodeShard} keyed by {@code input.keyFunction()} is added to
     *       the partition that owns that stream.</li>
     *   <li>For each target partition, a {@link NodeReduce} built from
     *       {@code input.keyComparator()} is added to that partition.</li>
     *   <li>Each sharder then gets one new output stream per target partition;
     *       that stream is registered with the graph, forwarded to the target
     *       partition via {@link #forwardChannel}, and attached to the reducer
     *       as an input.</li>
     * </ol>
     *
     * @param graph      graph that receives the new nodes and streams
     * @param input      locally sorted dataset supplying the channels, key
     *                   function, key comparator and value type
     * @param reducer    reducer attached to every reducer input
     * @param partitions target partitions; one reducer node is created per entry
     * @param <K>        key type
     * @param <I>        input item type
     * @param <O>        output item type
     * @return the reducer output stream ids, one per target partition, in the
     *         iteration order of {@code partitions}
     */
    @SuppressWarnings("unchecked")
    public static <K, I, O> List<StreamId> repartitionAndReduce(DataGraph graph, LocallySortedDataset<K, I> input,
                                                                StreamReducers.Reducer<K, I, O, ?> reducer,
                                                                List<Partition> partitions) {
        Function<I, K> keyExtractor = input.keyFunction();
        // The accumulator type parameter is unknown here, so the reducer is viewed
        // with an Object accumulator; the cast is unchecked.
        StreamReducers.Reducer<K, I, O, Object> objectReducer = (StreamReducers.Reducer<K, I, O, Object>) reducer;

        List<StreamId> reducedOutputs = new ArrayList<>();
        List<NodeShard<K, I>> shardNodes = new ArrayList<>();

        // Stage 1: one sharder per input channel, placed on the channel's own partition.
        for (StreamId channel : input.channels(graph)) {
            Partition owner = graph.getPartition(channel);
            NodeShard<K, I> shardNode = new NodeShard<>(keyExtractor, channel);
            graph.addNode(owner, shardNode);
            shardNodes.add(shardNode);
        }

        // Stage 2: one reducer per target partition, fed by a fresh output of every sharder.
        for (Partition target : partitions) {
            // TODO (carried over from the original code): accumulator type is fixed to Object
            NodeReduce<K, O, Object> reduceNode = new NodeReduce<>(input.keyComparator());
            graph.addNode(target, reduceNode);

            for (NodeShard<K, I> shardNode : shardNodes) {
                StreamId shardOutput = shardNode.newPartition();
                graph.addNodeStream(shardNode, shardOutput);

                StreamId forwarded = forwardChannel(graph, input.valueType(), shardOutput, target);
                reduceNode.addInput(forwarded, keyExtractor, objectReducer);
            }

            reducedOutputs.add(reduceNode.getOutput());
        }

        return reducedOutputs;
    }

    /**
     * Re-shards {@code input} across {@code partitions} using
     * {@link #repartitionAndReduce} with {@code StreamReducers.mergeSortReducer()}
     * as the reducer.
     *
     * @param graph      graph that receives the new nodes and streams
     * @param input      locally sorted dataset to repartition
     * @param partitions target partitions
     * @param <K>        key type
     * @param <T>        item type
     * @return the output stream ids produced by {@link #repartitionAndReduce}
     */
    public static <K, T> List<StreamId> repartitionAndSort(DataGraph graph, LocallySortedDataset<K, T> input,
                                                           List<Partition> partitions) {
        return repartitionAndReduce(
                graph,
                input,
                StreamReducers.<K, T>mergeSortReducer(),
                partitions);
    }

    /**
     * Forwards a stream to {@code targetPartition} through an upload/download
     * node pair.
     *
     * <p>A {@link NodeUpload} for {@code sourceStreamId} is added to the partition
     * that owns the stream (looked up with {@code graph.getPartition}), and a
     * {@link NodeDownload} constructed with that partition's address and the same
     * stream id is added to {@code targetPartition}.
     *
     * @param graph           graph that receives the two nodes
     * @param type            item type passed to both nodes
     * @param sourceStreamId  stream to forward
     * @param targetPartition partition on which the download node is placed
     * @param <T>             item type
     * @return the output stream id of the download node
     */
    public static <T> StreamId forwardChannel(DataGraph graph, Class<T> type,
                                              StreamId sourceStreamId, Partition targetPartition) {
        Partition origin = graph.getPartition(sourceStreamId);

        // Both nodes are constructed before either is registered, as in the original wiring.
        NodeUpload<T> uploader = new NodeUpload<>(type, sourceStreamId);
        NodeDownload<T> downloader = new NodeDownload<>(type, origin.getAddress(), sourceStreamId);

        graph.addNode(origin, uploader);
        graph.addNode(targetPartition, downloader);

        return downloader.getOutput();
    }
}