/*
* Copyright (C) 2015 SoftIndex LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.datakernel.datagraph.stream;
import com.google.common.base.Function;
import com.google.common.base.MoreObjects;
import com.google.common.collect.Ordering;
import com.google.common.net.InetAddresses;
import io.datakernel.async.AssertingCompletionCallback;
import io.datakernel.async.AsyncRunnable;
import io.datakernel.async.CompletionCallback;
import io.datakernel.async.IgnoreCompletionCallback;
import io.datakernel.datagraph.dataset.Dataset;
import io.datakernel.datagraph.dataset.SortedDataset;
import io.datakernel.datagraph.dataset.impl.DatasetListConsumer;
import io.datakernel.datagraph.graph.DataGraph;
import io.datakernel.datagraph.graph.Partition;
import io.datakernel.datagraph.helper.StreamMergeSorterStorageStub;
import io.datakernel.datagraph.server.DatagraphClient;
import io.datakernel.datagraph.server.DatagraphEnvironment;
import io.datakernel.datagraph.server.DatagraphSerialization;
import io.datakernel.datagraph.server.DatagraphServer;
import io.datakernel.eventloop.Eventloop;
import io.datakernel.serializer.annotations.Deserialize;
import io.datakernel.serializer.annotations.Serialize;
import io.datakernel.stream.StreamConsumers;
import io.datakernel.stream.StreamDataReceiver;
import io.datakernel.stream.processor.StreamJoin;
import io.datakernel.stream.processor.StreamMap;
import io.datakernel.stream.processor.StreamMergeSorterStorage;
import io.datakernel.stream.processor.StreamReducers;
import org.junit.Ignore;
import org.junit.Test;
import java.net.InetSocketAddress;
import java.util.Arrays;
import static io.datakernel.async.AsyncRunnables.runInParallel;
import static io.datakernel.datagraph.dataset.Datasets.*;
import static io.datakernel.eventloop.FatalErrorHandlers.rethrowOnAnyError;
import static java.util.Arrays.asList;
import static org.junit.Assert.assertEquals;
public class PageRankTest {
public static final class Page {
	// Identifier of this page; also the key used for partitioning and sorting.
	@Serialize(order = 0)
	public final long pageId;

	// Ids of the pages this page links to.
	@Serialize(order = 1)
	public final long[] links;

	public Page(@Deserialize("pageId") long pageId, @Deserialize("links") long[] links) {
		this.pageId = pageId;
		this.links = links;
	}

	/**
	 * Distributes this page's current rank evenly across all outgoing links,
	 * emitting one {@link Rank} contribution per link into the receiver.
	 */
	public void disperse(Rank rank, StreamDataReceiver<Rank> callback) {
		double share = rank.value / links.length;
		for (long link : links) {
			callback.onData(new Rank(link, share));
		}
	}

	/** Key extractor: maps a page to its id. */
	public static final Function<Page, Long> KEY_FUNCTION = new Function<Page, Long>() {
		@Override
		public Long apply(Page page) {
			return page.pageId;
		}
	};

	@Override
	public String toString() {
		return MoreObjects.toStringHelper(this)
				.add("pageId", pageId)
				.add("links", Arrays.toString(links))
				.toString();
	}
}
public static final class Rank {
	// Id of the page this rank belongs to.
	@Serialize(order = 0)
	public final long pageId;

	// Current rank value of the page.
	@Serialize(order = 1)
	public final double value;

	public Rank(@Deserialize("pageId") long pageId, @Deserialize("value") double value) {
		this.pageId = pageId;
		this.value = value;
	}

	/** Key extractor: maps a rank to its page id. */
	public static final Function<Rank, Long> KEY_FUNCTION = new Function<Rank, Long>() {
		@Override
		public Long apply(Rank rank) {
			return rank.pageId;
		}
	};

	@Override
	public String toString() {
		return MoreObjects.toStringHelper(this)
				.add("pageId", pageId)
				.add("value", value)
				.toString();
	}

	/**
	 * Approximate equality for test assertions: page ids must match exactly,
	 * rank values may differ by less than 1e-3 (floating-point tolerance).
	 * Guards against null and foreign types instead of throwing.
	 */
	@SuppressWarnings("SimplifiableIfStatement")
	@Override
	public boolean equals(Object o) {
		if (this == o) return true;
		if (!(o instanceof Rank)) return false;
		Rank rank = (Rank) o;
		if (pageId != rank.pageId) return false;
		return Math.abs(rank.value - value) < 0.001;
	}

	// Hash only pageId: 'value' is compared with a tolerance in equals(), so
	// including it would break the equals/hashCode contract for nearly-equal ranks.
	@Override
	public int hashCode() {
		return (int) (pageId ^ (pageId >>> 32));
	}
}
public static final class RankAccumulator {
	// Id of the page whose incoming contributions are being summed.
	@Serialize(order = 0)
	public long pageId;

	// Running sum of rank contributions received for this page.
	@Serialize(order = 1)
	public double accumulatedRank;

	// No-arg constructor required by the serializer.
	@SuppressWarnings("unused")
	public RankAccumulator() {
	}

	public RankAccumulator(long pageId) {
		this.pageId = pageId;
	}

	/** Key extractor: maps an accumulator to its page id. */
	public static final Function<RankAccumulator, Long> KEY_FUNCTION = new Function<RankAccumulator, Long>() {
		@Override
		public Long apply(RankAccumulator acc) {
			return acc.pageId;
		}
	};

	@Override
	public String toString() {
		return MoreObjects.toStringHelper(this)
				.add("pageId", pageId)
				.add("accumulatedRank", accumulatedRank)
				.toString();
	}
}
/**
 * Reducer that sums per-page rank contributions: creates one accumulator per
 * page id, folds incoming {@link Rank}s into it, merges partial accumulators
 * from other partitions, and finally emits the summed {@link Rank}.
 */
private static final class RankAccumulatorReducer extends StreamReducers.ReducerToResult<Long, Rank, Rank, RankAccumulator> {
	@Override
	public RankAccumulator createAccumulator(Long key) {
		return new RankAccumulator(key);
	}

	@Override
	public RankAccumulator accumulate(RankAccumulator acc, Rank rank) {
		acc.accumulatedRank += rank.value;
		return acc;
	}

	@Override
	public RankAccumulator combine(RankAccumulator acc, RankAccumulator other) {
		acc.accumulatedRank += other.accumulatedRank;
		return acc;
	}

	@Override
	public Rank produceResult(RankAccumulator acc) {
		return new Rank(acc.pageId, acc.accumulatedRank);
	}
}
/**
 * One power-iteration step: joins every page with its current rank, disperses
 * that rank across the page's links, and sums the contributions per page id
 * to produce the next rank vector (sorted by page id).
 */
public static SortedDataset<Long, Rank> pageRankIteration(SortedDataset<Long, Page> pages, SortedDataset<Long, Rank> ranks) {
	// Inner join by page id; each match emits one contribution per outgoing link.
	StreamJoin.InnerJoiner<Long, Page, Rank, Rank> disperser = new StreamJoin.InnerJoiner<Long, Page, Rank, Rank>() {
		@Override
		public void onInnerJoin(Long key, Page page, Rank rank, StreamDataReceiver<Rank> output) {
			page.disperse(rank, output);
		}
	};
	Dataset<Rank> dispersed = join(pages, ranks, disperser, Rank.class, Rank.KEY_FUNCTION);

	// Sum dispersed contributions per page id across partitions.
	Dataset<Rank> nextRanks = sort_Reduce_Repartition_Reduce(dispersed, new RankAccumulatorReducer(),
			Long.class, Rank.KEY_FUNCTION, Ordering.<Long>natural(),
			RankAccumulator.class, RankAccumulator.KEY_FUNCTION,
			Rank.class);
	return castToSorted(nextRanks, Long.class, Rank.KEY_FUNCTION, Ordering.<Long>natural());
}
/** Iteration count used by the original fixed-round implementation. */
private static final int DEFAULT_PAGE_RANK_ITERATIONS = 10;

/**
 * Computes page ranks with the default number of power iterations (10).
 * Kept for backward compatibility; delegates to the parameterized overload.
 */
public static SortedDataset<Long, Rank> pageRank(SortedDataset<Long, Page> pages) {
	return pageRank(pages, DEFAULT_PAGE_RANK_ITERATIONS);
}

/**
 * Computes page ranks over the given sorted page dataset.
 *
 * @param pages      pages sorted by page id
 * @param iterations number of power iterations to run; 0 returns the uniform
 *                   initial ranks (1.0 per page)
 * @throws IllegalArgumentException if {@code iterations} is negative
 */
public static SortedDataset<Long, Rank> pageRank(SortedDataset<Long, Page> pages, int iterations) {
	if (iterations < 0) {
		throw new IllegalArgumentException("iterations must be non-negative: " + iterations);
	}
	// Initial rank vector: every page starts with rank 1.0, keyed/sorted by page id.
	SortedDataset<Long, Rank> ranks = castToSorted(map(pages,
			new StreamMap.MapperProjection<Page, Rank>() {
				@Override
				public Rank apply(Page page) {
					return new Rank(page.pageId, 1.0);
				}
			},
			Rank.class), Long.class, Rank.KEY_FUNCTION, Ordering.<Long>natural());
	for (int i = 0; i < iterations; i++) {
		ranks = pageRankIteration(pages, ranks);
	}
	return ranks;
}
@SuppressWarnings("ArraysAsListWithZeroOrOneArgument")
@Ignore // TODO(vmykhalko)
@Test
public void test2() throws Exception {
// Two datagraph servers on localhost ports 1571/1572 simulate a two-partition
// cluster; everything runs on one shared eventloop in this thread.
DatagraphSerialization serialization = new DatagraphSerialization();
InetSocketAddress address1 = new InetSocketAddress(InetAddresses.forString("127.0.0.1"), 1571);
InetSocketAddress address2 = new InetSocketAddress(InetAddresses.forString("127.0.0.1"), 1572);
final Eventloop eventloop = Eventloop.create().withFatalErrorHandler(rethrowOnAnyError());
// Sinks that collect the final ranks produced on each partition.
final StreamConsumers.ToList<Rank> result1 = new StreamConsumers.ToList<>(eventloop);
final StreamConsumers.ToList<Rank> result2 = new StreamConsumers.ToList<>(eventloop);
DatagraphClient client = new DatagraphClient(eventloop, serialization);
// Base environment shared by both servers: codecs, client, and an in-memory
// merge-sorter storage stub (no disk I/O in the test).
DatagraphEnvironment environment = DatagraphEnvironment.create()
.setInstance(DatagraphSerialization.class, serialization)
.setInstance(DatagraphClient.class, client)
.setInstance(StreamMergeSorterStorage.class, new StreamMergeSorterStorageStub(eventloop));
// Partition 1 holds pages 1 and 3; partition 2 holds page 2. Each partition
// writes its share of the final ranks into its own "result" list.
DatagraphEnvironment environment1 = environment.extend()
.set("items", asList(new Page(1, new long[]{1, 2, 3}), new Page(3, new long[]{1})))
.set("result", result1);
DatagraphEnvironment environment2 = environment.extend()
.set("items", asList(new Page(2, new long[]{1})))
.set("result", result2);
final DatagraphServer server1 = new DatagraphServer(eventloop, environment1).withListenAddress(address1);
final DatagraphServer server2 = new DatagraphServer(eventloop, environment2).withListenAddress(address2);
Partition partition1 = new Partition(client, address1);
Partition partition2 = new Partition(client, address2);
final DataGraph graph = new DataGraph(serialization,
Arrays.asList(partition1, partition2));
// Build the distributed plan: re-shard the "items" inputs by page id, run the
// page-rank computation, then stream each partition's ranks into "result".
SortedDataset<Long, Page> pages = repartition_Sort(sortedDatasetOfList("items",
Page.class, Long.class, Page.KEY_FUNCTION, Ordering.<Long>natural()));
SortedDataset<Long, Rank> pageRanks = pageRank(pages);
DatasetListConsumer<?> consumerNode = listConsumer(pageRanks, "result");
consumerNode.compileInto(graph);
server1.listen();
server2.listen();
// Once BOTH result sinks complete, shut both servers down so eventloop.run()
// can return; AssertingCompletionCallback fails the test on any error.
runInParallel(eventloop,
new AsyncRunnable() {
@Override
public void run(CompletionCallback callback) {
result1.setCompletionCallback(callback);
}
},
new AsyncRunnable() {
@Override
public void run(CompletionCallback callback) {
result2.setCompletionCallback(callback);
}
}
).run(new AssertingCompletionCallback() {
@Override
protected void onComplete() {
server1.close(IgnoreCompletionCallback.create());
server2.close(IgnoreCompletionCallback.create());
}
});
// Submit the compiled graph and drive everything to completion.
graph.execute();
eventloop.run();
// Expected ranks after the fixed iteration count; Rank.equals tolerates ~1e-3 drift.
assertEquals(asList(new Rank(2, 0.6069)), result1.getList());
assertEquals(asList(new Rank(1, 1.7861), new Rank(3, 0.6069)), result2.getList());
}
}