/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.graph.linkanalysis;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.graph.AdjacencyMatrixJob;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.hadoop.MathHelper;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.Map;
/**
 * Integration test for {@link PageRankJob}, using the four-vertex example graph from
 * "Mining Massive Datasets" (page 157).
 */
public final class PageRankJobTest extends MahoutTestCase {

  private static final Logger log = LoggerFactory.getLogger(PageRankJobTest.class);

  /**
   * Runs the job end-to-end on a toy graph (3 iterations, staying probability 0.8) and checks
   * three artifacts: the adjacency matrix and the transition matrix found under the temp
   * directory, and the per-vertex ranks found in the output directory.
   */
  @Test
  public void toyIntegrationTest() throws Exception {
    File vertices = getTestTempFile("vertices.txt");
    File edges = getTestTempFile("edges.txt");
    File output = getTestTempDir("output");
    // Remove the freshly created directory again; presumably the job insists on creating
    // its output location itself -- TODO confirm.
    output.delete();
    File temp = getTestTempDir();
    String tempPath = temp.getAbsolutePath();

    Configuration conf = new Configuration();

    // Vertex ids, one per line; their order defines the row/column order of the matrices below.
    writeLines(vertices, "12", "34", "56", "78");
    // Directed edges as "source,target".
    writeLines(edges,
        "12,34", "12,56", "12,78",
        "34,12", "34,78",
        "56,56",
        "78,34", "78,56");

    String[] jobArgs = {
        "--vertices", vertices.getAbsolutePath(),
        "--edges", edges.getAbsolutePath(),
        "--output", output.getAbsolutePath(),
        "--numIterations", "3",
        "--stayingProbability", "0.8",
        "--tempDir", tempPath,
    };
    PageRankJob job = new PageRankJob();
    job.setConf(conf);
    job.run(jobArgs);

    // Row i / column j is 1 when the edge list contains an edge from vertex i to vertex j.
    double[][] adjacencyRows = {
        { 0, 1, 1, 1 },
        { 1, 0, 0, 1 },
        { 0, 0, 1, 0 },
        { 0, 1, 1, 0 },
    };
    Matrix wantedAdjacency = new DenseMatrix(adjacencyRows);

    int vertexCount = HadoopUtil.readInt(new Path(tempPath, AdjacencyMatrixJob.NUM_VERTICES), conf);
    assertEquals(4, vertexCount);

    Path adjacencyPart = new Path(tempPath, AdjacencyMatrixJob.ADJACENCY_MATRIX + "/part-r-00000");
    Matrix foundAdjacency = MathHelper.readMatrix(conf, adjacencyPart, vertexCount, vertexCount);

    logComparison("\nexpected adjacency matrix\n\n", wantedAdjacency,
        "\nactual adjacency matrix \n\n", foundAdjacency);

    // Each column holds 0.8 divided by the out-degree of that column's vertex, placed in the
    // rows of the vertices it links to (e.g. vertex 12 has three out-links: 0.8 / 3 = 0.2666...).
    double[][] transitionRows = {
        { 0.0, 0.4, 0.0, 0.0 },
        { 0.266666667, 0.0, 0.0, 0.4 },
        { 0.266666667, 0.0, 0.8, 0.4 },
        { 0.266666667, 0.4, 0.0, 0.0 },
    };
    Matrix wantedTransition = new DenseMatrix(transitionRows);

    Path transitionPart = new Path(tempPath, "transitionMatrix/part-r-00000");
    Matrix foundTransition = MathHelper.readMatrix(conf, transitionPart, vertexCount, vertexCount);

    logComparison("\nexpected transition matrix\n\n", wantedTransition,
        "\nactual transition matrix\n\n", foundTransition);

    // Assert only after both comparisons were logged, so the log is complete on failure.
    MathHelper.assertMatrixEquals(wantedAdjacency, foundAdjacency);
    MathHelper.assertMatrixEquals(wantedTransition, foundTransition);

    // The result file carries one "vertexId<TAB>rank" record per line.
    Map<Integer,Double> ranks = Maps.newHashMapWithExpectedSize(vertexCount);
    Splitter onTab = Splitter.on("\t");
    File rankFile = new File(output, "part-m-00000");
    for (CharSequence record : new FileLineIterable(rankFile)) {
      String[] fields = Iterables.toArray(onTab.split(record), String.class);
      int vertexId = Integer.parseInt(fields[0]);
      double rank = Double.parseDouble(fields[1]);
      ranks.put(vertexId, rank);
    }

    assertEquals(4, ranks.size());

    int[] vertexIds = { 12, 34, 56, 78 };
    double[] wantedRanks = { 0.1206666, 0.1571111, 0.5651111, 0.1571111 };
    for (int i = 0; i < vertexIds.length; i++) {
      assertEquals(wantedRanks[i], ranks.get(vertexIds[i]), EPSILON);
    }
  }

  /** Logs, at info level, both matrices in their "nice" form, each preceded by its header. */
  private static void logComparison(String expectedHeader, Matrix expected,
                                    String actualHeader, Matrix actual) {
    StringBuilder message = new StringBuilder()
        .append(expectedHeader)
        .append(MathHelper.nice(expected))
        .append(actualHeader)
        .append(MathHelper.nice(actual))
        .append('\n');
    log.info(message.toString());
  }
}