PageRankJobTest.java example

Explorer
mahout-rbmClassifier-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.graph.linkanalysis;

import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.graph.AdjacencyMatrixJob;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.hadoop.MathHelper;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.Map;

/**
 * End-to-end check of {@link PageRankJob} on the four-vertex toy graph taken from
 * "Mining of Massive Datasets" (page 157).
 */
public final class PageRankJobTest extends MahoutTestCase {

  private static final Logger log = LoggerFactory.getLogger(PageRankJobTest.class);

  /**
   * Runs {@link PageRankJob} for 3 iterations with a staying probability of 0.8 on a graph of four
   * vertices (12, 34, 56, 78) and eight directed edges, then verifies three artifacts:
   * <ol>
   *   <li>the vertex count and adjacency matrix written to the temp directory,</li>
   *   <li>the transition matrix written to the temp directory,</li>
   *   <li>the per-vertex ranks written to the output directory.</li>
   * </ol>
   *
   * @throws Exception if file setup, the job, or reading back any result fails
   */
  @Test
  public void toyIntegrationTest() throws Exception {

    File vertexList = getTestTempFile("vertices.txt");
    File edgeList = getTestTempFile("edges.txt");
    File rankOutput = getTestTempDir("output");
    // Remove the freshly created directory before the job runs
    // (presumably the job insists on creating its own output location -- confirm against PageRankJob).
    rankOutput.delete();
    File workDir = getTestTempDir();
    String workPath = workDir.getAbsolutePath();

    Configuration hadoopConf = new Configuration();

    // Input graph: one vertex id per line, one "source,target" pair per line.
    writeLines(vertexList, "12", "34", "56", "78");
    writeLines(edgeList,
        "12,34",
        "12,56",
        "12,78",
        "34,12",
        "34,78",
        "56,56",
        "78,34",
        "78,56");

    String[] jobArgs = {
        "--vertices", vertexList.getAbsolutePath(),
        "--edges", edgeList.getAbsolutePath(),
        "--output", rankOutput.getAbsolutePath(),
        "--numIterations", "3",
        "--stayingProbability", "0.8",
        "--tempDir", workPath };

    PageRankJob job = new PageRankJob();
    job.setConf(hadoopConf);
    job.run(jobArgs);

    // Row i / column j is 1 when the i-th vertex links to the j-th vertex (order: 12, 34, 56, 78).
    double[][] adjacencyCells = {
        { 0, 1, 1, 1 },
        { 1, 0, 0, 1 },
        { 0, 0, 1, 0 },
        { 0, 1, 1, 0 } };
    Matrix adjacencyExpected = new DenseMatrix(adjacencyCells);

    int vertexCount = HadoopUtil.readInt(new Path(workPath, AdjacencyMatrixJob.NUM_VERTICES), hadoopConf);
    assertEquals(4, vertexCount);

    Path adjacencyPart = new Path(workPath, AdjacencyMatrixJob.ADJACENCY_MATRIX + "/part-r-00000");
    Matrix adjacencyActual = MathHelper.readMatrix(hadoopConf, adjacencyPart, vertexCount, vertexCount);

    logMatrixPair("\nexpected adjacency matrix\n\n", adjacencyExpected,
        "\nactual adjacency matrix \n\n", adjacencyActual);

    // Every column of the expected transition matrix sums to the staying probability (0.8):
    // the out-links of a vertex share that probability equally.
    double[][] transitionCells = {
        { 0.0,         0.4, 0.0, 0.0 },
        { 0.266666667, 0.0, 0.0, 0.4 },
        { 0.266666667, 0.0, 0.8, 0.4 },
        { 0.266666667, 0.4, 0.0, 0.0 } };
    Matrix transitionExpected = new DenseMatrix(transitionCells);

    Path transitionPart = new Path(workPath, "transitionMatrix/part-r-00000");
    Matrix transitionActual = MathHelper.readMatrix(hadoopConf, transitionPart, vertexCount, vertexCount);

    logMatrixPair("\nexpected transition matrix\n\n", transitionExpected,
        "\nactual transition matrix\n\n", transitionActual);

    // Both matrices are logged before either is asserted, so a failure still leaves the full picture in the log.
    MathHelper.assertMatrixEquals(adjacencyExpected, adjacencyActual);
    MathHelper.assertMatrixEquals(transitionExpected, transitionActual);

    // The rank output is parsed as tab-separated "vertexId<TAB>rank" lines.
    Map<Integer,Double> ranks = Maps.newHashMapWithExpectedSize(vertexCount);
    Splitter tabSplitter = Splitter.on("\t");
    File rankPart = new File(rankOutput, "part-m-00000");
    for (CharSequence row : new FileLineIterable(rankPart)) {
      String[] fields = Iterables.toArray(tabSplitter.split(row), String.class);
      int vertexId = Integer.parseInt(fields[0]);
      double rank = Double.parseDouble(fields[1]);
      ranks.put(vertexId, rank);
    }

    assertEquals(4, ranks.size());
    assertEquals(0.1206666, ranks.get(12), EPSILON);
    assertEquals(0.1571111, ranks.get(34), EPSILON);
    assertEquals(0.5651111, ranks.get(56), EPSILON);
    assertEquals(0.1571111, ranks.get(78), EPSILON);
  }

  /**
   * Logs an expected/actual matrix pair at INFO level as a single message.
   *
   * @param expectedHeading text emitted before the rendered expected matrix
   * @param expected        matrix the test expects
   * @param actualHeading   text emitted before the rendered actual matrix
   * @param actual          matrix read back from the job's files
   */
  private static void logMatrixPair(String expectedHeading, Matrix expected, String actualHeading, Matrix actual) {
    String report = new StringBuilder()
        .append(expectedHeading)
        .append(MathHelper.nice(expected))
        .append(actualHeading)
        .append(MathHelper.nice(actual))
        .append('\n')
        .toString();
    log.info(report);
  }

}