package edu.umd.cloud9.integration.example.bfs;
import static org.junit.Assert.assertTrue;
import java.util.Random;
import junit.framework.JUnit4TestAdapter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
import tl.lin.data.pair.PairOfStrings;
import com.google.common.base.Joiner;
import edu.umd.cloud9.integration.IntegrationUtils;
/**
 * Integration test that runs the breadth-first-search example end to end on the
 * English Wikipedia dump located at {@link #collectionPath}.
 *
 * <p>The test launches a chain of {@code hadoop jar} commands through
 * {@link IntegrationUtils#exec}: docno mapping, repacking, link-graph extraction,
 * BFS graph encoding, and two BFS iterations. After the link-graph step and after
 * each BFS iteration it checks that expected counter strings appear in the right
 * element of the {@link PairOfStrings} returned by {@code exec} (named "error output"
 * here; presumably the process's stderr -- confirm against IntegrationUtils).
 *
 * <p>All intermediate outputs are written under paths starting with {@link #tmpPrefix}.
 * NOTE(review): this test does not delete those outputs afterwards.
 */
public class BfsIT {
  private static final Random random = new Random();

  /** Location of the Wikipedia dump used as input; the test asserts that it exists. */
  private static final Path collectionPath =
      new Path("/collections/wikipedia/enwiki-20121201-pages-articles");

  /** Prefix for every output path, randomized to reduce collisions between runs. */
  private static final String tmpPrefix = "tmp-"
      + BfsIT.class.getCanonicalName() + "-" + random.nextInt(10000);

  /**
   * Runs the full pipeline and verifies the expected counters after the link-graph
   * extraction and after each of the two BFS iterations.
   *
   * @throws Exception if obtaining the file system or executing any command fails
   */
  @Test
  public void testBfs() throws Exception {
    Configuration conf = IntegrationUtils.getBespinConfiguration();
    FileSystem fs = FileSystem.get(conf);

    assertTrue("Input collection not found: " + collectionPath, fs.exists(collectionPath));

    // Intermediate locations shared between consecutive steps.
    String docnoFile = tmpPrefix + "-enwiki-20121201-docno.dat";
    String blockDir = tmpPrefix + "-enwiki-20121201.block";
    String edgesDir = tmpPrefix + "-enwiki-20121201.edges";
    String adjacencyDir = tmpPrefix + "-enwiki-20121201.adj";
    String bfsDir = tmpPrefix + "-enwiki-20121201.bfs";

    // Build the docno mapping for the collection.
    runTool(edu.umd.cloud9.collection.wikipedia.WikipediaDocnoMappingBuilder.class,
        "-input", collectionPath.toString(),
        "-output_file", docnoFile,
        "-wiki_language", "en", "-keep_all");

    // Repack the collection using the mapping built above.
    runTool(edu.umd.cloud9.collection.wikipedia.RepackWikipedia.class,
        "-input", collectionPath.toString(),
        "-output", blockDir,
        "-mapping_file", docnoFile,
        "-wiki_language", "en",
        "-compression_type", "block");

    // Extract the link graph and check its size counters.
    PairOfStrings out = runTool(
        edu.umd.cloud9.collection.wikipedia.graph.ExtractWikipediaLinkGraph.class,
        "-input", blockDir,
        "-edges_output", edgesDir,
        "-adjacency_list_output", adjacencyDir,
        "-num_partitions", "10");

    String errorOut = out.getRightElement();
    assertOutputContains(errorOut, "EDGES=121762273");
    assertOutputContains(errorOut, "TOTAL_VERTICES=12961996");
    assertOutputContains(errorOut, "VERTICES_WITH_OUTLINKS=10813673");

    // Encode the adjacency lists as the initial BFS state (iteration 0).
    runTool(edu.umd.cloud9.example.bfs.EncodeBfsGraph.class,
        "-input", adjacencyDir,
        "-output", bfsDir + "/iter0000",
        "-src", "12");

    // First iteration of BFS.
    out = runTool(edu.umd.cloud9.example.bfs.IterateBfs.class,
        "-input", bfsDir + "/iter0000",
        "-output", bfsDir + "/iter0001",
        "-num_partitions", "10");

    errorOut = out.getRightElement();
    assertOutputContains(errorOut, "ReachableInMapper=1");
    assertOutputContains(errorOut, "ReachableInReducer=573");

    // Second iteration of BFS.
    out = runTool(edu.umd.cloud9.example.bfs.IterateBfs.class,
        "-input", bfsDir + "/iter0001",
        "-output", bfsDir + "/iter0002",
        "-num_partitions", "10");

    errorOut = out.getRightElement();
    assertOutputContains(errorOut, "ReachableInMapper=573");
    assertOutputContains(errorOut, "ReachableInReducer=37733");
  }

  /**
   * Executes {@code hadoop jar <cloud9 jar> <tool class> <toolArgs...>} via
   * {@link IntegrationUtils#exec}, joining all parts with single spaces.
   *
   * @param tool the class whose canonical name is passed as the main class
   * @param toolArgs command-line arguments appended after the class name
   * @return the pair returned by {@code IntegrationUtils.exec}
   * @throws Exception if the command execution fails
   */
  private static PairOfStrings runTool(Class<?> tool, String... toolArgs) throws Exception {
    String[] command = new String[toolArgs.length + 3];
    // "hadoop jar" is a single element so the joined string reads "hadoop jar <jar> ...".
    command[0] = "hadoop jar";
    command[1] = IntegrationUtils.getJar("target", "cloud9");
    command[2] = tool.getCanonicalName();
    System.arraycopy(toolArgs, 0, command, 3, toolArgs.length);

    return IntegrationUtils.exec(Joiner.on(" ").join(command));
  }

  /**
   * Asserts that {@code output} contains {@code expected}, naming the missing text on
   * failure so the broken counter can be identified from the test report.
   *
   * @param output the captured command output to search
   * @param expected the substring that must be present
   */
  private static void assertOutputContains(String output, String expected) {
    assertTrue("Expected \"" + expected + "\" in command output", output.contains(expected));
  }

  /**
   * Exposes this JUnit 4 test class through a JUnit 3 style suite.
   *
   * @return an adapter wrapping {@code BfsIT}
   */
  public static junit.framework.Test suite() {
    return new JUnit4TestAdapter(BfsIT.class);
  }
}