/** * */ package uk.bl.wa.hadoop.indexer.mdx; import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MiniMRCluster; import org.apache.hadoop.mapred.OutputLogFilter; import org.codehaus.plexus.util.IOUtil; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import uk.bl.wa.hadoop.indexer.mdx.MDXSeqStatsGenerator; /** * @author Andrew Jackson <Andrew.Jackson@bl.uk> * */ public class MDXSeqStatsGeneratorIntegrationTest { private static final Log log = LogFactory .getLog(MDXSeqStatsGeneratorIntegrationTest.class); // Test cluster: private MiniDFSCluster dfsCluster = null; private MiniMRCluster mrCluster = null; // Input files: public final static String[] testWarcs = new String[] { "mdx-seq/mdx-warc-both.seq" }; private final Path input = new Path("inputs"); private final Path output = new Path("outputs"); /** * @throws java.lang.Exception */ @Before public void setUp() throws Exception { log.warn("Spinning up test cluster..."); // make sure the log folder exists, // otherwise the test fill fail new File("target/test-logs").mkdirs(); // System.setProperty("hadoop.log.dir", "target/test-logs"); System.setProperty("javax.xml.parsers.SAXParserFactory", "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl"); // Configuration conf = new Configuration(); dfsCluster = new MiniDFSCluster(conf, 1, true, null); dfsCluster.getFileSystem().makeQualified(input); dfsCluster.getFileSystem().makeQualified(output); // mrCluster = new MiniMRCluster(1, dfsCluster.getFileSystem().getUri() .toString(), 1); // prepare for tests for (String filename : testWarcs) { WARCMDXGeneratorIntegrationTest.copyFileToTestCluster( dfsCluster.getFileSystem(), input, "src/test/resources/", filename); } log.warn("Spun up test cluster."); } @Test public void testSeqStats() throws Exception { log.info("Checking input file is present..."); // Check that the input file is present: Path[] inputFiles = FileUtil .stat2Paths(dfsCluster.getFileSystem().listStatus( new Path(input, "mdx-seq/"), new OutputLogFilter())); Assert.assertEquals(1, inputFiles.length); // Create a file of the inputs File tmpInputsFile = WARCMDXGeneratorIntegrationTest .writeInputFile(inputFiles); // Set up arguments for the job: String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.output.getName() }; // Set up the WARCIndexerRunner MDXSeqStatsGenerator wir = new MDXSeqStatsGenerator(); // run job // Job configuration: log.info("Setting up job config..."); JobConf jobConf = this.mrCluster.createJobConf(); wir.createJobConf(jobConf, args); log.info("Running job..."); JobClient.runJob(jobConf); log.info("Job finished, checking the results..."); // check the output exists Path[] outputFiles = FileUtil.stat2Paths(dfsCluster.getFileSystem() .listStatus( output, new OutputLogFilter())); // Assert.assertEquals(1, outputFiles.length); // Copy the output out: for (Path output : outputFiles) { FileOutputStream fout = new FileOutputStream("target/" + output.getName()); log.info(" --- output : " + output); if (dfsCluster.getFileSystem().isFile(output)) { InputStream is = dfsCluster.getFileSystem().open(output); IOUtil.copy(is, fout); } else { log.info(" --- ...skipping directory..."); } fout.close(); } // Check contents of the output: // TBA } @After public void tearDown() throws Exception { log.warn("Tearing down test cluster..."); if (dfsCluster != null) { dfsCluster.shutdown(); dfsCluster = null; } if (mrCluster != null) { mrCluster.shutdown(); mrCluster = null; } log.warn("Torn down test cluster."); } }