package com.amazonaws.bigdatablog.indexcommoncrawl;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.scheme.local.TextLine;
import cascading.tap.Tap;
import cascading.tap.local.FileTap;
import cascading.tuple.Fields;
import org.junit.Before;
import org.junit.Test;
import cascading.flow.FlowDef;
import cascading.flow.local.LocalFlowConnector;
import java.io.IOException;
import java.util.Properties;
/**
 * Tests for {@link CommonCrawlIndex} that build a Cascading flow definition
 * from the properties rendered for this test class and run it to completion.
 */
public class CommonCrawlIndexTest {

    /** Property key that selects which flow connector {@link #testMain()} uses. */
    private static final String PLATFORM_KEY = "platform";

    /** Value of {@link #PLATFORM_KEY} that selects the local (non-Hadoop) connector. */
    private static final String LOCAL_PLATFORM = "LOCAL";

    /**
     * Forces the {@code line.separator} system property to {@code "\n"} before
     * each test, presumably so that the written output does not depend on the
     * operating system running the test.
     *
     * NOTE(review): the previous value is never restored, so the override
     * remains in effect for anything else running in the same JVM.
     */
    @Before
    public void doNotCareAboutOsStuff() {
        System.setProperty("line.separator", "\n");
    }

    /**
     * Builds the flow definition via {@link CommonCrawlIndex#buildFlowDef} and
     * runs it with the connector selected by the {@code platform} property:
     * the local connector for {@code "LOCAL"}, the Hadoop connector otherwise.
     *
     * @throws IOException propagated from loading the properties or building the flow
     * @throws IllegalStateException if the {@code platform} property is not set
     */
    @Test
    public void testMain() throws IOException {
        Properties properties = loadProperties();
        FlowDef flowDef = CommonCrawlIndex.buildFlowDef(properties);

        // Fail with an explicit message instead of a bare NullPointerException
        // when the test configuration does not define the platform.
        String platform = properties.getProperty(PLATFORM_KEY);
        if (platform == null) {
            throw new IllegalStateException(
                    "Missing required test property '" + PLATFORM_KEY + "'");
        }

        if (LOCAL_PLATFORM.equals(platform)) {
            // Use the Cascading local connector to exclude Hadoop and test only the logic.
            new LocalFlowConnector(properties).connect(flowDef).complete();
        } else {
            new HadoopFlowConnector(properties).connect(flowDef).complete();
        }
    }

    /**
     * Runs the flow produced by {@link CommonCrawlIndex#createCommonCrawlFlowDef}
     * on the local connector, reading from the {@code inPath} property and
     * writing to the {@code testCreateCommonCrawlFlowDefOutput} property, then
     * checks the result against the file named by
     * {@code testCreateCommonCrawlFlowDefOutputValidation}.
     *
     * @throws Exception if loading the properties, running the flow, or the
     *                   final check fails
     */
    @Test
    public void testCreateCommonCrawlFlowDef() throws Exception {
        Properties properties = loadProperties();
        String sourcePath = properties.getProperty("inPath");
        String sinkPath = properties.getProperty("testCreateCommonCrawlFlowDefOutput");
        String sinkValidationPath =
                properties.getProperty("testCreateCommonCrawlFlowDefOutputValidation");

        // Cascading "source" (input) tap: reads the input file as text lines.
        Tap source = new FileTap(new TextLine(new Fields("line")), sourcePath);
        // Cascading "sink" (output) tap: writes the results as text lines.
        Tap sink = new FileTap(new TextLine(new Fields("line")), sinkPath);

        // Build the Cascading flow definition and run it locally.
        FlowDef flowDef = CommonCrawlIndex.createCommonCrawlFlowDef(source, sink);
        new LocalFlowConnector(properties).connect(flowDef).complete();

        // Presumably compares the produced file with the expected one; the
        // helper is declared outside this file -- verify its exact semantics.
        Assert.sameContent(sinkPath, sinkValidationPath);
    }

    /**
     * Loads the properties that {@link ConfigReader} renders for this test class.
     *
     * @return the rendered properties
     * @throws IOException propagated from {@link ConfigReader}
     */
    private static Properties loadProperties() throws IOException {
        return new ConfigReader().renderProperties(CommonCrawlIndexTest.class);
    }
}