/**
*
*/
package tap;
import static org.junit.Assert.*;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.List;
import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.Test;
import tap.CommandOptions;
import tap.Phase;
import tap.Pipe;
import tap.Tap;
import tap.core.SummationMapper;
import tap.core.SummationPipeReducer;
import tap.core.WordCountMapper;
import tap.core.WordCountReducer;
import tap.formats.tapproto.Testmsg;
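/**
 * Tests for how Tap phases bind their input and output paths and formats
 * when a phase is planned and, in some tests, executed.
 */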
public class BindingTests {
/**
 * Plans a summation phase whose input is {@code /tmp/gaggle/} and checks the
 * binding of that input: its path is non-null, it is reported as not existing,
 * and its timestamp is 0. The pipeline is then run via {@code make()}.
 */
@Test
public void fileBindingTest1() {
    String[] argv = {
        "BindingTests.fileBindingTest1",
        "-i", "/tmp/gaggle/",
        "-o", "/tmp/TapTestsOutput3",
        "--force"
    };
    CommandOptions options = new CommandOptions(argv);
    Tap tap = new Tap(options);
    tap.alerter(new TapUnitTestAlerter());

    Phase phase = tap.createPhase()
            .reads(options.input)
            .map(SummationMapper.class)
            .groupBy("word")
            .combine(SummationPipeReducer.class)
            .reduce(SummationPipeReducer.class)
            .writes(options.output);
    tap.produces(phase.getOutputs());
    phase.plan(tap);

    assertNotNull(phase.input().getPath());
    System.out.println("timestamp " + phase.input().getTimestamp());
    // The input is expected to be absent, so it has no modification time.
    assertFalse(phase.input().exists());
    assertTrue(phase.input().getTimestamp() == 0);

    tap.make();
}
/**
 * Makes sure the output path {@code /tmp/outfile.txt} exists as a plain file,
 * then plans a word-count phase writing to it and expects exactly one planning
 * error whose message contains "should be a directory".
 *
 * @throws IOException if the output file cannot be created or touched
 */
@Test
public void fileBindingTest2() throws IOException {
    // NOTE(review): the program name below says fileBindingTest1 -- looks like a
    // copy-paste from the previous test; confirm before renaming.
    String[] argv = {
        "BindingTests.fileBindingTest1",
        "-i", "share/decameron.txt",
        "-o", "/tmp/outfile.txt",
        "--force"
    };
    CommandOptions options = new CommandOptions(argv);
    Tap pipeline = new Tap(options);
    pipeline.alerter(new TapUnitTestAlerter());

    // "touch" the output file: create it when missing, otherwise bump its mtime.
    File outFile = new File(options.output);
    if (!outFile.exists()) {
        outFile.createNewFile();
    } else if (!outFile.setLastModified(System.currentTimeMillis())) {
        throw new IOException("Could not touch file");
    }

    Phase phase = pipeline.createPhase()
            .map(WordCountMapper.class)
            .groupBy("word")
            .combine(WordCountReducer.class)
            .reduce(WordCountReducer.class)
            .reads(options.input)
            .writes(options.output);
    pipeline.produces(phase.getOutputs());

    List<PhaseError> planningErrors = phase.plan(pipeline);
    // NOTE(review): an earlier comment here described three errors (outfile.txt is
    // not a directory, it is incompatible with the WordCountReducer output type,
    // and consequently no output type is defined), yet only one error is
    // asserted -- confirm which count is current.
    Assert.assertEquals("Expecting output error", 1, planningErrors.size());
    String firstMessage = planningErrors.get(0).getMessage();
    Assert.assertTrue(firstMessage.contains("should be a directory"));
}
/**
 * Plans (but does not run) a word-count phase over {@code share/decameron.txt}
 * and checks the formats bound to it: the input must be {@code STRING_FORMAT}
 * before and after {@code produces()} and after planning, planning must report
 * no errors, and the output must resolve to {@code AVRO_FORMAT}.
 */
@Test
public void mapOutTest() {
    String[] args = { "BindingTests.mapOutTest", "-i", "share/decameron.txt", "-o",
            "/tmp/TapTestsOutput", "--force" };
    CommandOptions o = new CommandOptions(args);
    Tap tap = new Tap(o);
    tap.alerter(new TapUnitTestAlerter());
    Phase phase1 = tap.createPhase()
            .reads(o.input)
            .map(WordCountMapper.class)
            .groupBy("word")
            .combine(WordCountReducer.class)
            .reduce(WordCountReducer.class)
            .writes(o.output);

    Assert.assertEquals("input format before produces()", "STRING_FORMAT",
            phase1.getInputs().get(0).getFormat().toString());
    tap.produces(phase1.output());
    Assert.assertEquals("input format after produces()", "STRING_FORMAT",
            phase1.getInputs().get(0).getFormat().toString());

    List<PhaseError> phaseErrors = phase1.plan(tap);
    Assert.assertNotNull(phaseErrors);
    // Report the errors BEFORE asserting on their count; once the assertion
    // fails nothing after it runs, so printing afterwards would never happen.
    for (PhaseError e : phaseErrors) {
        System.out.println("mapOutTest: " + e.getMessage());
    }
    Assert.assertEquals("Planning errors ", 0, phaseErrors.size());

    System.out.println(tap.getConf().get("mapred.output.format.class"));
    String outputFormat = phase1.getOutputs().get(0).getFormat().toString();
    System.out.println(outputFormat);
    // Compare by content: assertNotSame only checks reference identity, which
    // says nothing about whether two strings hold the same text.
    Assert.assertFalse("output format should be resolved",
            "UNKNOWN_FORMAT".equals(outputFormat));
    Assert.assertEquals("input format after plan()", "STRING_FORMAT",
            phase1.getInputs().get(0).getFormat().toString());
    Assert.assertEquals("AVRO_FORMAT", outputFormat);
    // Job execution is disabled; only planning is verified.
    //tap.named(o.program).make();
}
/**
 * Builds the smallest pipeline used in this class: a single phase with only a
 * mapper, a group-by key and a reducer (no explicit reads/writes calls), then
 * runs it with {@code make()}.
 */
@Test
public void minimalistTest() {
    String[] argv = {
        "BindingTests.mapOutTest",
        "-i", "share/decameron.txt",
        "-o", "/tmp/TapTestsOutput3",
        "--force"
    };
    CommandOptions options = new CommandOptions(argv);
    Tap pipeline = new Tap(options);

    pipeline.createPhase()
            .map(WordCountMapper.class)
            .groupBy("word")
            .reduce(WordCountReducer.class);

    // Install the unit-test alerter so Hadoop exceptions are trapped automatically.
    pipeline.alerter(new TapUnitTestAlerter());
    pipeline.make();
}
/**
 * Two-step binding check.
 * <ol>
 * <li>Runs a word-count job from {@code share/decameron.txt} into
 * {@code /tmp/TapTestsOutput}.</li>
 * <li>Plans a summation phase that also reads {@code share/decameron.txt} and
 * writes {@code /tmp/TapTestsOutput2}, then checks that the output format
 * resolves to {@code AVRO_FORMAT}.</li>
 * </ol>
 * Planning errors in the second step are tolerated: the input is plain text
 * while the summation mapper expects Avro, so the input-format and zero-error
 * assertions are deliberately not made.
 */
@Test
public void avroInputBindingTest() {
    {
        String[] args = { "BindingTests.mapOutTest", "-i",
                "share/decameron.txt", "-o", "/tmp/TapTestsOutput",
                "--force" };
        CommandOptions o1 = new CommandOptions(args);
        Tap tap1 = new Tap(o1);
        tap1.alerter(new TapUnitTestAlerter());
        tap1.createPhase().reads(o1.input)
                .map(WordCountMapper.class).groupBy("word")
                .combine(WordCountReducer.class)
                .reduce(WordCountReducer.class).writes(o1.output);
        tap1.named(o1.program).make();
    }
    {
        String[] args2 = { "BindingTests.mapOutTest", "-i", "share/decameron.txt", "-o",
                "/tmp/TapTestsOutput2", "--force" };
        CommandOptions o2 = new CommandOptions(args2);
        Tap tap2 = new Tap(o2);
        tap2.alerter(new TapUnitTestAlerter());
        Phase phase2 = tap2
                .createPhase().reads(o2.input)
                .map(SummationMapper.class).groupBy("word")
                .combine(SummationPipeReducer.class)
                .reduce(SummationPipeReducer.class).writes(o2.output);
        tap2.produces(phase2.output());
        // There are, and should be, planning errors here: the input is
        // decameron.txt but AVRO is expected, so the result is not asserted on.
        phase2.plan(tap2);

        System.out.println(tap2.getConf().get("mapred.output.format.class"));
        String outputFormat = phase2.getOutputs().get(0).getFormat().toString();
        System.out.println(outputFormat);
        // Compare by content: assertNotSame only checks reference identity.
        Assert.assertFalse("output format should be resolved",
                "UNKNOWN_FORMAT".equals(outputFormat));
        Assert.assertEquals("AVRO_FORMAT", outputFormat);
    }
}
/**
 * Plans a summation phase that reads {@code /tmp/TapTestsOutput} and checks
 * that the bound output format is not {@code STRING_FORMAT} and the bound input
 * format is {@code AVRO_FORMAT}.
 *
 * NOTE(review): {@code /tmp/TapTestsOutput} is the output path of other tests in
 * this class, so this test presumably relies on one of them having run first --
 * confirm.
 */
@Test
public void outputStringBindingTest() {
    String[] args2 = { "BindingTests.mapOutTest", "-i", "/tmp/TapTestsOutput", "-o",
            "/tmp/TapTestsOutput3", "--force" };
    CommandOptions o2 = new CommandOptions(args2);
    Tap tap2 = new Tap(o2);
    tap2.alerter(new TapUnitTestAlerter());
    Phase phase2 = tap2
            .createPhase().reads(o2.input)
            .map(SummationMapper.class).groupBy("word")
            .combine(SummationPipeReducer.class)
            .reduce(SummationPipeReducer.class).writes(o2.output);
    tap2.produces(phase2.getOutputs());
    phase2.plan(tap2);

    String outputFormat = phase2.getOutputs().get(0).getFormat().toString();
    // Compare by content: assertNotSame only checks reference identity, which
    // says nothing about whether two strings hold the same text.
    Assert.assertFalse("output must not be bound to STRING_FORMAT",
            "STRING_FORMAT".equals(outputFormat));
    Assert.assertEquals("AVRO_FORMAT", phase2.getInputs().get(0).getFormat().toString());
}
/**
 * The input file contains data in Avro format while the mapper expects data in
 * protobuf format, so {@code phase.plan} should report at least one error.
 */
@Test
public void checkFileContentsTest() {
    String[] argv = {
        "BindingTests.checkFileContents",
        "-i", "share/test_data.avro",
        "-o", "/tmp/TapTestsOutput3",
        "--force"
    };
    CommandOptions options = new CommandOptions(argv);
    Tap pipeline = new Tap(options);

    Phase phase = pipeline.createPhase()
            .reads(options.input)
            .map(Mapper.class)
            .groupBy("group")
            .reduce(Reducer.class)
            .sortBy("extra, subsort")
            .writes(options.output);
    pipeline.produces(phase.getOutputs());

    List<PhaseError> planningErrors = phase.plan(pipeline);
    Assert.assertNotNull(planningErrors);
    Assert.assertFalse("planning error", planningErrors.isEmpty());
}
/**
 * Runs a word-count job whose input is the directory {@code share/multi/01}
 * and checks that {@code make()} returns 0 and that
 * {@code part-00000.avro} exists under the output path.
 */
@Test
public void directoryTest() {
    String[] argv = {
        "BindingTests.directoryTest",
        "-i", "share/multi/01",
        "-o", "/tmp/TapTestsOutput4",
        "--force"
    };
    CommandOptions options = new CommandOptions(argv);
    Tap pipeline = new Tap(options);

    pipeline.createPhase()
            .map(WordCountMapper.class)
            .reads(options.input)
            .groupBy("word")
            .reduce(WordCountReducer.class)
            .writes(options.output);

    // Install the unit-test alerter so Hadoop exceptions are trapped automatically.
    pipeline.alerter(new TapUnitTestAlerter());

    int exitCode = pipeline.make();
    Assert.assertEquals(0, exitCode);

    File firstPart = new File(options.output + "/part-00000.avro");
    Assert.assertTrue(firstPart.exists());
}
/**
 * Runs the protobuf mapper/reducer pipeline against
 * {@code share/wrong_extension.tapproto} and expects {@code make()} to return
 * a non-zero code.
 */
@Test
public void wrongExtensionTest() {
    String[] argv = {
        "BindingTests.checkFileContents",
        "-i", "share/wrong_extension.tapproto",
        "-o", "/tmp/TapTestsOutput3",
        "--force"
    };
    CommandOptions options = new CommandOptions(argv);
    Tap pipeline = new Tap(options);

    // The phase is registered on the pipeline by createPhase(); its reference
    // is not needed afterwards.
    pipeline.createPhase()
            .reads(options.input)
            .map(Mapper.class)
            .groupBy("group")
            .reduce(Reducer.class)
            .sortBy("extra, subsort")
            .writes(options.output);

    int exitCode = pipeline.make();
    Assert.assertTrue(exitCode != 0);
}
public class StringOutReducer extends TapReducer<CountRec, String> {
private OutputLog outLog = new OutputLog("sum of words", 0);
@Override
public void reduce(Pipe<CountRec> in, Pipe<String> out) {
int loopCount = 0;
CountRec val;
while (in.hasNext()) {
val = in.next();
// System.out.printf("<CountRec> (%s, %d) \n", val.word,
// val.count);
outLog.count += val.count;
loopCount++;
}
System.out
.printf("SumationPipeReducer: Loop Count=%d Outputing outlog.count = %d\n",
loopCount, outLog.count);
out.put("sum is " + outLog.count);
}
}
/**
 * Identity mapper over protobuf {@code Testmsg.TestRecord} messages: every
 * record received is written to the output pipe unchanged.
 */
public static class Mapper extends TapMapper<Testmsg.TestRecord, Testmsg.TestRecord> {

    /**
     * Forwards {@code msg} to {@code out} as-is.
     *
     * @param msg the incoming record
     * @param out pipe that receives the same record
     */
    public void map(final Testmsg.TestRecord msg, final Pipe<Testmsg.TestRecord> out) {
        out.put(msg);
    }
}
public static class Reducer extends TapReducer<Testmsg.TestRecord, Testmsg.TestRecord>
{
public void reduce(Pipe<Testmsg.TestRecord> in, Pipe<Testmsg.TestRecord> out)
{
System.out.println("**************");
for(Testmsg.TestRecord rec : in)
{
System.out.println(rec.getGroup() + " " + rec.getExtra() + " " + rec.getSubsort());
}
}
}
}