package org.bigtop.bigpetstore.integration;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.Map;
import java.util.Map.Entry;

import junit.framework.Assert;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.pig.ExecType;
import org.bigtop.bigpetstore.etl.PigCSVCleaner;
import org.bigtop.bigpetstore.util.BigPetStoreConstants;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Function;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Files;

/**
 * Integration test for the Pig stage of BigPetStore.
 *
 * <p>Runs {@link PigCSVCleaner} in local mode against the generated test
 * input using the {@code BPS_analytics.pig} script, then reads the first
 * part file of every expected output directory and feeds each line to a
 * per-output validator.
 */
public class BigPetStorePigIT extends ITUtils {

    final static Logger log = LoggerFactory.getLogger(BigPetStorePigIT.class);

    /**
     * An extra unsupported code path that we have so
     * people can do ad hoc analytics on pig data after it is
     * cleaned.
     */
    public static final Path BPS_TEST_PIG_COUNT_PRODUCTS = fs.makeQualified(
            new Path("bps_integration_",
                    BigPetStoreConstants.OUTPUTS.pig_ad_hoc_script.name() + "0"));

    /** Pig script handed to the cleaner; resolved against the working directory. */
    static final File PIG_SCRIPT = new File("BPS_analytics.pig");

    static {
        // Fail at class-load time: without the script no test here can run.
        if (!PIG_SCRIPT.exists()) {
            throw new RuntimeException("Couldnt find pig script at "
                    + PIG_SCRIPT.getAbsolutePath());
        }
    }

    /**
     * Runs the shared setup and then removes output left over from a
     * previous run. The cleanup is best-effort: a failure is logged and the
     * test proceeds.
     */
    @Before
    public void setupTest() throws Throwable {
        super.setup();
        try {
            FileSystem fileSystem = FileSystem.get(new Configuration());
            // Recursive delete; the single-argument overload is deprecated.
            fileSystem.delete(BPS_TEST_PIG_CLEANED, true);
            fileSystem.delete(BPS_TEST_PIG_COUNT_PRODUCTS, true);
        } catch (Exception e) {
            // Not necessarily an error, but keep the cause visible.
            log.warn("didnt need to delete pig output.", e);
        }
    }

    /**
     * Output directories to verify, each mapped to the validator applied to
     * every line of its part file. Both validators currently accept any
     * line, so in effect the test checks that the output exists and is
     * readable.
     */
    static Map<Path, Function<String, Boolean>> TESTS = ImmutableMap.of(
            // Test of the main output.
            BPS_TEST_PIG_CLEANED,
            new Function<String, Boolean>() {
                @Override
                public Boolean apply(String x) {
                    return true;
                }
            },
            // Example of how to count products
            // after doing basic pig data cleanup.
            BPS_TEST_PIG_COUNT_PRODUCTS,
            new Function<String, Boolean>() {
                @Override
                public Boolean apply(String x) {
                    return true;
                }
            });

    /**
     * The "core" task reformats data to TSV. Let's test that first.
     */
    @Test
    public void testPetStoreCorePipeline() throws Exception {
        runPig(BPS_TEST_GENERATED, BPS_TEST_PIG_CLEANED, PIG_SCRIPT);
        for (Entry<Path, Function<String, Boolean>> e : TESTS.entrySet()) {
            assertOutput(e.getKey(), e.getValue());
        }
    }

    /**
     * Prints the files found under {@code base}, then reads its first part
     * file line by line and asserts that {@code validator} accepts each line.
     *
     * @param base      output directory to inspect on the local file system
     * @param validator returns {@code true} for every acceptable line
     * @throws Exception if the directory or part file cannot be read
     */
    public static void assertOutput(Path base, Function<String, Boolean> validator)
            throws Exception {
        FileSystem localFs = FileSystem.getLocal(new Configuration());

        // Print out all the files.
        for (FileStatus stat : localFs.listStatus(base)) {
            System.out.println(stat.getPath() + " " + stat.getLen());
        }

        // Support map OR reduce outputs.
        Path partm = new Path(base, "part-m-00000");
        Path partr = new Path(base, "part-r-00000");
        Path p = localFs.exists(partm) ? partm : partr;

        // Now we read through the file and validate its contents.
        // Example line: {"product":"big chew toy","count":3}
        BufferedReader r = new BufferedReader(
                new InputStreamReader(localFs.open(p), "UTF-8"));
        try {
            String line;
            // readLine() returning null is the end-of-file signal; ready()
            // only reports whether a read would block.
            while ((line = r.readLine()) != null) {
                log.info("line:{}", line);
                Assert.assertTrue("validationg line : " + line, validator.apply(line));
            }
        } finally {
            // Release the file handle even when an assertion fails.
            r.close();
        }
    }

    // Not referenced anywhere in this class; retained because it is package-visible.
    Map pigResult;

    /**
     * Runs the pig script over {@code input}, writing to {@code output}, in
     * local execution mode. The cleaner instance is discarded, so the work
     * presumably happens inside the {@link PigCSVCleaner} constructor —
     * confirm against that class.
     */
    private void runPig(Path input, Path output, File pigscript) throws Exception {
        new PigCSVCleaner(input, output, ExecType.LOCAL, pigscript);
    }
}