/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kiji.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

import com.google.common.collect.Sets;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Counters;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.kiji.mapreduce.framework.JobHistoryCounters;
import org.kiji.mapreduce.output.MapReduceJobOutputs;
import org.kiji.mapreduce.produce.KijiProduceJobBuilder;
import org.kiji.mapreduce.produce.KijiProducer;
import org.kiji.mapreduce.produce.ProducerContext;
import org.kiji.schema.EntityId;
import org.kiji.schema.KijiClientTest;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiDataRequestBuilder;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiRowScanner;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiTableReader;
import org.kiji.schema.layout.KijiTableLayout;
import org.kiji.schema.util.InstanceBuilder;
import org.kiji.schema.util.ResourceUtils;

/**
 * Runs a producer job in-process against a fake HBase instance.
 *
 * <p>Each test starts from a table named "test" that holds two rows
 * ("Marsellus Wallace" and "Vincent Vega"), each with an "info:first_name" and an
 * "info:last_name" cell. The producers under test read the "info" family and write
 * their output back into the same table.</p>
 */
public class TestProducer extends KijiClientTest {
  private static final Logger LOG = LoggerFactory.getLogger(TestProducer.class);

  /** Test table, owned by this test. */
  private KijiTable mTable;

  /** Table reader, owned by this test. */
  private KijiTableReader mReader;

  /**
   * Creates and populates the "test" table, then opens the table and a reader on it.
   *
   * @throws Exception if the table cannot be created, populated or opened.
   */
  @Before
  public final void setupTestProducer() throws Exception {
    // Get the test table layouts.
    final KijiTableLayout layout =
        KijiTableLayout.newLayout(KijiMRTestLayouts.getTestLayout());

    // Populate the environment.
    new InstanceBuilder(getKiji())
        .withTable("test", layout)
            .withRow("Marsellus Wallace")
                .withFamily("info")
                    .withQualifier("first_name").withValue("Marsellus")
                    .withQualifier("last_name").withValue("Wallace")
            .withRow("Vincent Vega")
                .withFamily("info")
                    .withQualifier("first_name").withValue("Vincent")
                    .withQualifier("last_name").withValue("Vega")
        .build();

    // Fill local variables.
    mTable = getKiji().openTable("test");
    mReader = mTable.openTableReader();
  }

  /**
   * Closes the reader and releases the table opened by {@link #setupTestProducer()}.
   *
   * @throws Exception declared for the JUnit lifecycle signature.
   */
  @After
  public final void teardownTestProducer() throws Exception {
    ResourceUtils.closeOrLog(mReader);
    ResourceUtils.releaseOrLog(mTable);
  }

  /**
   * Producer intended to run on the generic KijiMR test layout. Uses resource
   * org/kiji/mapreduce/layout/test.json.
   *
   * <p>For every input row it writes a single cell into "map_family" under the qualifier
   * "produced qualifier"; the cell content embeds the row key and the row's first name.</p>
   */
  public static class SimpleProducer extends KijiProducer {
    /** {@inheritDoc} */
    @Override
    public KijiDataRequest getDataRequest() {
      return KijiDataRequest.create("info");
    }

    /** {@inheritDoc} */
    @Override
    public String getOutputColumn() {
      return "map_family";
    }

    /** {@inheritDoc} */
    @Override
    public void produce(KijiRowData input, ProducerContext context) throws IOException {
      // The first entity ID component is read back as raw bytes and decoded to the row key.
      final String userId = Bytes.toString((byte[]) input.getEntityId().getComponentByIndex(0));
      final String firstName = input.getMostRecentValue("info", "first_name").toString();
      context.put("produced qualifier",
          String.format("produced content for row '%s': %s", userId, firstName));
    }
  }

  /**
   * Runs {@link SimpleProducer} over the test table and checks that every row received
   * exactly one "map_family" cell with the expected qualifier and content prefix.
   *
   * @throws Exception on any job, I/O or assertion failure.
   */
  @Test
  public void testSimpleProducer() throws Exception {
    // Run producer:
    final KijiMapReduceJob job = KijiProduceJobBuilder.create()
        .withConf(getConf())
        .withProducer(SimpleProducer.class)
        .withInputTable(mTable.getURI())
        .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(mTable.getURI()))
        .build();
    assertTrue(job.run());

    // Validate produced output:
    final KijiDataRequestBuilder builder = KijiDataRequest.builder();
    builder.newColumnsDef().addFamily("info").addFamily("map_family");
    final KijiRowScanner scanner = mReader.getScanner(builder.build());
    try {
      for (KijiRowData row : scanner) {
        final EntityId eid = row.getEntityId();
        final String userId = Bytes.toString((byte[]) eid.getComponentByIndex(0));
        LOG.info("Row: {}", userId);

        // The row key is "<first_name> <last_name>" for every row created in setup.
        assertEquals(userId, String.format("%s %s",
            row.getMostRecentValue("info", "first_name"),
            row.getMostRecentValue("info", "last_name")));

        // Fetch the produced family once and reuse it for both the size and content checks.
        final Map<String, Utf8> produced = row.<Utf8>getMostRecentValues("map_family");
        assertEquals(1, produced.size());
        final Map.Entry<String, Utf8> entry = produced.entrySet().iterator().next();
        assertEquals("produced qualifier", entry.getKey().toString());
        assertTrue(entry.getValue().toString()
            .startsWith(String.format("produced content for row '%s': ", userId)));
      }
    } finally {
      // Close the scanner even when an assertion above fails, so it is never leaked.
      scanner.close();
    }
  }

  /**
   * Producer to test the setup/produce/cleanup workflow.
   *
   * <p>Asserts from inside the job that setup runs first, that produce runs between setup
   * and cleanup, and that cleanup runs after exactly two rows have been produced. Each
   * produce call writes the row key into "primitives:string".</p>
   */
  public static class ProducerWorkflow extends KijiProducer {
    /** Set once setup() has run. */
    private boolean mSetupFlag = false;

    /** Set once cleanup() has run. */
    private boolean mCleanupFlag = false;

    /** Number of produce() calls seen so far. */
    private int mProduceCounter = 0;

    /** {@inheritDoc} */
    @Override
    public KijiDataRequest getDataRequest() {
      return KijiDataRequest.create("info");
    }

    /** {@inheritDoc} */
    @Override
    public String getOutputColumn() {
      return "primitives:string";
    }

    /** {@inheritDoc} */
    @Override
    public void setup(KijiContext context) throws IOException {
      super.setup(context);
      assertFalse(mSetupFlag);
      assertEquals(0, mProduceCounter);
      mSetupFlag = true;
    }

    /** {@inheritDoc} */
    @Override
    public void produce(KijiRowData input, ProducerContext context) throws IOException {
      assertTrue(mSetupFlag);
      assertFalse(mCleanupFlag);
      mProduceCounter += 1;
      final String rowKey = Bytes.toString((byte[]) input.getEntityId().getComponentByIndex(0));
      context.put(rowKey);
    }

    /** {@inheritDoc} */
    @Override
    public void cleanup(KijiContext context) throws IOException {
      assertTrue(mSetupFlag);
      assertFalse(mCleanupFlag);
      assertEquals(2, mProduceCounter);  // testProducerWorkflow sets up exactly 2 rows
      mCleanupFlag = true;
      super.cleanup(context);
    }
  }

  /**
   * Tests the producer workflow (setup/produce/cleanup) and counters.
   *
   * @throws Exception on any job, I/O or assertion failure.
   */
  @Test
  public void testProducerWorkflow() throws Exception {
    // Run producer:
    final KijiMapReduceJob job = KijiProduceJobBuilder.create()
        .withConf(getConf())
        .withProducer(ProducerWorkflow.class)
        .withInputTable(mTable.getURI())
        .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(mTable.getURI()))
        .build();
    assertTrue(job.run());

    // Validate produced output:
    final Set<String> produced = Sets.newHashSet();
    final KijiRowScanner scanner = mReader.getScanner(
        KijiDataRequest.create("primitives", "string"));
    try {
      for (KijiRowData row : scanner) {
        produced.add(row.getMostRecentValue("primitives", "string").toString());
      }
    } finally {
      // Close the scanner even if reading a row throws, so it is never leaked.
      scanner.close();
    }

    assertTrue(produced.contains("Marsellus Wallace"));
    assertTrue(produced.contains("Vincent Vega"));

    final Counters counters = job.getHadoopJob().getCounters();
    assertEquals(2, counters.findCounter(JobHistoryCounters.PRODUCER_ROWS_PROCESSED).getValue());
  }

  // TODO(KIJI-359): Missing tests :
  //  - Outputting to wrong column qualifier
  //  - producing an HFile and bulk-loading
  //  - multi-threaded producer/mapper
  //  - key/value stores
}