/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kiji.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.util.Set;

import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counters;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.kiji.mapreduce.bulkimport.KijiBulkImportJobBuilder;
import org.kiji.mapreduce.bulkimport.KijiBulkImporter;
import org.kiji.mapreduce.framework.JobHistoryCounters;
import org.kiji.mapreduce.input.MapReduceJobInputs;
import org.kiji.mapreduce.output.MapReduceJobOutputs;
import org.kiji.schema.EntityId;
import org.kiji.schema.KijiClientTest;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiRowScanner;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiTableReader;
import org.kiji.schema.layout.KijiTableLayout;
import org.kiji.schema.util.InstanceBuilder;
import org.kiji.schema.util.ResourceUtils;

/** Runs a bulk-importer job in-process against a fake HBase instance. */
public class TestBulkImporter extends KijiClientTest {
  private static final Logger LOG = LoggerFactory.getLogger(TestBulkImporter.class);

  private KijiTable mTable;
  private KijiTableReader mReader;

  @Before
  public final void setupTestBulkImporter() throws Exception {
    // Get the test table layouts.
    final KijiTableLayout layout =
        KijiTableLayout.newLayout(KijiMRTestLayouts.getTestLayout());

    // Populate the environment.
    new InstanceBuilder(getKiji())
        .withTable("test", layout)
        .build();

    // Fill local variables.
    mTable = getKiji().openTable("test");
    mReader = mTable.openTableReader();
  }

  @After
  public final void teardownTestBulkImporter() throws Exception {
    ResourceUtils.closeOrLog(mReader);
    ResourceUtils.releaseOrLog(mTable);
  }
  /**
   * Bulk-importer intended to run on the generic KijiMR test layout.
   * Uses the resource org/kiji/mapreduce/layout/test.json.
   */
  public static class SimpleBulkImporter extends KijiBulkImporter<LongWritable, Text> {
    /** {@inheritDoc} */
    @Override
    public void produce(LongWritable inputKey, Text value, KijiTableContext context)
        throws IOException {
      final String line = value.toString();
      final String[] split = line.split(":");
      Preconditions.checkState(split.length == 2,
          String.format("Unable to parse bulk-import test input line: '%s'.", line));
      final String rowKey = split[0];
      final String name = split[1];
      final EntityId eid = context.getEntityId(rowKey);
      context.put(eid, "primitives", "string", name);
      context.put(eid, "primitives", "long", inputKey.get());
    }
  }

  @Test
  public void testSimpleBulkImporter() throws Exception {
    // Prepare input file:
    final File inputFile = File.createTempFile("TestBulkImportInput", ".txt", getLocalTempDir());
    TestingResources.writeTextFile(inputFile,
        TestingResources.get("org/kiji/mapreduce/TestBulkImportInput.txt"));

    // Run the bulk-import:
    final KijiMapReduceJob job = KijiBulkImportJobBuilder.create()
        .withConf(getConf())
        .withBulkImporter(SimpleBulkImporter.class)
        .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(inputFile.toString())))
        .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(mTable.getURI()))
        .build();
    assertTrue(job.run());

    // Validate output:
    final KijiRowScanner scanner = mReader.getScanner(KijiDataRequest.create("primitives"));
    for (KijiRowData row : scanner) {
      final EntityId eid = row.getEntityId();
      final String rowId = Bytes.toString((byte[]) eid.getComponentByIndex(0));
      final String cellContent = row.getMostRecentValue("primitives", "string").toString();
      LOG.info("Row: {}, primitives.string: {}, primitives.long: {}",
          rowId, cellContent, row.getMostRecentValue("primitives", "long"));
      if (rowId.equals("row1")) {
        assertEquals("Marsellus Wallace", cellContent);
      } else if (rowId.equals("row2")) {
        assertEquals("Vincent Vega", cellContent);
      } else {
        fail();
      }
    }
    scanner.close();
  }

  /**
   * Bulk-importer intended to run on the generic KijiMR test layout.
   *
   * @see testing resource org/kiji/mapreduce/layout/test.json
   */
  public static class BulkImporterWorkflow extends KijiBulkImporter<LongWritable, Text> {
    private boolean mSetupFlag = false;
    private int mProduceCounter = 0;
    private boolean mCleanupFlag = false;

    /** {@inheritDoc} */
    @Override
    public void setup(KijiTableContext context) throws IOException {
      super.setup(context);
      assertFalse(mSetupFlag);
      assertEquals(0, mProduceCounter);
      mSetupFlag = true;
    }

    /** {@inheritDoc} */
    @Override
    public void produce(LongWritable inputKey, Text value, KijiTableContext context)
        throws IOException {
      assertTrue(mSetupFlag);
      assertFalse(mCleanupFlag);
      mProduceCounter += 1;

      final String line = value.toString();
      final String[] split = line.split(":");
      Preconditions.checkState(split.length == 2,
          String.format("Unable to parse bulk-import test input line: '%s'.", line));
      final String rowKey = split[0];
      final String name = split[1];
      final EntityId eid = context.getEntityId(rowKey);
      context.put(eid, "primitives", "string", name);
    }

    /** {@inheritDoc} */
    @Override
    public void cleanup(KijiTableContext context) throws IOException {
      assertTrue(mSetupFlag);
      assertFalse(mCleanupFlag);
      assertEquals(2, mProduceCounter);  // input file has 2 entries
      mCleanupFlag = true;
      super.cleanup(context);
    }
  }

  /** Tests the bulk-importer workflow (setup/produce/cleanup) and counters. */
  @Test
  public void testBulkImporterWorkflow() throws Exception {
    // Prepare input file:
    final File inputFile = File.createTempFile("TestBulkImportInput", ".txt", getLocalTempDir());
    TestingResources.writeTextFile(inputFile,
        TestingResources.get("org/kiji/mapreduce/TestBulkImportInput.txt"));

    // Run the bulk-import:
    final KijiMapReduceJob job = KijiBulkImportJobBuilder.create()
        .withConf(getConf())
        .withBulkImporter(BulkImporterWorkflow.class)
        .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(inputFile.toString())))
        .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(mTable.getURI()))
        .build();
    assertTrue(job.run());

    // Validate output:
    final KijiRowScanner scanner = mReader.getScanner(KijiDataRequest.create("primitives"));
    final Set<String> produced = Sets.newHashSet();
    for (KijiRowData row : scanner) {
      final String string = row.getMostRecentValue("primitives", "string").toString();
      produced.add(string);
    }
    scanner.close();

    assertTrue(produced.contains("Marsellus Wallace"));
    assertTrue(produced.contains("Vincent Vega"));

    final Counters counters = job.getHadoopJob().getCounters();
    assertEquals(2,
        counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_PROCESSED).getValue());
  }

  // TODO(KIJI-359): Implement missing tests
  //  - bulk-importing to HFiles (a hedged sketch follows below)
  //  - bulk-importing multiple files
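  /**
   * Sketch for the missing HFile bulk-import test (KIJI-359): the same import run against
   * HFile output instead of writing directly to the table.
   *
   * <p>This is an untested outline under assumed APIs, not a definitive implementation:
   * the factory method {@code MapReduceJobOutputs.newHFileMapReduceJobOutput(KijiURI, Path)}
   * and the {@code HFileLoader} call mentioned below are assumptions that should be verified
   * against the actual KijiMR output and loader classes before enabling this as a test.</p>
   */
  public void sketchBulkImporterToHFiles() throws Exception {
    // Prepare input file:
    final File inputFile = File.createTempFile("TestBulkImportInput", ".txt", getLocalTempDir());
    TestingResources.writeTextFile(inputFile,
        TestingResources.get("org/kiji/mapreduce/TestBulkImportInput.txt"));

    // Write HFiles into a temporary directory instead of writing directly to the table:
    final Path hfileDir = new Path(new File(getLocalTempDir(), "hfile-output").toString());
    final KijiMapReduceJob job = KijiBulkImportJobBuilder.create()
        .withConf(getConf())
        .withBulkImporter(SimpleBulkImporter.class)
        .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(inputFile.toString())))
        // Assumed factory method; verify against MapReduceJobOutputs:
        .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(mTable.getURI(), hfileDir))
        .build();
    assertTrue(job.run());

    // Completing this test would load the generated HFiles into the table and then
    // validate rows as in testSimpleBulkImporter(); the assumed loading call is
    // HFileLoader.create(getConf()).load(hfileDir, mTable).
  }
}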