/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.kiji.mapreduce.framework.HFileKeyValue; import org.kiji.mapreduce.framework.KijiConfKeys; import org.kiji.mapreduce.gather.GathererContext; import org.kiji.mapreduce.gather.KijiGatherJobBuilder; import org.kiji.mapreduce.gather.KijiGatherer; import org.kiji.mapreduce.kvstore.KeyValueStore; import org.kiji.mapreduce.kvstore.RequiredStores; import org.kiji.mapreduce.kvstore.framework.KeyValueStoreConfiguration; import org.kiji.mapreduce.kvstore.impl.KeyValueStoreConfigSerializer; import org.kiji.mapreduce.kvstore.lib.EmptyKeyValueStore; import org.kiji.mapreduce.kvstore.lib.UnconfiguredKeyValueStore; import org.kiji.mapreduce.output.MapReduceJobOutputs; import org.kiji.mapreduce.output.framework.KijiHFileOutputFormat; import org.kiji.mapreduce.reducer.IdentityReducer; import org.kiji.schema.KijiClientTest; import org.kiji.schema.KijiDataRequest; import org.kiji.schema.KijiRowData; import org.kiji.schema.KijiTable; import org.kiji.schema.layout.KijiTableLayout; import org.kiji.schema.util.ResourceUtils; public class TestKijiGatherJobBuilder extends KijiClientTest { // ----------------------------------------------------------------------------------------------- /** Regular gatherer that emits (Text, Text) pairs. */ public static class SimpleGatherer extends KijiGatherer<Text, Text> { /** {@inheritDoc} */ @Override public KijiDataRequest getDataRequest() { return KijiDataRequest.create("info", "email"); } /** {@inheritDoc} */ @Override public void gather(KijiRowData input, GathererContext<Text, Text> context) throws IOException { // Gathering logic, unused in these tests. } /** {@inheritDoc} */ @Override public Class<?> getOutputKeyClass() { return Text.class; } /** {@inheritDoc} */ @Override public Class<?> getOutputValueClass() { return Text.class; } } // ----------------------------------------------------------------------------------------------- /** Gatherer that emits Kiji puts to HFiles. */ public static class GatherToHFile extends KijiGatherer<HFileKeyValue, NullWritable> { /** {@inheritDoc} */ @Override public KijiDataRequest getDataRequest() { return KijiDataRequest.create("info", "email"); } /** {@inheritDoc} */ @Override public void gather(KijiRowData input, GathererContext<HFileKeyValue, NullWritable> context) throws IOException { // Gathering logic, unused in these tests. } /** {@inheritDoc} */ @Override public Class<?> getOutputKeyClass() { return HFileKeyValue.class; } /** {@inheritDoc} */ @Override public Class<?> getOutputValueClass() { return NullWritable.class; } } // ----------------------------------------------------------------------------------------------- /** Gatherer that requires a KV store. */ public static class UnconfiguredKVGatherer extends SimpleGatherer { /** {@inheritDoc} */ @Override public Map<String, KeyValueStore<?, ?>> getRequiredStores() { return RequiredStores.just("foostore", UnconfiguredKeyValueStore.builder().build()); } } // ----------------------------------------------------------------------------------------------- /** Combiner to use in the test job. */ public static class MyCombiner extends KijiReducer<Text, Text, Text, Text> { /** {@inheritDoc} */ @Override public Class<?> getOutputKeyClass() { return Text.class; } /** {@inheritDoc} */ @Override public Class<?> getOutputValueClass() { return Text.class; } } // ----------------------------------------------------------------------------------------------- /** Reducer to use in the test job. */ public static class MyReducer extends KijiReducer<Text, Text, Text, Text> { /** {@inheritDoc} */ @Override public Class<?> getOutputKeyClass() { return Text.class; } /** {@inheritDoc} */ @Override public Class<?> getOutputValueClass() { return Text.class; } } // ----------------------------------------------------------------------------------------------- /** Dummy table reducer. */ public static class ReducerToHFile extends KijiTableReducer<Text, Text> { /** {@inheritDoc} */ @Override protected void reduce(Text key, Iterable<Text> values, KijiTableContext context) throws IOException { // Reducing logic, unused here. } } // ----------------------------------------------------------------------------------------------- /** Test table, owned by this test. */ private KijiTable mTable; @Before public void setUp() throws Exception { try { // Make doSetUp() errors visible: doSetUp(); } catch (Exception thr) { thr.printStackTrace(); throw thr; } } private void doSetUp() throws Exception { final KijiTableLayout layout = KijiTableLayout.newLayout(KijiMRTestLayouts.getTestLayout()); getKiji().createTable("test", layout); // Set the working directory so that it gets cleaned up after the test: getConf().set("mapred.working.dir", "file://" + getLocalTempDir() + "/workdir"); mTable = getKiji().openTable("test"); } @After public void tearDown() throws Exception { ResourceUtils.releaseOrLog(mTable); mTable = null; } private Path getLocalTestPath(String name) { return new Path("file://" + new File(getLocalTempDir(), name)); } @Test public void testBuildValid() throws Exception { final KijiMapReduceJob gatherJob = KijiGatherJobBuilder.create() .withConf(getConf()) .withInputTable(mTable.getURI()) .withGatherer(SimpleGatherer.class) .withCombiner(MyCombiner.class) .withReducer(MyReducer.class) .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("mypath"), 10)) .build(); // TODO: Verify that the MR Job was configured correctly. final Job job = gatherJob.getHadoopJob(); final Configuration conf = job.getConfiguration(); assertEquals(SimpleGatherer.class.getName(), conf.get(KijiConfKeys.KIJI_GATHERER_CLASS)); assertEquals(MyCombiner.class, job.getCombinerClass()); assertEquals(MyReducer.class, job.getReducerClass()); } @Test public void testGatherToHFile() throws Exception { final KijiMapReduceJob gatherJob = KijiGatherJobBuilder.create() .withConf(getConf()) .withInputTable(mTable.getURI()) .withGatherer(GatherToHFile.class) .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput( mTable.getURI(), getLocalTestPath("hfile"), 10)) .build(); final Job job = gatherJob.getHadoopJob(); final Configuration conf = job.getConfiguration(); assertEquals(GatherToHFile.class.getName(), conf.get(KijiConfKeys.KIJI_GATHERER_CLASS)); assertEquals(null, job.getCombinerClass()); assertEquals(IdentityReducer.class, job.getReducerClass()); assertEquals(10, job.getNumReduceTasks()); assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass()); assertEquals(HFileKeyValue.class, job.getOutputKeyClass()); assertEquals(NullWritable.class, job.getOutputValueClass()); } @Test public void testGatherReducerToHFile() throws Exception { final KijiMapReduceJob gatherJob = KijiGatherJobBuilder.create() .withConf(getConf()) .withInputTable(mTable.getURI()) .withGatherer(SimpleGatherer.class) .withReducer(ReducerToHFile.class) .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput( mTable.getURI(), getLocalTestPath("hfile"), 10)) .build(); final Job job = gatherJob.getHadoopJob(); final Configuration conf = job.getConfiguration(); assertEquals(SimpleGatherer.class.getName(), conf.get(KijiConfKeys.KIJI_GATHERER_CLASS)); assertEquals(null, job.getCombinerClass()); assertEquals(ReducerToHFile.class, job.getReducerClass()); assertEquals(10, job.getNumReduceTasks()); assertEquals(SequenceFileOutputFormat.class, job.getOutputFormatClass()); assertEquals(HFileKeyValue.class, job.getOutputKeyClass()); assertEquals(NullWritable.class, job.getOutputValueClass()); } @Test public void testUnconfiguredKeyValueStore() throws Exception { try { // Should explode as we don't define a KVStore for 'foostore', but the class requires one: KijiGatherJobBuilder.create() .withConf(getConf()) .withInputTable(mTable.getURI()) .withGatherer(UnconfiguredKVGatherer.class) .withCombiner(MyCombiner.class) .withReducer(MyReducer.class) .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("mypath"), 10)) .build(); fail("Should have thrown an IOException."); } catch (IOException ioe) { assertEquals("Cannot use an UnconfiguredKeyValueStore. " + "You must override this on the command line or in a JobBuilder.", ioe.getMessage()); } } @Test public void testEmptyKeyValueStore() throws Exception { // We override UnconfiguredKeyValueStore with EmptyKeyValueStore; this should succeed. final KijiMapReduceJob gatherJob = KijiGatherJobBuilder.create() .withConf(getConf()) .withInputTable(mTable.getURI()) .withGatherer(UnconfiguredKVGatherer.class) .withCombiner(MyCombiner.class) .withReducer(MyReducer.class) .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("mypath"), 10)) .withStore("foostore", EmptyKeyValueStore.builder().build()) .build(); // Verify that the MR Job was configured correctly. final Job job = gatherJob.getHadoopJob(); final Configuration conf = job.getConfiguration(); assertEquals(1, conf.getInt(KeyValueStoreConfigSerializer.CONF_KEY_VALUE_STORE_COUNT, 0)); assertEquals(EmptyKeyValueStore.class.getName(), conf.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0." + KeyValueStoreConfigSerializer.CONF_CLASS)); assertEquals("foostore", conf.get(KeyValueStoreConfiguration.KEY_VALUE_STORE_NAMESPACE + "0." + KeyValueStoreConfigSerializer.CONF_NAME)); } }