/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import java.util.Set;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.kiji.mapreduce.gather.GathererContext;
import org.kiji.mapreduce.gather.KijiGatherJobBuilder;
import org.kiji.mapreduce.gather.KijiGatherer;
import org.kiji.mapreduce.output.MapReduceJobOutputs;
import org.kiji.schema.KijiClientTest;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiTable;
import org.kiji.schema.layout.KijiTableLayout;
import org.kiji.schema.util.InstanceBuilder;
import org.kiji.schema.util.ResourceUtils;
/**
 * Runs a gatherer job in-process against a fake HBase instance.
 *
 * <p>The test populates a table named "test" with two rows that share one zip code, runs
 * {@link TestingGatherer} over that table with a text file output, and validates the
 * (zip code, user ID) records found in the output part file.</p>
 */
public class TestGatherer extends KijiClientTest {
  /**
   * Gatherer intended to run on the generic KijiMR test layout.
   *
   * <p>For every input row, emits the most recent "info:zip_code" value as the key and the
   * first component of the row's entity ID, decoded as a string, as the value.</p>
   */
  public static class TestingGatherer extends KijiGatherer<LongWritable, Text> {
    /** {@inheritDoc} */
    @Override
    public Class<?> getOutputKeyClass() {
      return LongWritable.class;
    }

    /** {@inheritDoc} */
    @Override
    public Class<?> getOutputValueClass() {
      return Text.class;
    }

    /** {@inheritDoc} */
    @Override
    public KijiDataRequest getDataRequest() {
      // Request the whole "info" family; gather() only reads "info:zip_code".
      return KijiDataRequest.create("info");
    }

    /**
     * Emits one (zip code, user ID) pair for the given row.
     *
     * @param row Input row; its "info:zip_code" cell is read as an Integer.
     * @param context Context used to write the output pair.
     * @throws IOException on I/O error while writing the pair.
     */
    @Override
    public void gather(KijiRowData row, GathererContext<LongWritable, Text> context)
        throws IOException {
      // NOTE: a row with no "info:zip_code" value yields null here, and the unboxing below
      // then fails with a NullPointerException. Every row written by this test has one.
      final Integer zipCode = row.getMostRecentValue("info", "zip_code");

      // The first entity ID component is cast to raw bytes and decoded as the user ID.
      final byte[] userIdBytes = (byte[]) row.getEntityId().getComponentByIndex(0);
      final String userId = Bytes.toString(userIdBytes);

      context.write(new LongWritable(zipCode), new Text(userId));
    }
  }

  /** Test table, owned by this test. */
  private KijiTable mTable;

  /**
   * Creates and populates the "test" table, then opens it for the test.
   *
   * @throws Exception on error while building the environment or opening the table.
   */
  @Before
  public final void setupTestGatherer() throws Exception {
    // Get the test table layouts.
    final KijiTableLayout layout =
        KijiTableLayout.newLayout(KijiMRTestLayouts.getTestLayout());

    // Populate the environment: two users living in the same zip code.
    new InstanceBuilder(getKiji())
        .withTable("test", layout)
            .withRow("Marsellus Wallace")
                .withFamily("info")
                    .withQualifier("first_name").withValue("Marsellus")
                    .withQualifier("last_name").withValue("Wallace")
                    .withQualifier("zip_code").withValue(94110)
            .withRow("Vincent Vega")
                .withFamily("info")
                    .withQualifier("first_name").withValue("Vincent")
                    .withQualifier("last_name").withValue("Vega")
                    .withQualifier("zip_code").withValue(94110)
        .build();

    // Fill local variables.
    mTable = getKiji().openTable("test");
  }

  /**
   * Releases the table opened by {@link #setupTestGatherer()}.
   *
   * @throws Exception on error.
   */
  @After
  public final void teardownTestGatherer() throws Exception {
    ResourceUtils.releaseOrLog(mTable);
    // Drop the reference so a released table cannot be reused by accident.
    mTable = null;
  }

  /**
   * Runs {@link TestingGatherer} over the test table and validates the text output.
   *
   * @throws Exception on error.
   */
  @Test
  public void testGatherer() throws Exception {
    // createTempFile() reserves a unique path by creating a file; remove that file since the
    // path is handed to the job as its output directory.
    final File outputDir = File.createTempFile("gatherer-output", ".dir", getLocalTempDir());
    Preconditions.checkState(
        outputDir.delete(), "Unable to delete placeholder file: %s", outputDir);
    final int numSplits = 1;

    // Run gatherer:
    final KijiMapReduceJob job = KijiGatherJobBuilder.create()
        .withConf(getConf())
        .withGatherer(TestingGatherer.class)
        .withInputTable(mTable.getURI())
        .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(
            new Path(outputDir.toString()), numSplits))
        .build();
    assertTrue("Gatherer job failed.", job.run());

    // Validate output:
    // All records are expected in the first (and only) part file. The file is decoded
    // explicitly as UTF-8 rather than with the platform-default charset.
    final File outputPartFile = new File(outputDir, "part-m-00000");
    final String gatheredText = FileUtils.readFileToString(outputPartFile, "UTF-8");
    final String[] lines = gatheredText.split("\n");
    assertEquals("Unexpected gatherer output: " + gatheredText, 2, lines.length);

    // Each line is "<zip code>\t<user ID>"; the order of the lines is not asserted.
    final Set<String> userIds = Sets.newHashSet();
    for (String line : lines) {
      final String[] split = line.split("\t");
      assertEquals("Malformed output line: " + line, 2, split.length);
      assertEquals("94110", split[0]);
      userIds.add(split[1]);
    }

    final Set<String> expectedUserIds = Sets.newHashSet("Marsellus Wallace", "Vincent Vega");
    assertEquals(expectedUserIds, userIds);
  }
}