/**
* (c) Copyright 2013 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.testlib;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.Collection;
import java.util.Map;
import java.util.Random;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.kiji.mapreduce.HFileLoader;
import org.kiji.mapreduce.KijiMRTestLayouts;
import org.kiji.mapreduce.KijiMapReduceJob;
import org.kiji.mapreduce.gather.KijiGatherJobBuilder;
import org.kiji.mapreduce.output.MapReduceJobOutputs;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiDataRequestBuilder;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiRowScanner;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiTableReader;
import org.kiji.schema.KijiTableWriter;
import org.kiji.schema.testutil.AbstractKijiIntegrationTest;
/**
 * Integration test that runs a table mapper (wrapped as a gatherer) over an input Kiji table
 * and writes into an output Kiji table, either directly or through HFiles that are then
 * bulk-loaded.
 *
 * <p>Each test starts from a freshly created pair of tables ("input" and "output") and an
 * input table populated with three people spread over two zip codes.</p>
 */
public class IntegrationTestTableMapper extends AbstractKijiIntegrationTest {
  private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestTableMapper.class);

  /** Hadoop configuration of the test environment; set in setup, cleared in teardown. */
  private Configuration mConf = null;

  /** File system derived from {@link #mConf}; used to clean up HFile output directories. */
  private FileSystem mFS = null;

  /** Kiji instance under test. */
  private Kiji mKiji = null;

  /** Table the job reads from. */
  private KijiTable mInputTable = null;

  /** Table the job writes to. */
  private KijiTable mOutputTable = null;

  /**
   * Generates a random path rooted at the default file system URI of the test configuration.
   *
   * @param prefix Prefix for the random file name.
   * @return a path of the form {@code /<prefix>-<random long>} on the default file system.
   * @throws Exception on error.
   */
  private Path makeRandomPath(String prefix) throws Exception {
    Preconditions.checkNotNull(mFS);
    final Path base = new Path(FileSystem.getDefaultUri(mConf));
    final Random random = new Random(System.nanoTime());
    return new Path(base, String.format("/%s-%s", prefix, random.nextLong()));
  }

  /**
   * Reads a table into a map keyed by the first entity ID component of each row.
   *
   * <p>The first entity ID component is cast to {@code byte[]} and decoded with
   * {@link Bytes#toString(byte[])}; the table layout is expected to make that cast valid.</p>
   *
   * @param table Kiji table to read from.
   * @param kdr Kiji data request.
   * @return a map from decoded row key to row data.
   * @throws Exception on error.
   */
  private static Map<String, KijiRowData> toRowMap(KijiTable table, KijiDataRequest kdr)
      throws Exception {
    final KijiTableReader reader = table.openTableReader();
    try {
      final KijiRowScanner scanner = reader.getScanner(kdr);
      try {
        final Map<String, KijiRowData> rows = Maps.newHashMap();
        for (KijiRowData row : scanner) {
          rows.put(Bytes.toString((byte[]) row.getEntityId().getComponentByIndex(0)), row);
        }
        return rows;
      } finally {
        scanner.close();
      }
    } finally {
      reader.close();
    }
  }

  /**
   * Writes the fixture rows into the input table: three people, two of them sharing zip code
   * 94110 and one in 93221.
   *
   * @throws Exception on error.
   */
  private void populateInputTable() throws Exception {
    final KijiTable table = mInputTable;
    final KijiTableWriter writer = table.openTableWriter();
    try {
      writer.put(table.getEntityId("1"), "info", "first_name", "Marsellus");
      writer.put(table.getEntityId("1"), "info", "last_name", "Wallace");
      writer.put(table.getEntityId("1"), "info", "zip_code", 94110);
      writer.put(table.getEntityId("2"), "info", "first_name", "Vincent");
      writer.put(table.getEntityId("2"), "info", "last_name", "Vega");
      writer.put(table.getEntityId("2"), "info", "zip_code", 94110);
      writer.put(table.getEntityId("3"), "info", "first_name", "Jules");
      writer.put(table.getEntityId("3"), "info", "last_name", "Winnfield");
      writer.put(table.getEntityId("3"), "info", "zip_code", 93221);
    } finally {
      // Close the writer even when a put fails, so a failing setup does not leak it.
      writer.close();
    }
  }

  /**
   * Opens the Kiji instance, creates and opens the input and output tables, and populates the
   * input table.
   *
   * @throws Exception on error.
   */
  @Before
  public final void setupIntegrationTestTableMapper() throws Exception {
    mConf = getConf();
    mFS = FileSystem.get(mConf);
    mKiji = Kiji.Factory.open(getKijiURI(), mConf);
    final String inputTableName = "input";
    final String outputTableName = "output";
    mKiji.createTable(KijiMRTestLayouts.getTestLayout(inputTableName));
    mKiji.createTable(KijiMRTestLayouts.getTestLayout(outputTableName));
    mInputTable = mKiji.openTable(inputTableName);
    mOutputTable = mKiji.openTable(outputTableName);
    populateInputTable();
  }

  /**
   * Releases the tables and the Kiji instance, then clears all fields.
   *
   * <p>Each release is attempted independently of the others, and fields that were never
   * assigned (because setup failed part-way) are skipped, so that a teardown problem neither
   * leaks the remaining resources nor hides the original setup failure behind a
   * {@link NullPointerException}.</p>
   *
   * @throws Exception on error.
   */
  @After
  public final void teardownIntegrationTestTableMapper() throws Exception {
    try {
      if (mInputTable != null) {
        mInputTable.release();
      }
    } finally {
      try {
        if (mOutputTable != null) {
          mOutputTable.release();
        }
      } finally {
        try {
          if (mKiji != null) {
            mKiji.release();
          }
        } finally {
          // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
          // causes it to close other thread's filesystem objects. For more information
          // see: https://issues.apache.org/jira/browse/HADOOP-7973
          mInputTable = null;
          mOutputTable = null;
          mKiji = null;
          mFS = null;
          mConf = null;
        }
      }
    }
  }

  /**
   * Runs the job with output written directly into the output Kiji table, then validates the
   * table content.
   *
   * @throws Exception on error.
   */
  @Test
  public void testSimpleTableMapperDirect() throws Exception {
    final KijiMapReduceJob mrjob = KijiGatherJobBuilder.create()
        .withConf(mConf)
        .withGatherer(SimpleTableMapperAsGatherer.class)
        .withInputTable(mInputTable.getURI())
        .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(mOutputTable.getURI()))
        .build();
    assertTrue(mrjob.run());
    validateOutputTable();
  }

  /**
   * Runs the job with output written to HFiles, bulk-loads the single resulting HFile into the
   * output table, then validates the table content. The HFile directory is always deleted.
   *
   * @throws Exception on error.
   */
  @Test
  public void testSimpleTableMapperHFiles() throws Exception {
    final Path hfileDirPath = this.makeRandomPath("hfile-output");
    try {
      final KijiMapReduceJob mrjob = KijiGatherJobBuilder.create()
          .withConf(mConf)
          .withGatherer(SimpleTableMapperAsGatherer.class)
          .withInputTable(mInputTable.getURI())
          .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(
              mOutputTable.getURI(), hfileDirPath, 1))
          .build();
      assertTrue(mrjob.run());
      final HFileLoader loader = HFileLoader.create(mConf);
      // There is only one reducer, hence one HFile shard:
      final Path hfilePath = new Path(hfileDirPath, "part-r-00000.hfile");
      loader.load(hfilePath, mOutputTable);
      validateOutputTable();
    } finally {
      mFS.delete(hfileDirPath, true);
    }
  }

  /**
   * Checks the output table: exactly two rows, keyed "94110" and "93221", holding two and one
   * values respectively in column "primitives:string", the latter being "Jules Winnfield".
   *
   * @throws Exception on error.
   */
  private void validateOutputTable() throws Exception {
    final KijiDataRequestBuilder okdrb = KijiDataRequest.builder();
    // Up to 3 versions per column: the busiest zip code in the fixture holds 2 people.
    okdrb.newColumnsDef().withMaxVersions(3).addFamily("primitives");
    final KijiDataRequest okdr = okdrb.build();
    final Map<String, KijiRowData> rows = toRowMap(mOutputTable, okdr);
    assertEquals(2, rows.size());

    // Fail with a readable assertion rather than a NullPointerException if a row is missing.
    assertTrue("Missing output row for zip code 94110", rows.containsKey("94110"));
    assertTrue("Missing output row for zip code 93221", rows.containsKey("93221"));

    final Collection<CharSequence> peopleIn94110 =
        rows.get("94110").<CharSequence>getValues("primitives", "string").values();
    assertEquals(2, peopleIn94110.size());
    final Collection<CharSequence> peopleIn93221 =
        rows.get("93221").<CharSequence>getValues("primitives", "string").values();
    assertEquals(1, peopleIn93221.size());
    assertEquals("Jules Winnfield", peopleIn93221.iterator().next().toString());
  }
}