/**
* (c) Copyright 2013 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.testlib;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.OutputStream;
import java.util.Map;
import java.util.Random;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.kiji.mapreduce.HFileLoader;
import org.kiji.mapreduce.KijiMRTestLayouts;
import org.kiji.mapreduce.KijiMapReduceJob;
import org.kiji.mapreduce.bulkimport.KijiBulkImportJobBuilder;
import org.kiji.mapreduce.input.MapReduceJobInputs;
import org.kiji.mapreduce.output.MapReduceJobOutputs;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiRowScanner;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiTableReader;
import org.kiji.schema.layout.KijiTableLayout;
import org.kiji.schema.testutil.AbstractKijiIntegrationTest;
/**
 * Integration tests for {@link SimpleBulkImporter}.
 *
 * <p>Each test writes a small text file to the file system configured for the test, runs a
 * bulk-import job over it (either writing directly to the Kiji table or through an HFile),
 * and then checks the content of the output table.</p>
 */
public class IntegrationTestSimpleBulkImporter extends AbstractKijiIntegrationTest {
  /** Hadoop configuration used by the test, set in {@link #setUp()}. */
  private Configuration mConf = null;

  /** File system obtained from {@link #mConf}. */
  private FileSystem mFS = null;

  /** Path of the text file used as the bulk-import job input. */
  private Path mBulkImportInputPath = null;

  /** Kiji instance hosting the output table. */
  private Kiji mKiji = null;

  /** Table the bulk-importer writes to. */
  private KijiTable mOutputTable = null;

  /**
   * Generates a random path on the default file system of the test configuration.
   *
   * @param prefix Prefix for the random file name.
   * @return a random path rooted at the default file system URI.
   * @throws Exception on error.
   */
  private Path makeRandomPath(String prefix) throws Exception {
    // This method only dereferences mConf, but callers expect the whole fixture to be ready.
    Preconditions.checkNotNull(mConf);
    Preconditions.checkNotNull(mFS);
    final Path base = new Path(FileSystem.getDefaultUri(mConf));
    final Random random = new Random(System.nanoTime());
    return new Path(base, String.format("/%s-%s", prefix, random.nextLong()));
  }

  /**
   * Writes the bulk-import input text file, one record per line.
   *
   * @param path Path of the file to create.
   * @throws Exception on error.
   */
  private void writeBulkImportInput(Path path) throws Exception {
    final String[] inputLines = {
      "row1:1",
      "row2:2",
      "row3:2",
      "row4:2",
      "row5:5",
      "row6:1",
      "row7:2",
      "row8:1",
      "row9:2",
      "row10:2",
    };

    final OutputStream ostream = mFS.create(path);
    try {
      for (String line : inputLines) {
        // Encode explicitly rather than relying on the platform default charset.
        IOUtils.write(line, ostream, "UTF-8");
        ostream.write('\n');
      }
    } finally {
      // Always release the stream, even if a write fails.
      ostream.close();
    }
  }

  /**
   * Reads a table into a map from Kiji row keys to KijiRowData.
   *
   * <p>The map key is the first entity ID component of each row, decoded as a string.</p>
   *
   * @param table Kiji table to read from.
   * @param kdr Kiji data request.
   * @return a map of the rows.
   * @throws Exception on error.
   */
  private static Map<String, KijiRowData> toRowMap(KijiTable table, KijiDataRequest kdr)
      throws Exception {
    final KijiTableReader reader = table.openTableReader();
    try {
      final KijiRowScanner scanner = reader.getScanner(kdr);
      try {
        final Map<String, KijiRowData> rows = Maps.newHashMap();
        for (KijiRowData row : scanner) {
          rows.put(Bytes.toString((byte[]) row.getEntityId().getComponentByIndex(0)), row);
        }
        return rows;
      } finally {
        scanner.close();
      }
    } finally {
      reader.close();
    }
  }

  /**
   * Prepares the input file and the output table used by each test.
   *
   * @throws Exception on error.
   */
  @Before
  public void setUp() throws Exception {
    mConf = createConfiguration();
    mFS = FileSystem.get(mConf);
    mBulkImportInputPath = makeRandomPath("bulk-import-input");
    writeBulkImportInput(mBulkImportInputPath);
    mKiji = Kiji.Factory.open(getKijiURI(), mConf);
    final KijiTableLayout layout = KijiTableLayout.newLayout(KijiMRTestLayouts.getTestLayout());
    mKiji.createTable("test", layout);
    mOutputTable = mKiji.openTable("test");
  }

  /**
   * Releases the resources acquired in {@link #setUp()}.
   *
   * <p>Every step is null-guarded and chained through {@code finally} blocks so that a
   * partially completed {@code setUp()} or a failure in one cleanup step does not prevent
   * the remaining resources from being released.</p>
   *
   * @throws Exception on error.
   */
  @After
  public void tearDown() throws Exception {
    try {
      if (mOutputTable != null) {
        mOutputTable.release();
      }
    } finally {
      try {
        if (mKiji != null) {
          mKiji.release();
        }
      } finally {
        try {
          if ((mFS != null) && (mBulkImportInputPath != null)) {
            mFS.delete(mBulkImportInputPath, false);
          }
        } finally {
          // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
          // causes it to close other thread's filesystem objects. For more information
          // see: https://issues.apache.org/jira/browse/HADOOP-7973
          mOutputTable = null;
          mKiji = null;
          mBulkImportInputPath = null;
          mFS = null;
          mConf = null;
        }
      }
    }
  }

  /**
   * Runs the bulk-importer with a direct Kiji table output and validates the table.
   *
   * @throws Exception on error.
   */
  @Test
  public void testSimpleBulkImporterDirect() throws Exception {
    final KijiMapReduceJob mrjob = KijiBulkImportJobBuilder.create()
        .withConf(mConf)
        .withBulkImporter(SimpleBulkImporter.class)
        .withInput(MapReduceJobInputs.newTextMapReduceJobInput(mBulkImportInputPath))
        .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(mOutputTable.getURI()))
        .build();
    assertTrue("Bulk-import job (direct output) failed.", mrjob.run());

    validateOutputTable();
  }

  /**
   * Runs the bulk-importer with an HFile output, loads the HFile and validates the table.
   *
   * @throws Exception on error.
   */
  @Test
  public void testSimpleBulkImporterHFile() throws Exception {
    final Path hfileDirPath = this.makeRandomPath("hfile-output");
    try {
      final KijiMapReduceJob mrjob = KijiBulkImportJobBuilder.create()
          .withConf(mConf)
          .withBulkImporter(SimpleBulkImporter.class)
          .withInput(MapReduceJobInputs.newTextMapReduceJobInput(mBulkImportInputPath))
          .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(
              mOutputTable.getURI(), hfileDirPath))
          .build();
      assertTrue("Bulk-import job (HFile output) failed.", mrjob.run());

      final HFileLoader loader = HFileLoader.create(mConf);
      // There is only one reducer, hence one HFile shard:
      final Path hfilePath = new Path(hfileDirPath, "part-r-00000.hfile");
      loader.load(hfilePath, mOutputTable);

      validateOutputTable();
    } finally {
      // Remove the HFile output directory whether or not the test succeeded.
      mFS.delete(hfileDirPath, true);
    }
  }

  /**
   * Checks that the output table contains rows "row1" through "row10", each with a most
   * recent "primitives:string" value starting with the row ID followed by a dash.
   *
   * @throws Exception on error.
   */
  private void validateOutputTable() throws Exception {
    final KijiDataRequest kdr = KijiDataRequest.create("primitives");
    final Map<String, KijiRowData> rows = toRowMap(mOutputTable, kdr);
    assertEquals("Unexpected number of rows in the output table.", 10, rows.size());
    for (int i = 1; i <= 10; ++i) {
      final String rowId = String.format("row%d", i);
      assertTrue("Missing row: " + rowId, rows.containsKey(rowId));

      // Fail with a readable message instead of a NullPointerException on a missing cell.
      final Object value = rows.get(rowId).getMostRecentValue("primitives", "string");
      assertTrue("No value in primitives:string for row: " + rowId, value != null);
      assertTrue(
          String.format("Unexpected value '%s' for row '%s'.", value, rowId),
          value.toString().startsWith(rowId + "-"));
    }
  }

  // TODO: tests with # of splits > 1.
}