/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce.output; import static org.apache.hadoop.hbase.util.Bytes.toBytes; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; import java.util.List; import java.util.concurrent.atomic.AtomicLong; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.TaskType; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.kiji.mapreduce.framework.HFileKeyValue; import org.kiji.mapreduce.framework.KijiConfKeys; import org.kiji.mapreduce.output.framework.KijiHFileOutputFormat; import org.kiji.mapreduce.platform.KijiMRPlatformBridge; import org.kiji.schema.Kiji; import org.kiji.schema.KijiURI; import org.kiji.schema.impl.hbase.HBaseKijiInstaller; import org.kiji.schema.layout.KijiTableLayout; import org.kiji.schema.layout.KijiTableLayouts; import org.kiji.schema.layout.impl.ColumnId; /** Tests for KijiHFileOutputFormat. */ public class TestKijiHFileOutputFormat { private static final Logger LOG = LoggerFactory.getLogger(TestKijiHFileOutputFormat.class); /** Counter for fake instance IDs. */ private static final AtomicLong FAKE_INSTANCE_COUNTER = new AtomicLong(0); /** NullWritable shortcut. */ private static final NullWritable NW = NullWritable.get(); /** * Makes a dummy byte array. * * @param value Byte value to repeat. * @param nbytes Number of bytes. * @return a byte array with the specified number of bytes and the specified byte value. */ private static byte[] makeBytes(int value, int nbytes) { final byte[] bytes = new byte[nbytes]; for (int i = 0; i < nbytes; ++i) { bytes[i] = (byte) value; } return bytes; } /** * Makes an HFile put entry (KeyValue writable-comparable). * * @param row Row key. * @param family HBase family (as a Kiji locality group column ID). * @param qualifier HBase qualifier. * @param timestamp Cell timestamp. * @param value Cell content bytes. * @return a new HFileKeyValue with the specified parameters. */ private static HFileKeyValue entry( String row, ColumnId family, String qualifier, long timestamp, byte[] value) { return new HFileKeyValue( toBytes(row), family.toByteArray(), toBytes(qualifier), timestamp, value); } /** * Makes an HFile delete entry (KeyValue writable-comparable). * * @param row Row key. * @param family HBase family (as a Kiji locality group column ID). * @param qualifier HBase qualifier. * @param timestamp Cell timestamp. * @param type Cell type (put or one of the flavors of delete) * @return a new HFileKeyValue with the specified parameters. */ private static HFileKeyValue entry( String row, ColumnId family, String qualifier, long timestamp, HFileKeyValue.Type type) { return new HFileKeyValue( toBytes(row), family.toByteArray(), toBytes(qualifier), timestamp, type, HConstants.EMPTY_BYTE_ARRAY); } /** * Loads an HFile content into a list of KeyValue entries. * * @param path Path of the HFile to load. * @param conf Configuration. * @return the content of the specified HFile, as an ordered list of KeyValue entries. * @throws IOException on I/O error. */ private static List<KeyValue> loadHFile(Path path, Configuration conf) throws IOException { final FileSystem fs = path.getFileSystem(conf); final CacheConfig cacheConf = new CacheConfig(conf); final HFile.Reader reader = HFile.createReader(fs, path, cacheConf); final HFileScanner scanner = reader.getScanner(false, false); final List<KeyValue> kvs = Lists.newArrayListWithCapacity((int) reader.getEntries()); boolean hasNext = scanner.seekTo(); while (hasNext) { kvs.add(scanner.getKeyValue()); hasNext = scanner.next(); } reader.close(); return kvs; } /** * Asserts the content of an HFile. * * @param path Path of the HFile to validate the content of. * @param values Expected KeyValue entries, in order. * @throws IOException on I/O error. */ private void assertHFileContent(Path path, KeyValue... values) throws IOException { final FileSystem fs = path.getFileSystem(mConf); assertTrue(String.format("HFile '%s' does not exist.", path), fs.exists(path)); final List<KeyValue> kvs = loadHFile(path, mConf); assertEquals(kvs.size(), values.length); for (int i = 0; i < values.length; ++i) { assertEquals(kvs.get(i), values[i]); } } private Configuration mConf; private KijiURI mTableURI; private Kiji mKiji; private File mTempDir; private KijiTableLayout mLayout; private KijiHFileOutputFormat mFormat; private ColumnId mDefaultLGId; private ColumnId mInMemoryLGId; @Before public void setUp() throws Exception { mConf = HBaseConfiguration.create(); mTableURI = KijiURI.newBuilder(String.format( "kiji://.fake.%s/default/user", FAKE_INSTANCE_COUNTER.getAndIncrement())).build(); mTempDir = File.createTempFile("test-" + System.currentTimeMillis() + "-", ""); Preconditions.checkState(mTempDir.delete()); Preconditions.checkState(mTempDir.mkdir()); mConf.set("fs.defaultFS", "file://" + mTempDir.toString()); mConf.set("mapreduce.output.fileoutputformat.outputdir", "file://" + mTempDir.toString()); HBaseKijiInstaller.get().install(mTableURI, mConf); mKiji = Kiji.Factory.open(mTableURI); mLayout = KijiTableLayout.newLayout(KijiTableLayouts.getLayout(KijiTableLayouts.FULL_FEATURED)); mKiji.createTable("user", mLayout); mDefaultLGId = mLayout.getLocalityGroupMap().get("default").getId(); mInMemoryLGId = mLayout.getLocalityGroupMap().get("inMemory").getId(); mConf.set(KijiConfKeys.KIJI_OUTPUT_TABLE_URI, mTableURI.toString()); mFormat = new KijiHFileOutputFormat(); } @After public void tearDown() throws Exception { FileUtils.deleteDirectory(mTempDir); } @Test public void testMaxHFileSizeSameRow() throws Exception { final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024)); final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024)); mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1); final TaskAttemptID taskAttemptId = KijiMRPlatformBridge.get().newTaskAttemptID( "jobTracker_jtPort", 314, TaskType.MAP, 159, 2); final TaskAttemptContext context = KijiMRPlatformBridge.get().newTaskAttemptContext( mConf, taskAttemptId); final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION); final FileSystem fs = outputDir.getFileSystem(mConf); final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context); writer.write(entry1, NW); writer.write(entry2, NW); writer.close(context); final Path defaultDir = new Path(outputDir, mDefaultLGId.toString()); assertTrue(fs.exists(defaultDir)); final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString()); assertTrue(!fs.exists(inMemoryDir)); assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue()); assertFalse(fs.exists(new Path(defaultDir, "00001"))); mFormat.getOutputCommitter(context).commitTask(context); } @Test public void testMaxHFileSizeNewRow() throws Exception { final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024)); final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024)); mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1); final TaskAttemptID taskAttemptId = KijiMRPlatformBridge.get().newTaskAttemptID( "jobTracker_jtPort", 314, TaskType.MAP, 159, 2); final TaskAttemptContext context = KijiMRPlatformBridge.get().newTaskAttemptContext( mConf, taskAttemptId); final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION); final FileSystem fs = outputDir.getFileSystem(mConf); final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context); writer.write(entry1, NW); writer.write(entry2, NW); writer.close(context); final Path defaultDir = new Path(outputDir, mDefaultLGId.toString()); assertTrue(fs.exists(defaultDir)); final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString()); assertFalse(fs.exists(inMemoryDir)); assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue()); assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue()); assertFalse(fs.exists(new Path(defaultDir, "00002"))); mFormat.getOutputCommitter(context).commitTask(context); } @Test public void testMultipleLayouts() throws Exception { final TaskAttemptID taskAttemptId = KijiMRPlatformBridge.get().newTaskAttemptID( "jobTracker_jtPort", 314, TaskType.MAP, 159, 2); final TaskAttemptContext context = KijiMRPlatformBridge.get().newTaskAttemptContext( mConf, taskAttemptId); final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION); final FileSystem fs = outputDir.getFileSystem(mConf); final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context); final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024)); writer.write(defaultEntry, NW); final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024)); writer.write(inMemoryEntry, NW); try { // Test with an invalid locality group ID: final ColumnId invalid = new ColumnId(1234); assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid)); writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW); fail("Output format did not fail on unknown locality group IDs."); } catch (IllegalArgumentException iae) { LOG.info("Expected error: " + iae); } writer.close(context); final Path defaultDir = new Path(outputDir, mDefaultLGId.toString()); assertTrue(fs.exists(defaultDir)); final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString()); assertTrue(fs.exists(inMemoryDir)); assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue()); assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue()); mFormat.getOutputCommitter(context).commitTask(context); } @Test public void testTombstonesInHFile() throws Exception { final HFileKeyValue put = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024)); final HFileKeyValue deleteCell = entry("row-key2", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteCell); final HFileKeyValue deleteColumn = entry("row-key3", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteColumn); final HFileKeyValue deleteFamily = entry("row-key4", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteFamily); final TaskAttemptID taskAttemptId = KijiMRPlatformBridge.get().newTaskAttemptID( "jobTracker_jtPort", 314, TaskType.MAP, 159, 2); final TaskAttemptContext context = KijiMRPlatformBridge.get().newTaskAttemptContext( mConf, taskAttemptId); final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION); final FileSystem fs = outputDir.getFileSystem(mConf); final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context); writer.write(put, NW); writer.write(deleteCell, NW); writer.write(deleteColumn, NW); writer.write(deleteFamily, NW); writer.close(context); final Path defaultDir = new Path(outputDir, mDefaultLGId.toString()); assertTrue(fs.exists(defaultDir)); assertHFileContent( new Path(defaultDir, "00000"), put.getKeyValue(), deleteCell.getKeyValue(), deleteColumn.getKeyValue(), deleteFamily.getKeyValue()); assertFalse(fs.exists(new Path(defaultDir, "00001"))); mFormat.getOutputCommitter(context).commitTask(context); } }