/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;

/**
 * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat}.
 * Sets up and runs a mapreduce job that writes hfile output.
 * Creates a few inner classes to implement splits and an inputformat that
 * emits keys and values like those of {@link PerformanceEvaluation}.
 * Creates as many splits as there are "mapred.map.tasks" map tasks.
 */
public class TestHFileOutputFormat {
  private final static int ROWSPERSPLIT = 1024;

  private static final byte[] FAMILY_NAME = PerformanceEvaluation.FAMILY_NAME;
  private static final byte[] TABLE_NAME = Bytes.toBytes("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);

  /**
   * Simple mapper that makes KeyValue output.
   */
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable,
                     ImmutableBytesWritable, KeyValue> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(
        NullWritable n1, NullWritable n2,
        Mapper<NullWritable, NullWritable,
               ImmutableBytesWritable, KeyValue>.Context context)
        throws IOException, InterruptedException {
      byte[] keyBytes = new byte[keyLength];
      byte[] valBytes = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {
        random.nextBytes(keyBytes);
        // Ensure that unique tasks generate unique keys
        keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        KeyValue kv = new KeyValue(keyBytes, PerformanceEvaluation.FAMILY_NAME,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        context.write(key, kv);
      }
    }
  }
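  // The mapper above reads its key and value sizes from the job
  // configuration. A minimal sketch of how a caller could tune those sizes
  // before submitting a job; this helper is illustrative only (the name
  // setRandomKVSizes is hypothetical and nothing in the tests calls it):
  private static void setRandomKVSizes(Job job, int keyLen, int valLen) {
    // These config names match KEYLEN_CONF and VALLEN_CONF declared in
    // RandomKVGeneratingMapper above.
    job.getConfiguration().setInt("randomkv.key.length", keyLen);
    job.getConfiguration().setInt("randomkv.val.length", valLen);
  }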
  @Before
  public void cleanupDir() throws IOException {
    util.cleanupTestDir();
  }

  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }

  /**
   * Test that {@link HFileOutputFormat} RecordWriter amends timestamps if
   * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced()
      throws IOException, InterruptedException {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        HBaseTestingUtility.getTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = new TaskAttemptContext(job.getConfiguration(),
          new TaskAttemptID());
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
      // changed by the call to write.  Check all in kv is same but ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
      assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
      // Compare the primitive timestamps directly: assertNotSame on
      // autoboxed longs compares object identity, so it would pass even if
      // the values were equal and prove nothing.
      assertTrue(original.getTimestamp() != kv.getTimestamp());
      assertTrue(kv.getTimestamp() != HConstants.LATEST_TIMESTAMP);

      // Test 2.  Now test passing a kv that has an explicit ts.  It should
      // not be changed by the call to record write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
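  // For reference (a hedged note, not part of the original test): the
  // three-argument KeyValue constructor used in Test 1 above leaves the
  // timestamp at HConstants.LATEST_TIMESTAMP, which is what makes the
  // rewrite observable, e.g.
  //
  //   KeyValue kv = new KeyValue(row, family, qualifier);
  //   // kv.getTimestamp() == HConstants.LATEST_TIMESTAMP until a writer
  //   // such as HFileOutputFormat's RecordWriter fills in a real time.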
  /**
   * Run small MR job.
   */
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = HBaseTestingUtility.getTestDir("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Set down this value or we OOME in eclipse.
    conf.setInt("io.sort.mb", 20);
    // Write a few files.
    conf.setLong("hbase.hregion.max.filesize", 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    // This partitioner doesn't work well for number keys, but we use it
    // anyway just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setNumReduceTasks(4);

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  @Test
  public void testJobConfiguration() throws Exception {
    Job job = new Job();
    HTable table = Mockito.mock(HTable.class);
    byte[][] mockKeys = new byte[][] {
        HConstants.EMPTY_BYTE_ARRAY,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"),
        Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();

    HFileOutputFormat.configureIncrementalLoad(job, table);
    // One reduce task per mocked region start key; expected value first.
    assertEquals(4, job.getNumReduceTasks());
  }

  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] = PerformanceEvaluation.generateValue(random);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    doIncrementalLoadTest(true);
  }
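  // Outline of the incremental-load flow exercised by the test below. This
  // is a summary of the steps the code already performs, not new test logic:
  //  1. HFileOutputFormat.configureIncrementalLoad(job, table) wires up
  //     KeyValueSortReducer and one reduce task per region, partitioned by
  //     the table's region start keys (testJobConfiguration above checks
  //     the reduce-task count).
  //  2. The MR job writes HFiles into the output directory; the table
  //     itself is untouched, which the test asserts via countRows(table).
  //  3. LoadIncrementalHFiles.doBulkLoad(testDir, table) moves the finished
  //     HFiles into the live regions, after which the rows become visible.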
  private void doIncrementalLoadTest(
      boolean shouldChangeRegions) throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = HBaseTestingUtility.getTestDir("testLocalMRIncrementalLoad");
    byte[][] startKeys = generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILY_NAME);
      int numRegions = util.createMultiRegions(
          util.getConfiguration(), table, FAMILY_NAME, startKeys);
      // Expected value first.
      assertEquals("Should make 5 regions", 5, numRegions);
      assertEquals("Should start with empty table",
          0, util.countRows(table));

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table",
          0, util.countRows(table));

      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while (util.getMiniHBaseCluster().getMaster().getAssignmentManager()
            .isRegionsInTransition()) {
          Threads.sleep(1000);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(util.getConfiguration(),
            table, FAMILY_NAME, newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionsInfo().size() != 15 ||
            !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(1000);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = conf.getInt("mapred.map.tasks", 1) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(1000);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME, 30000);

      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void runIncrementalPELoad(
      Configuration conf, HTable table, Path outDir) throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    setupRandomGeneratorMapper(job);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    FileOutputFormat.setOutputPath(job, outDir);

    assertEquals(table.getRegionsInfo().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

  public static void main(String[] args) throws Exception {
    new TestHFileOutputFormat().manualTest(args);
  }

  public void manualTest(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = util.createTable(tname, FAMILY_NAME);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      util.createMultiRegions(conf, table, FAMILY_NAME,
          generateRandomStartKeys(5));
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat newtable | incremental");
    }
  }
}
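// Example manual invocations of the harness above, per the usage string in
// manualTest. The table name is arbitrary and classpath setup is
// environment-specific, so it is elided here:
//   java ... org.apache.hadoop.hbase.mapreduce.TestHFileOutputFormat newtable TestTable
//   java ... org.apache.hadoop.hbase.mapreduce.TestHFileOutputFormat incremental TestTable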