/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.mapred; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MapReduceTests; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.RunningJob; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @Category({MapReduceTests.class, LargeTests.class}) public class TestTableMapReduceUtil { private static final Log LOG = LogFactory .getLog(TestTableMapReduceUtil.class); private static Table presidentsTable; private static final String TABLE_NAME = "People"; private static final byte[] COLUMN_FAMILY = Bytes.toBytes("info"); private static final byte[] COLUMN_QUALIFIER = Bytes.toBytes("name"); private static ImmutableSet<String> presidentsRowKeys = ImmutableSet.of( "president1", "president2", "president3"); private static Iterator<String> presidentNames = ImmutableSet.of( "John F. Kennedy", "George W. Bush", "Barack Obama").iterator(); private static ImmutableSet<String> actorsRowKeys = ImmutableSet.of("actor1", "actor2"); private static Iterator<String> actorNames = ImmutableSet.of( "Jack Nicholson", "Martin Freeman").iterator(); private static String PRESIDENT_PATTERN = "president"; private static String ACTOR_PATTERN = "actor"; private static ImmutableMap<String, ImmutableSet<String>> relation = ImmutableMap .of(PRESIDENT_PATTERN, presidentsRowKeys, ACTOR_PATTERN, actorsRowKeys); private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); @BeforeClass public static void beforeClass() throws Exception { UTIL.startMiniCluster(); presidentsTable = createAndFillTable(TableName.valueOf(TABLE_NAME)); } @AfterClass public static void afterClass() throws Exception { UTIL.shutdownMiniCluster(); } @Before public void before() throws IOException { LOG.info("before"); UTIL.ensureSomeRegionServersAvailable(1); LOG.info("before done"); } public static Table createAndFillTable(TableName tableName) throws IOException { Table table = UTIL.createTable(tableName, COLUMN_FAMILY); createPutCommand(table); return table; } private static void createPutCommand(Table table) throws IOException { for (String president : presidentsRowKeys) { if (presidentNames.hasNext()) { Put p = new Put(Bytes.toBytes(president)); p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(presidentNames.next())); table.put(p); } } for (String actor : actorsRowKeys) { if (actorNames.hasNext()) { Put p = new Put(Bytes.toBytes(actor)); p.addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(actorNames.next())); table.put(p); } } } /** * Check what the given number of reduce tasks for the given job configuration * does not exceed the number of regions for the given table. */ @Test public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable() throws IOException { Assert.assertNotNull(presidentsTable); Configuration cfg = UTIL.getConfiguration(); JobConf jobConf = new JobConf(cfg); TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf); TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf); TableMapReduceUtil.setScannerCaching(jobConf, 100); assertEquals(1, jobConf.getNumReduceTasks()); assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0)); jobConf.setNumReduceTasks(10); TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf); TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf); assertEquals(1, jobConf.getNumReduceTasks()); } @Test public void shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable() throws IOException { Configuration cfg = UTIL.getConfiguration(); JobConf jobConf = new JobConf(cfg); TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf); TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf); assertEquals(1, jobConf.getNumMapTasks()); jobConf.setNumMapTasks(10); TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf); TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf); assertEquals(1, jobConf.getNumMapTasks()); } @Test @SuppressWarnings("deprecation") public void shoudBeValidMapReduceEvaluation() throws Exception { Configuration cfg = UTIL.getConfiguration(); JobConf jobConf = new JobConf(cfg); try { jobConf.setJobName("process row task"); jobConf.setNumReduceTasks(1); TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY), ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf); TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf); RunningJob job = JobClient.runJob(jobConf); assertTrue(job.isSuccessful()); } finally { if (jobConf != null) FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir"))); } } @Test @SuppressWarnings("deprecation") public void shoudBeValidMapReduceWithPartitionerEvaluation() throws IOException { Configuration cfg = UTIL.getConfiguration(); JobConf jobConf = new JobConf(cfg); try { jobConf.setJobName("process row task"); jobConf.setNumReduceTasks(2); TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY), ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf); TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class); RunningJob job = JobClient.runJob(jobConf); assertTrue(job.isSuccessful()); } finally { if (jobConf != null) FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir"))); } } @SuppressWarnings("deprecation") static class ClassificatorRowReduce extends MapReduceBase implements TableReduce<ImmutableBytesWritable, Put> { @Override public void reduce(ImmutableBytesWritable key, Iterator<Put> values, OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter) throws IOException { String strKey = Bytes.toString(key.get()); List<Put> result = new ArrayList<>(); while (values.hasNext()) result.add(values.next()); if (relation.keySet().contains(strKey)) { Set<String> set = relation.get(strKey); if (set != null) { assertEquals(set.size(), result.size()); } else { throwAccertionError("Test infrastructure error: set is null"); } } else { throwAccertionError("Test infrastructure error: key not found in map"); } } private void throwAccertionError(String errorMessage) throws AssertionError { throw new AssertionError(errorMessage); } } @SuppressWarnings("deprecation") static class ClassificatorMapper extends MapReduceBase implements TableMap<ImmutableBytesWritable, Put> { @Override public void map(ImmutableBytesWritable row, Result result, OutputCollector<ImmutableBytesWritable, Put> outCollector, Reporter reporter) throws IOException { String rowKey = Bytes.toString(result.getRow()); final ImmutableBytesWritable pKey = new ImmutableBytesWritable( Bytes.toBytes(PRESIDENT_PATTERN)); final ImmutableBytesWritable aKey = new ImmutableBytesWritable( Bytes.toBytes(ACTOR_PATTERN)); ImmutableBytesWritable outKey = null; if (rowKey.startsWith(PRESIDENT_PATTERN)) { outKey = pKey; } else if (rowKey.startsWith(ACTOR_PATTERN)) { outKey = aKey; } else { throw new AssertionError("unexpected rowKey"); } String name = Bytes.toString(result.getValue(COLUMN_FAMILY, COLUMN_QUALIFIER)); outCollector.collect(outKey, new Put(Bytes.toBytes("rowKey2")) .addColumn(COLUMN_FAMILY, COLUMN_QUALIFIER, Bytes.toBytes(name))); } } }