/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.mapred; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.anyObject; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import java.io.IOException; import java.util.Arrays; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.util.Bytes; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; /** * This tests the TableInputFormat and its recovery semantics * */ @Category(LargeTests.class) public class TestTableInputFormat { private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class); private final static HBaseTestingUtility UTIL = new HBaseTestingUtility(); static final byte[] FAMILY = Bytes.toBytes("family"); private static final byte[][] columns = new byte[][] { FAMILY }; @BeforeClass public static void beforeClass() throws Exception { UTIL.startMiniCluster(); } @AfterClass public static void afterClass() throws Exception { UTIL.shutdownMiniCluster(); } @Before public void before() throws IOException { LOG.info("before"); UTIL.ensureSomeRegionServersAvailable(1); LOG.info("before done"); } /** * Setup a table with two rows and values. * * @param tableName * @return * @throws IOException */ public static HTable createTable(byte[] tableName) throws IOException { HTable table = UTIL.createTable(tableName, FAMILY); Put p = new Put("aaa".getBytes()); p.add(FAMILY, null, "value aaa".getBytes()); table.put(p); p = new Put("bbb".getBytes()); p.add(FAMILY, null, "value bbb".getBytes()); table.put(p); return table; } /** * Verify that the result and key have expected values. * * @param r * @param key * @param expectedKey * @param expectedValue * @return */ static boolean checkResult(Result r, ImmutableBytesWritable key, byte[] expectedKey, byte[] expectedValue) { assertEquals(0, key.compareTo(expectedKey)); Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY); byte[] value = vals.values().iterator().next(); assertTrue(Arrays.equals(value, expectedValue)); return true; // if succeed } /** * Create table data and run tests on specified htable using the * o.a.h.hbase.mapred API. * * @param table * @throws IOException */ static void runTestMapred(HTable table) throws IOException { org.apache.hadoop.hbase.mapred.TableRecordReader trr = new org.apache.hadoop.hbase.mapred.TableRecordReader(); trr.setStartRow("aaa".getBytes()); trr.setEndRow("zzz".getBytes()); trr.setHTable(table); trr.setInputColumns(columns); trr.init(); Result r = new Result(); ImmutableBytesWritable key = new ImmutableBytesWritable(); boolean more = trr.next(key, r); assertTrue(more); checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes()); more = trr.next(key, r); assertTrue(more); checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes()); // no more data more = trr.next(key, r); assertFalse(more); } /** * Create table data and run tests on specified htable using the * o.a.h.hbase.mapreduce API. * * @param table * @throws IOException * @throws InterruptedException */ static void runTestMapreduce(HTable table) throws IOException, InterruptedException { org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr = new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl(); Scan s = new Scan(); s.setStartRow("aaa".getBytes()); s.setStopRow("zzz".getBytes()); s.addFamily(FAMILY); trr.setScan(s); trr.setHTable(table); trr.initialize(null, null); Result r = new Result(); ImmutableBytesWritable key = new ImmutableBytesWritable(); boolean more = trr.nextKeyValue(); assertTrue(more); key = trr.getCurrentKey(); r = trr.getCurrentValue(); checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes()); more = trr.nextKeyValue(); assertTrue(more); key = trr.getCurrentKey(); r = trr.getCurrentValue(); checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes()); // no more data more = trr.nextKeyValue(); assertFalse(more); } /** * Create a table that IOE's on first scanner next call * * @throws IOException */ static HTable createIOEScannerTable(byte[] name, final int failCnt) throws IOException { // build up a mock scanner stuff to fail the first time Answer<ResultScanner> a = new Answer<ResultScanner>() { int cnt = 0; @Override public ResultScanner answer(InvocationOnMock invocation) throws Throwable { // first invocation return the busted mock scanner if (cnt++ < failCnt) { // create mock ResultScanner that always fails. Scan scan = mock(Scan.class); doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe ResultScanner scanner = mock(ResultScanner.class); // simulate TimeoutException / IOException doThrow(new IOException("Injected exception")).when(scanner).next(); return scanner; } // otherwise return the real scanner. return (ResultScanner) invocation.callRealMethod(); } }; HTable htable = spy(createTable(name)); doAnswer(a).when(htable).getScanner((Scan) anyObject()); return htable; } /** * Create a table that throws a DoNoRetryIOException on first scanner next * call * * @throws IOException */ static HTable createDNRIOEScannerTable(byte[] name, final int failCnt) throws IOException { // build up a mock scanner stuff to fail the first time Answer<ResultScanner> a = new Answer<ResultScanner>() { int cnt = 0; @Override public ResultScanner answer(InvocationOnMock invocation) throws Throwable { // first invocation return the busted mock scanner if (cnt++ < failCnt) { // create mock ResultScanner that always fails. Scan scan = mock(Scan.class); doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe ResultScanner scanner = mock(ResultScanner.class); invocation.callRealMethod(); // simulate UnknownScannerException doThrow( new UnknownScannerException("Injected simulated TimeoutException")) .when(scanner).next(); return scanner; } // otherwise return the real scanner. return (ResultScanner) invocation.callRealMethod(); } }; HTable htable = spy(createTable(name)); doAnswer(a).when(htable).getScanner((Scan) anyObject()); return htable; } /** * Run test assuming no errors using mapred api. * * @throws IOException */ @Test public void testTableRecordReader() throws IOException { HTable table = createTable("table1".getBytes()); runTestMapred(table); } /** * Run test assuming Scanner IOException failure using mapred api, * * @throws IOException */ @Test public void testTableRecordReaderScannerFail() throws IOException { HTable htable = createIOEScannerTable("table2".getBytes(), 1); runTestMapred(htable); } /** * Run test assuming Scanner IOException failure using mapred api, * * @throws IOException */ @Test(expected = IOException.class) public void testTableRecordReaderScannerFailTwice() throws IOException { HTable htable = createIOEScannerTable("table3".getBytes(), 2); runTestMapred(htable); } /** * Run test assuming UnknownScannerException (which is a type of * DoNotRetryIOException) using mapred api. * * @throws DoNotRetryIOException */ @Test public void testTableRecordReaderScannerTimeout() throws IOException { HTable htable = createDNRIOEScannerTable("table4".getBytes(), 1); runTestMapred(htable); } /** * Run test assuming UnknownScannerException (which is a type of * DoNotRetryIOException) using mapred api. * * @throws DoNotRetryIOException */ @Test(expected = DoNotRetryIOException.class) public void testTableRecordReaderScannerTimeoutTwice() throws IOException { HTable htable = createDNRIOEScannerTable("table5".getBytes(), 2); runTestMapred(htable); } /** * Run test assuming no errors using newer mapreduce api * * @throws IOException * @throws InterruptedException */ @Test public void testTableRecordReaderMapreduce() throws IOException, InterruptedException { HTable table = createTable("table1-mr".getBytes()); runTestMapreduce(table); } /** * Run test assuming Scanner IOException failure using newer mapreduce api * * @throws IOException * @throws InterruptedException */ @Test public void testTableRecordReaderScannerFailMapreduce() throws IOException, InterruptedException { HTable htable = createIOEScannerTable("table2-mr".getBytes(), 1); runTestMapreduce(htable); } /** * Run test assuming Scanner IOException failure using newer mapreduce api * * @throws IOException * @throws InterruptedException */ @Test(expected = IOException.class) public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException, InterruptedException { HTable htable = createIOEScannerTable("table3-mr".getBytes(), 2); runTestMapreduce(htable); } /** * Run test assuming UnknownScannerException (which is a type of * DoNotRetryIOException) using newer mapreduce api * * @throws InterruptedException * @throws DoNotRetryIOException */ @Test public void testTableRecordReaderScannerTimeoutMapreduce() throws IOException, InterruptedException { HTable htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1); runTestMapreduce(htable); } /** * Run test assuming UnknownScannerException (which is a type of * DoNotRetryIOException) using newer mapreduce api * * @throws InterruptedException * @throws DoNotRetryIOException */ @Test(expected = DoNotRetryIOException.class) public void testTableRecordReaderScannerTimeoutMapreduceTwice() throws IOException, InterruptedException { HTable htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2); runTestMapreduce(htable); } }