/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.flume.sink.hbase; import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.primitives.Longs; import org.apache.flume.Channel; import org.apache.flume.ChannelException; import org.apache.flume.Context; import org.apache.flume.Event; import org.apache.flume.EventDeliveryException; import org.apache.flume.FlumeException; import org.apache.flume.Sink.Status; import org.apache.flume.Transaction; import org.apache.flume.channel.MemoryChannel; import org.apache.flume.conf.Configurables; import org.apache.flume.event.EventBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Increment; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.ZKConfig; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.lang.reflect.Method; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.NavigableMap; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.spy; public class TestHBaseSink { private static final Logger logger = LoggerFactory.getLogger(TestHBaseSink.class); private static final HBaseTestingUtility testUtility = new HBaseTestingUtility(); private static final String tableName = "TestHbaseSink"; private static final String columnFamily = "TestColumnFamily"; private static final String inColumn = "iCol"; private static final String plCol = "pCol"; private static final String valBase = "testing hbase sink: jham"; private Configuration conf; private Context ctx; @BeforeClass public static void setUpOnce() throws Exception { testUtility.startMiniCluster(); } @AfterClass public static void tearDownOnce() throws Exception { testUtility.shutdownMiniCluster(); } /** * Most common context setup for unit tests using * {@link SimpleHbaseEventSerializer}. */ @Before public void setUp() throws IOException { conf = new Configuration(testUtility.getConfiguration()); ctx = new Context(); testUtility.createTable(tableName.getBytes(), columnFamily.getBytes()); } @After public void tearDown() throws IOException { testUtility.deleteTable(tableName.getBytes()); } /** * Set up {@link Context} for use with {@link SimpleHbaseEventSerializer}. */ private void initContextForSimpleHbaseEventSerializer() { ctx = new Context(); ctx.put("table", tableName); ctx.put("columnFamily", columnFamily); ctx.put("serializer", SimpleHbaseEventSerializer.class.getName()); ctx.put("serializer.payloadColumn", plCol); ctx.put("serializer.incrementColumn", inColumn); } /** * Set up {@link Context} for use with {@link IncrementHBaseSerializer}. */ private void initContextForIncrementHBaseSerializer() { ctx = new Context(); ctx.put("table", tableName); ctx.put("columnFamily", columnFamily); ctx.put("serializer", IncrementHBaseSerializer.class.getName()); } @Test public void testOneEventWithDefaults() throws Exception { //Create a context without setting increment column and payload Column ctx = new Context(); ctx.put("table", tableName); ctx.put("columnFamily", columnFamily); ctx.put("serializer", SimpleHbaseEventSerializer.class.getName()); HBaseSink sink = new HBaseSink(conf); Configurables.configure(sink, ctx); Channel channel = new MemoryChannel(); Configurables.configure(channel, new Context()); sink.setChannel(channel); sink.start(); Transaction tx = channel.getTransaction(); tx.begin(); Event e = EventBuilder.withBody(Bytes.toBytes(valBase)); channel.put(e); tx.commit(); tx.close(); sink.process(); sink.stop(); HTable table = new HTable(conf, tableName); byte[][] results = getResults(table, 1); byte[] out = results[0]; Assert.assertArrayEquals(e.getBody(), out); out = results[1]; Assert.assertArrayEquals(Longs.toByteArray(1), out); } @Test public void testOneEvent() throws Exception { initContextForSimpleHbaseEventSerializer(); HBaseSink sink = new HBaseSink(conf); Configurables.configure(sink, ctx); Channel channel = new MemoryChannel(); Configurables.configure(channel, new Context()); sink.setChannel(channel); sink.start(); Transaction tx = channel.getTransaction(); tx.begin(); Event e = EventBuilder.withBody( Bytes.toBytes(valBase)); channel.put(e); tx.commit(); tx.close(); sink.process(); sink.stop(); HTable table = new HTable(conf, tableName); byte[][] results = getResults(table, 1); byte[] out = results[0]; Assert.assertArrayEquals(e.getBody(), out); out = results[1]; Assert.assertArrayEquals(Longs.toByteArray(1), out); } @Test public void testThreeEvents() throws Exception { initContextForSimpleHbaseEventSerializer(); ctx.put("batchSize", "3"); HBaseSink sink = new HBaseSink(conf); Configurables.configure(sink, ctx); Channel channel = new MemoryChannel(); Configurables.configure(channel, new Context()); sink.setChannel(channel); sink.start(); Transaction tx = channel.getTransaction(); tx.begin(); for (int i = 0; i < 3; i++) { Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i)); channel.put(e); } tx.commit(); tx.close(); sink.process(); sink.stop(); HTable table = new HTable(conf, tableName); byte[][] results = getResults(table, 3); byte[] out; int found = 0; for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) { found++; break; } } } Assert.assertEquals(3, found); out = results[3]; Assert.assertArrayEquals(Longs.toByteArray(3), out); } @Test public void testMultipleBatches() throws Exception { initContextForSimpleHbaseEventSerializer(); ctx.put("batchSize", "2"); HBaseSink sink = new HBaseSink(conf); Configurables.configure(sink, ctx); //Reset the context to a higher batchSize ctx.put("batchSize", "100"); Channel channel = new MemoryChannel(); Configurables.configure(channel, new Context()); sink.setChannel(channel); sink.start(); Transaction tx = channel.getTransaction(); tx.begin(); for (int i = 0; i < 3; i++) { Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i)); channel.put(e); } tx.commit(); tx.close(); int count = 0; while (sink.process() != Status.BACKOFF) { count++; } sink.stop(); Assert.assertEquals(2, count); HTable table = new HTable(conf, tableName); byte[][] results = getResults(table, 3); byte[] out; int found = 0; for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) { found++; break; } } } Assert.assertEquals(3, found); out = results[3]; Assert.assertArrayEquals(Longs.toByteArray(3), out); } @Test(expected = FlumeException.class) public void testMissingTable() throws Exception { logger.info("Running testMissingTable()"); initContextForSimpleHbaseEventSerializer(); // setUp() will create the table, so we delete it. logger.info("Deleting table {}", tableName); testUtility.deleteTable(tableName.getBytes()); ctx.put("batchSize", "2"); HBaseSink sink = new HBaseSink(conf); Configurables.configure(sink, ctx); //Reset the context to a higher batchSize ctx.put("batchSize", "100"); Channel channel = new MemoryChannel(); Configurables.configure(channel, new Context()); sink.setChannel(channel); logger.info("Writing data into channel"); Transaction tx = channel.getTransaction(); tx.begin(); for (int i = 0; i < 3; i++) { Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i)); channel.put(e); } tx.commit(); tx.close(); logger.info("Starting sink and processing events"); try { logger.info("Calling sink.start()"); sink.start(); // This method will throw. // We never get here, but we log in case the behavior changes. logger.error("Unexpected error: Calling sink.process()"); sink.process(); logger.error("Unexpected error: Calling sink.stop()"); sink.stop(); } finally { // Re-create the table so tearDown() doesn't throw. testUtility.createTable(tableName.getBytes(), columnFamily.getBytes()); } // FIXME: The test should never get here, the below code doesn't run. Assert.fail(); HTable table = new HTable(conf, tableName); byte[][] results = getResults(table, 2); byte[] out; int found = 0; for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) { found++; break; } } } Assert.assertEquals(2, found); out = results[2]; Assert.assertArrayEquals(Longs.toByteArray(2), out); sink.process(); } // TODO: Move this test to a different class and run it stand-alone. /** * This test must run last - it shuts down the minicluster :D * * @throws Exception */ @Ignore("For dev builds only:" + "This test takes too long, and this has to be run after all other" + "tests, since it shuts down the minicluster. " + "Comment out all other tests" + "and uncomment this annotation to run this test.") @Test(expected = EventDeliveryException.class) public void testHBaseFailure() throws Exception { initContextForSimpleHbaseEventSerializer(); ctx.put("batchSize", "2"); HBaseSink sink = new HBaseSink(conf); Configurables.configure(sink, ctx); //Reset the context to a higher batchSize ctx.put("batchSize", "100"); Channel channel = new MemoryChannel(); Configurables.configure(channel, new Context()); sink.setChannel(channel); sink.start(); Transaction tx = channel.getTransaction(); tx.begin(); for (int i = 0; i < 3; i++) { Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i)); channel.put(e); } tx.commit(); tx.close(); sink.process(); HTable table = new HTable(conf, tableName); byte[][] results = getResults(table, 2); byte[] out; int found = 0; for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) { found++; break; } } } Assert.assertEquals(2, found); out = results[2]; Assert.assertArrayEquals(Longs.toByteArray(2), out); testUtility.shutdownMiniCluster(); sink.process(); sink.stop(); } /** * Makes Hbase scans to get rows in the payload column and increment column * in the table given. Expensive, so tread lightly. * Calling this function multiple times for the same result set is a bad * idea. Cache the result set once it is returned by this function. * * @param table * @param numEvents Number of events inserted into the table * @return * @throws IOException */ private byte[][] getResults(HTable table, int numEvents) throws IOException { byte[][] results = new byte[numEvents + 1][]; Scan scan = new Scan(); scan.addColumn(columnFamily.getBytes(), plCol.getBytes()); scan.setStartRow(Bytes.toBytes("default")); ResultScanner rs = table.getScanner(scan); byte[] out = null; int i = 0; try { for (Result r = rs.next(); r != null; r = rs.next()) { out = r.getValue(columnFamily.getBytes(), plCol.getBytes()); if (i >= results.length - 1) { rs.close(); throw new FlumeException("More results than expected in the table." + "Expected = " + numEvents + ". Found = " + i); } results[i++] = out; System.out.println(out); } } finally { rs.close(); } Assert.assertEquals(i, results.length - 1); scan = new Scan(); scan.addColumn(columnFamily.getBytes(), inColumn.getBytes()); scan.setStartRow(Bytes.toBytes("incRow")); rs = table.getScanner(scan); out = null; try { for (Result r = rs.next(); r != null; r = rs.next()) { out = r.getValue(columnFamily.getBytes(), inColumn.getBytes()); results[i++] = out; System.out.println(out); } } finally { rs.close(); } return results; } @Test public void testTransactionStateOnChannelException() throws Exception { initContextForSimpleHbaseEventSerializer(); ctx.put("batchSize", "1"); HBaseSink sink = new HBaseSink(conf); Configurables.configure(sink, ctx); // Reset the context to a higher batchSize Channel channel = spy(new MemoryChannel()); Configurables.configure(channel, new Context()); sink.setChannel(channel); sink.start(); Transaction tx = channel.getTransaction(); tx.begin(); Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + 0)); channel.put(e); tx.commit(); tx.close(); doThrow(new ChannelException("Mock Exception")).when(channel).take(); try { sink.process(); Assert.fail("take() method should throw exception"); } catch (ChannelException ex) { Assert.assertEquals("Mock Exception", ex.getMessage()); } doReturn(e).when(channel).take(); sink.process(); sink.stop(); HTable table = new HTable(conf, tableName); byte[][] results = getResults(table, 1); byte[] out = results[0]; Assert.assertArrayEquals(e.getBody(), out); out = results[1]; Assert.assertArrayEquals(Longs.toByteArray(1), out); } @Test public void testTransactionStateOnSerializationException() throws Exception { initContextForSimpleHbaseEventSerializer(); ctx.put("batchSize", "1"); ctx.put(HBaseSinkConfigurationConstants.CONFIG_SERIALIZER, "org.apache.flume.sink.hbase.MockSimpleHbaseEventSerializer"); HBaseSink sink = new HBaseSink(conf); Configurables.configure(sink, ctx); // Reset the context to a higher batchSize ctx.put("batchSize", "100"); Channel channel = new MemoryChannel(); Configurables.configure(channel, new Context()); sink.setChannel(channel); sink.start(); Transaction tx = channel.getTransaction(); tx.begin(); Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + 0)); channel.put(e); tx.commit(); tx.close(); try { MockSimpleHbaseEventSerializer.throwException = true; sink.process(); Assert.fail("FlumeException expected from serilazer"); } catch (FlumeException ex) { Assert.assertEquals("Exception for testing", ex.getMessage()); } MockSimpleHbaseEventSerializer.throwException = false; sink.process(); sink.stop(); HTable table = new HTable(conf, tableName); byte[][] results = getResults(table, 1); byte[] out = results[0]; Assert.assertArrayEquals(e.getBody(), out); out = results[1]; Assert.assertArrayEquals(Longs.toByteArray(1), out); } @Test public void testWithoutConfigurationObject() throws Exception { initContextForSimpleHbaseEventSerializer(); Context tmpContext = new Context(ctx.getParameters()); tmpContext.put("batchSize", "2"); tmpContext.put(HBaseSinkConfigurationConstants.ZK_QUORUM, ZKConfig.getZKQuorumServersString(conf)); System.out.print(ctx.getString(HBaseSinkConfigurationConstants.ZK_QUORUM)); tmpContext.put(HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT, conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT)); HBaseSink sink = new HBaseSink(); Configurables.configure(sink, tmpContext); Channel channel = new MemoryChannel(); Configurables.configure(channel, ctx); sink.setChannel(channel); sink.start(); Transaction tx = channel.getTransaction(); tx.begin(); for (int i = 0; i < 3; i++) { Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i)); channel.put(e); } tx.commit(); tx.close(); Status status = Status.READY; while (status != Status.BACKOFF) { status = sink.process(); } sink.stop(); HTable table = new HTable(conf, tableName); byte[][] results = getResults(table, 3); byte[] out; int found = 0; for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) { found++; break; } } } Assert.assertEquals(3, found); out = results[3]; Assert.assertArrayEquals(Longs.toByteArray(3), out); } @Test public void testZKQuorum() throws Exception { initContextForSimpleHbaseEventSerializer(); Context tmpContext = new Context(ctx.getParameters()); String zkQuorum = "zk1.flume.apache.org:3342, zk2.flume.apache.org:3342, " + "zk3.flume.apache.org:3342"; tmpContext.put("batchSize", "2"); tmpContext.put(HBaseSinkConfigurationConstants.ZK_QUORUM, zkQuorum); tmpContext.put(HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT, conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT)); HBaseSink sink = new HBaseSink(); Configurables.configure(sink, tmpContext); Assert.assertEquals("zk1.flume.apache.org,zk2.flume.apache.org," + "zk3.flume.apache.org", sink.getConfig().get(HConstants.ZOOKEEPER_QUORUM)); Assert.assertEquals(String.valueOf(3342), sink.getConfig().get(HConstants.ZOOKEEPER_CLIENT_PORT)); } @Test(expected = FlumeException.class) public void testZKQuorumIncorrectPorts() throws Exception { initContextForSimpleHbaseEventSerializer(); Context tmpContext = new Context(ctx.getParameters()); String zkQuorum = "zk1.flume.apache.org:3345, zk2.flume.apache.org:3342, " + "zk3.flume.apache.org:3342"; tmpContext.put("batchSize", "2"); tmpContext.put(HBaseSinkConfigurationConstants.ZK_QUORUM, zkQuorum); tmpContext.put(HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT, conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT)); HBaseSink sink = new HBaseSink(); Configurables.configure(sink, tmpContext); Assert.fail(); } @Test public void testCoalesce() throws EventDeliveryException { initContextForIncrementHBaseSerializer(); ctx.put("batchSize", "100"); ctx.put(HBaseSinkConfigurationConstants.CONFIG_COALESCE_INCREMENTS, String.valueOf(true)); final Map<String, Long> expectedCounts = Maps.newHashMap(); expectedCounts.put("r1:c1", 10L); expectedCounts.put("r1:c2", 20L); expectedCounts.put("r2:c1", 7L); expectedCounts.put("r2:c3", 63L); HBaseSink.DebugIncrementsCallback cb = new CoalesceValidator(expectedCounts); HBaseSink sink = new HBaseSink(testUtility.getConfiguration(), cb); Configurables.configure(sink, ctx); Channel channel = createAndConfigureMemoryChannel(sink); List<Event> events = Lists.newLinkedList(); generateEvents(events, expectedCounts); putEvents(channel, events); sink.start(); sink.process(); // Calls CoalesceValidator instance. sink.stop(); } @Test(expected = AssertionError.class) public void negativeTestCoalesce() throws EventDeliveryException { initContextForIncrementHBaseSerializer(); ctx.put("batchSize", "10"); final Map<String, Long> expectedCounts = Maps.newHashMap(); expectedCounts.put("r1:c1", 10L); HBaseSink.DebugIncrementsCallback cb = new CoalesceValidator(expectedCounts); HBaseSink sink = new HBaseSink(testUtility.getConfiguration(), cb); Configurables.configure(sink, ctx); Channel channel = createAndConfigureMemoryChannel(sink); List<Event> events = Lists.newLinkedList(); generateEvents(events, expectedCounts); putEvents(channel, events); sink.start(); sink.process(); // Calls CoalesceValidator instance. sink.stop(); } @Test public void testBatchAware() throws EventDeliveryException { logger.info("Running testBatchAware()"); initContextForIncrementHBaseSerializer(); HBaseSink sink = new HBaseSink(testUtility.getConfiguration()); Configurables.configure(sink, ctx); Channel channel = createAndConfigureMemoryChannel(sink); sink.start(); int batchCount = 3; for (int i = 0; i < batchCount; i++) { sink.process(); } sink.stop(); Assert.assertEquals(batchCount, ((IncrementHBaseSerializer) sink.getSerializer()).getNumBatchesStarted()); } /** * For testing that the rows coalesced, serialized by * {@link IncrementHBaseSerializer}, are of the expected batch size. */ private static class CoalesceValidator implements HBaseSink.DebugIncrementsCallback { private final Map<String,Long> expectedCounts; private final Method refGetFamilyMap; public CoalesceValidator(Map<String, Long> expectedCounts) { this.expectedCounts = expectedCounts; this.refGetFamilyMap = HBaseSink.reflectLookupGetFamilyMap(); } @Override @SuppressWarnings("unchecked") public void onAfterCoalesce(Iterable<Increment> increments) { for (Increment inc : increments) { byte[] row = inc.getRow(); Map<byte[], NavigableMap<byte[], Long>> families = null; try { families = (Map<byte[], NavigableMap<byte[], Long>>) refGetFamilyMap.invoke(inc); } catch (Exception e) { Throwables.propagate(e); } for (byte[] family : families.keySet()) { NavigableMap<byte[], Long> qualifiers = families.get(family); for (Map.Entry<byte[], Long> entry : qualifiers.entrySet()) { byte[] qualifier = entry.getKey(); Long count = entry.getValue(); StringBuilder b = new StringBuilder(20); b.append(new String(row, Charsets.UTF_8)); b.append(':'); b.append(new String(qualifier, Charsets.UTF_8)); String key = b.toString(); Assert.assertEquals("Expected counts don't match observed for " + key, expectedCounts.get(key), count); } } } } } /** * Add number of Events corresponding to counts to the events list. * @param events Destination list. * @param counts How many events to generate for each row:qualifier pair. */ private void generateEvents(List<Event> events, Map<String, Long> counts) { for (String key : counts.keySet()) { long count = counts.get(key); for (long i = 0; i < count; i++) { events.add(EventBuilder.withBody(key, Charsets.UTF_8)); } } } private Channel createAndConfigureMemoryChannel(HBaseSink sink) { Channel channel = new MemoryChannel(); Context channelCtx = new Context(); channelCtx.put("capacity", String.valueOf(1000L)); channelCtx.put("transactionCapacity", String.valueOf(1000L)); Configurables.configure(channel, channelCtx); sink.setChannel(channel); channel.start(); return channel; } private void putEvents(Channel channel, Iterable<Event> events) { Transaction tx = channel.getTransaction(); tx.begin(); for (Event event : events) { channel.put(event); } tx.commit(); tx.close(); } }