/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.io.hbase;
import static org.apache.beam.sdk.testing.SourceTestUtils.assertSourcesEqualReferenceSource;
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
import static org.hamcrest.Matchers.hasSize;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.hbase.HBaseIO.HBaseSource;
import org.apache.beam.sdk.io.range.ByteKey;
import org.apache.beam.sdk.io.range.ByteKeyRange;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.SourceTestUtils;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.hamcrest.Matchers;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
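/**
* Tests for {@link HBaseIO} reads and writes, run against an HBase mini cluster that is
* started once for the whole class.
*/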
@RunWith(JUnit4.class)
public class HBaseIOTest {
// Pipeline rule on which every pipeline-based test applies its transforms.
@Rule public final transient TestPipeline p = TestPipeline.create();
// Lets a test declare the exception type, cause and message it expects to be thrown.
@Rule public ExpectedException thrown = ExpectedException.none();
// Mini-cluster utility; created and started in beforeClass(), shut down in afterClass().
private static HBaseTestingUtility htu;
// Admin handle obtained from the mini cluster; helpers use it to create tables and to
// obtain a connection for writing test data.
private static HBaseAdmin admin;
// Configuration handed to the mini cluster and to every HBaseIO transform under test.
private static Configuration conf = HBaseConfiguration.create();
// Column family and the two column qualifiers used by the test-data helpers.
private static final byte[] COLUMN_FAMILY = Bytes.toBytes("info");
private static final byte[] COLUMN_NAME = Bytes.toBytes("name");
private static final byte[] COLUMN_EMAIL = Bytes.toBytes("email");
/**
 * Configures the shared {@code conf}, starts the HBase mini cluster once for the whole class
 * and keeps an admin handle for the table helpers.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  final String localhost = "localhost";

  // Fail fast: a single client retry keeps negative tests short.
  conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);

  // Pin master and region server host names to localhost so the cluster still starts on
  // machines where the host name is not configured or DNS resolution is unavailable.
  conf.setStrings("hbase.master.hostname", localhost);
  conf.setStrings("hbase.regionserver.hostname", localhost);

  htu = new HBaseTestingUtility(conf);
  // NOTE(review): arguments are presumably (masters, region servers) - confirm against the
  // HBaseTestingUtility version in use.
  htu.startMiniCluster(1, 4);
  admin = htu.getHBaseAdmin();
}
/**
 * Releases the admin handle and shuts the mini cluster down.
 *
 * <p>The cluster shutdown runs in a {@code finally} block so that a failure while closing the
 * admin handle cannot leave the mini cluster running.
 */
@AfterClass
public static void afterClass() throws Exception {
  try {
    if (admin != null) {
      admin.close();
    }
  } finally {
    admin = null;
    if (htu != null) {
      htu.shutdownMiniCluster();
      htu = null;
    }
  }
}
/** Checks that a read built as configuration-then-table exposes both settings. */
@Test
public void testReadBuildsCorrectly() {
  HBaseIO.Read withConf = HBaseIO.read().withConfiguration(conf);
  HBaseIO.Read read = withConf.withTableId("table");

  assertEquals("table", read.getTableId());
  assertNotNull("configuration", read.getConfiguration());
}
/** Checks that a read built as table-then-configuration exposes both settings. */
@Test
public void testReadBuildsCorrectlyInDifferentOrder() {
  HBaseIO.Read withTable = HBaseIO.read().withTableId("table");
  HBaseIO.Read read = withTable.withConfiguration(conf);

  assertEquals("table", read.getTableId());
  assertNotNull("configuration", read.getConfiguration());
}
/** Checks that a write built as configuration-then-table exposes both settings. */
@Test
public void testWriteBuildsCorrectly() {
  HBaseIO.Write withConf = HBaseIO.write().withConfiguration(conf);
  HBaseIO.Write write = withConf.withTableId("table");

  assertEquals("table", write.getTableId());
  assertNotNull("configuration", write.getConfiguration());
}
/** Checks that a write built as table-then-configuration exposes both settings. */
@Test
public void testWriteBuildsCorrectlyInDifferentOrder() {
  HBaseIO.Write withTable = HBaseIO.write().withTableId("table");
  HBaseIO.Write write = withTable.withConfiguration(conf);

  assertEquals("table", write.getTableId());
  assertNotNull("configuration", write.getConfiguration());
}
/** Checks that validating a write with no table id throws IllegalArgumentException. */
@Test
public void testWriteValidationFailsMissingTable() {
  // Only the configuration is supplied; the table id is deliberately left unset.
  HBaseIO.Write writeWithoutTable = HBaseIO.write().withConfiguration(conf);

  thrown.expect(IllegalArgumentException.class);
  writeWithoutTable.validate(null /* input */);
}
/** Checks that validating a write with no configuration throws IllegalArgumentException. */
@Test
public void testWriteValidationFailsMissingConfiguration() {
  // Only the table id is supplied; the configuration is deliberately left unset.
  HBaseIO.Write writeWithoutConf = HBaseIO.write().withTableId("table");

  thrown.expect(IllegalArgumentException.class);
  writeWithoutConf.validate(null /* input */);
}
/** Checks that reading from a table that was never created fails with a clear message. */
@Test
public void testReadingFailsTableDoesNotExist() throws Exception {
  final String table = "TEST-TABLE-INVALID";
  final String expectedMessage = String.format("Table %s does not exist", table);

  // Expectations are registered up front: the IllegalArgumentException must surface from
  // runReadTest, which both applies the read and runs the pipeline.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(expectedMessage);

  HBaseIO.Read read = HBaseIO.read().withConfiguration(conf).withTableId(table);
  runReadTest(read, new ArrayList<Result>());
}
/** Checks that reading a freshly created table with no rows succeeds and yields nothing. */
@Test
public void testReadingEmptyTable() throws Exception {
  final String table = "TEST-EMPTY-TABLE";
  createTable(table);

  HBaseIO.Read read = HBaseIO.read().withConfiguration(conf).withTableId(table);
  List<Result> noRows = new ArrayList<>();
  runReadTest(read, noRows);
}
/** Tests that every row written to a table is read back. */
@Test
public void testReading() throws Exception {
  final String table = "TEST-MANY-ROWS-TABLE";
  final int numRows = 1001;
  createTable(table);
  writeData(table, numRows);
  // Expect exactly the number of rows written; derived from numRows rather than repeated as a
  // literal so the two cannot drift apart.
  runReadTestLength(HBaseIO.read().withConfiguration(conf).withTableId(table), numRows);
}
/**
 * Tests that splitting a source over a pre-split table yields one split per region and that
 * the splits together read the same rows as the unsplit source.
 */
@Test
public void testReadingWithSplits() throws Exception {
  final String table = "TEST-MANY-ROWS-SPLITS-TABLE";
  final int numRows = 1500;
  // createTable(String) pre-splits the table with three split keys, i.e. four regions.
  final int numRegions = 4;
  final long bytesPerRow = 100L;

  // Set up test table data and sample row keys for size estimation and splitting.
  createTable(table);
  writeData(table, numRows);

  HBaseIO.Read read = HBaseIO.read().withConfiguration(conf).withTableId(table);
  HBaseSource source = new HBaseSource(read, null /* estimatedSizeBytes */);
  // Desired bundle size: the assumed total size (numRows * bytesPerRow) divided by regions.
  List<? extends BoundedSource<Result>> splits =
      source.split(numRows * bytesPerRow / numRegions, null /* options */);

  // Test num splits and split equality.
  assertThat(splits, hasSize(numRegions));
  assertSourcesEqualReferenceSource(source, splits, null /* options */);
}
/** Checks that one {@link HBaseSource} instance returns all rows on two consecutive reads. */
@Test
public void testReadingSourceTwice() throws Exception {
  final String table = "TEST-READING-TWICE";
  final int numRows = 10;

  // Populate a fresh table that the source will read.
  createTable(table);
  writeData(table, numRows);

  HBaseIO.Read read = HBaseIO.read().withConfiguration(conf).withTableId(table);
  HBaseSource source = new HBaseSource(read, null /* estimatedSizeBytes */);

  // Read the very same source object twice; both passes must see every row.
  for (int pass = 0; pass < 2; pass++) {
    assertThat(SourceTestUtils.readFromSource(source, null), hasSize(numRows));
  }
}
/** Checks that a read with a row filter only returns rows whose key matches the regex. */
@Test
public void testReadingWithFilter() throws Exception {
  final String table = "TEST-FILTER-TABLE";
  final int numRows = 1001;
  createTable(table);
  writeData(table, numRows);

  // Keep only rows whose key contains "17".
  String regex = ".*17.*";
  RegexStringComparator keyMatcher = new RegexStringComparator(regex);
  Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, keyMatcher);

  HBaseIO.Read filteredRead =
      HBaseIO.read().withConfiguration(conf).withTableId(table).withFilter(filter);
  runReadTestLength(filteredRead, 20);
}
/**
 * Tests reading with key ranges: a prefix range ending at the start key, a suffix range
 * beginning at the start key, and a range restricted by both a start and a stop row. The
 * last case goes through the {@code withKeyRange(byte[], byte[])} overload.
 */
@Test
public void testReadingWithKeyRange() throws Exception {
  final String table = "TEST-KEY-RANGE-TABLE";
  final int numRows = 1001;
  // Explicit charset: the keys must not depend on the platform default encoding.
  final byte[] startRow = "2".getBytes(StandardCharsets.UTF_8);
  final byte[] stopRow = "9".getBytes(StandardCharsets.UTF_8);
  final ByteKey startKey = ByteKey.copyFrom(startRow);

  createTable(table);
  writeData(table, numRows);

  // Test prefix: [beginning, startKey).
  final ByteKeyRange prefixRange = ByteKeyRange.ALL_KEYS.withEndKey(startKey);
  runReadTestLength(HBaseIO.read().withConfiguration(conf).withTableId(table)
      .withKeyRange(prefixRange), 126);

  // Test suffix: [startKey, end).
  final ByteKeyRange suffixRange = ByteKeyRange.ALL_KEYS.withStartKey(startKey);
  runReadTestLength(HBaseIO.read().withConfiguration(conf).withTableId(table)
      .withKeyRange(suffixRange), 875);

  // Test restricted range: [startKey, endKey).
  // This one tests the second signature of .withKeyRange
  runReadTestLength(HBaseIO.read().withConfiguration(conf).withTableId(table)
      .withKeyRange(startRow, stopRow), 441);
}
/** Checks that a read's display data carries the table id and a configuration item. */
@Test
public void testReadingDisplayData() {
  HBaseIO.Read read = HBaseIO.read().withConfiguration(conf).withTableId("fooTable");

  DisplayData readDisplayData = DisplayData.from(read);

  assertThat(readDisplayData, hasDisplayItem("tableId", "fooTable"));
  assertThat(readDisplayData, hasDisplayItem("configuration"));
}
/**
 * Tests that a single row written through {@code HBaseIO.write()} ends up in the table with
 * the expected row key and name column value.
 */
@Test
public void testWriting() throws Exception {
  final String table = "table";
  final String key = "key";
  final String value = "value";
  createTable(table);

  p.apply("single row", Create.of(makeWrite(key, value)).withCoder(HBaseIO.WRITE_CODER))
      .apply("write", HBaseIO.write().withConfiguration(conf).withTableId(table));
  p.run().waitUntilFinish();

  // Read the table back directly (not through HBaseIO) and verify count and content.
  List<Result> results = readTable(table, new Scan());
  assertEquals(1, results.size());
  Result written = results.get(0);
  assertEquals(key, Bytes.toString(written.getRow()));
  assertEquals(value, Bytes.toString(written.getValue(COLUMN_FAMILY, COLUMN_NAME)));
}
/** Checks that a pipeline writing to a table that was never created fails. */
@Test
public void testWritingFailsTableDoesNotExist() throws Exception {
  final String table = "TEST-TABLE-DOES-NOT-EXIST";
  final String expectedMessage = String.format("Table %s does not exist", table);

  p.apply(Create.empty(HBaseIO.WRITE_CODER))
      .apply("write", HBaseIO.write().withConfiguration(conf).withTableId(table));

  // The expectations are registered only after the write has been applied, so the
  // IllegalArgumentException has to come from running the pipeline below.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(expectedMessage);
  p.run();
}
/** Checks that the pipeline fails when an element carries a Put without any column. */
@Test
public void testWritingFailsBadElement() throws Exception {
  final String table = "TEST-TABLE-BAD-ELEMENT";
  final String key = "KEY";
  createTable(table);

  PCollection<KV<byte[], Iterable<Mutation>>> badInput =
      p.apply(Create.of(makeBadWrite(key)).withCoder(HBaseIO.WRITE_CODER));
  badInput.apply(HBaseIO.write().withConfiguration(conf).withTableId(table));

  // The run must fail with a pipeline execution exception whose cause is an
  // IllegalArgumentException and whose message mentions the missing columns.
  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectCause(Matchers.<Throwable>instanceOf(IllegalArgumentException.class));
  thrown.expectMessage("No columns to insert");
  p.run().waitUntilFinish();
}
/** Checks that a write's display data carries the table id. */
@Test
public void testWritingDisplayData() {
  HBaseIO.Write write = HBaseIO.write().withTableId("fooTable").withConfiguration(conf);

  DisplayData writeDisplayData = DisplayData.from(write);

  assertThat(writeDisplayData, hasDisplayItem("tableId", "fooTable"));
}
// HBase helper methods
/**
 * Creates a table with the default column family, pre-split at the keys "4", "8" and "C".
 *
 * @param tableId name of the table to create
 */
private static void createTable(String tableId) throws Exception {
  // Explicit charset so the split keys do not depend on the platform default encoding.
  byte[][] splitKeys = {
    "4".getBytes(StandardCharsets.UTF_8),
    "8".getBytes(StandardCharsets.UTF_8),
    "C".getBytes(StandardCharsets.UTF_8)
  };
  createTable(tableId, COLUMN_FAMILY, splitKeys);
}
/**
 * Creates a table with a single column family, pre-split at the given keys.
 *
 * @param tableId name of the table to create
 * @param columnFamily the only column family of the new table
 * @param splitKeys keys passed to the admin as the initial split points
 */
private static void createTable(String tableId, byte[] columnFamily, byte[][] splitKeys)
    throws Exception {
  HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableId));
  descriptor.addFamily(new HColumnDescriptor(columnFamily));
  admin.createTable(descriptor, splitKeys);
}
/**
 * Writes {@code numRows} generated rows (see {@code makeTableData}) into an existing table.
 *
 * <p>The connection belongs to the shared admin handle and is therefore not closed here; the
 * buffered mutator is closed even when a mutation or the flush fails.
 *
 * @param tableId name of the table to write to
 * @param numRows number of rows to generate and write
 */
private static void writeData(String tableId, int numRows) throws Exception {
  Connection connection = admin.getConnection();
  TableName tableName = TableName.valueOf(tableId);
  List<Mutation> mutations = makeTableData(numRows);
  try (BufferedMutator mutator = connection.getBufferedMutator(tableName)) {
    mutator.mutate(mutations);
    // Flush explicitly so the rows are sent before the mutator is closed.
    mutator.flush();
  }
}
/**
 * Builds the mutations for {@code numRows} test rows: two Puts per row, one for the name
 * column and one for the email column.
 *
 * <p>Each row key is {@code "_" + i} left-padded to 21 characters with a single hex digit
 * derived from {@code i % 16}.
 */
private static List<Mutation> makeTableData(int numRows) {
  List<Mutation> tableData = new ArrayList<>(numRows);
  for (int row = 0; row < numRows; row++) {
    String id = String.valueOf(row);
    // The pad character cycles through the hex digits 0,1, ... ,F,0, ...
    String padChar = String.format("%X", row % 16);
    // Keys are padded to a fixed width of 21 characters.
    byte[] rowKey = Bytes.toBytes(StringUtils.leftPad("_" + id, 21, padChar));

    Put namePut = new Put(rowKey).addColumn(COLUMN_FAMILY, COLUMN_NAME, Bytes.toBytes(id));
    Put emailPut =
        new Put(rowKey).addColumn(COLUMN_FAMILY, COLUMN_EMAIL, Bytes.toBytes(id + "@email.com"));
    tableData.add(namePut);
    tableData.add(emailPut);
  }
  return tableData;
}
/**
 * Opens a scanner over the given table. The caller is responsible for closing the returned
 * scanner.
 *
 * <p>The scanner is opened on the connection owned by the shared admin handle (the same one
 * {@code writeData} uses) instead of creating a new connection per call that nothing would
 * ever close.
 *
 * @param tableId name of the table to scan
 * @param scan scan specification passed to the table
 * @return an open scanner over the table
 */
private static ResultScanner scanTable(String tableId, Scan scan) throws Exception {
  Connection connection = admin.getConnection();
  // NOTE(review): the Table handle is not closed here because the returned scanner is still
  // in use by the caller - confirm whether it may be closed once the scanner is obtained.
  Table table = connection.getTable(TableName.valueOf(tableId));
  return table.getScanner(scan);
}
/**
 * Scans the given table and collects every returned {@link Result} into a list.
 *
 * <p>The scanner is closed even when iterating over it fails.
 *
 * @param tableId name of the table to read
 * @param scan scan specification passed to {@code scanTable}
 * @return all results produced by the scan, in scan order
 */
private static List<Result> readTable(String tableId, Scan scan) throws Exception {
  List<Result> results = new ArrayList<>();
  try (ResultScanner scanner = scanTable(tableId, scan)) {
    for (Result result : scanner) {
      results.add(result);
    }
  }
  return results;
}
// Beam helper methods
/**
 * Builds a write element for one row: the UTF-8 encoded key paired with a single mutation
 * produced by {@code makeMutation}.
 */
private static KV<byte[], Iterable<Mutation>> makeWrite(String key, String value) {
  List<Mutation> rowMutations = new ArrayList<>();
  rowMutations.add(makeMutation(key, value));
  return KV.of(key.getBytes(StandardCharsets.UTF_8), (Iterable<Mutation>) rowMutations);
}
/**
 * Builds a Put for the UTF-8 encoded key that sets the name column to {@code value} and the
 * email column to {@code value + "@email.com"}.
 */
private static Mutation makeMutation(String key, String value) {
  Put put = new Put(key.getBytes(StandardCharsets.UTF_8));
  put.addColumn(COLUMN_FAMILY, COLUMN_NAME, Bytes.toBytes(value));
  put.addColumn(COLUMN_FAMILY, COLUMN_EMAIL, Bytes.toBytes(value + "@email.com"));
  return put;
}
/**
 * Builds a deliberately invalid write element: its only mutation is a Put that has a row key
 * but no columns.
 *
 * <p>The row key is encoded once as UTF-8 and used for both the Put and the KV key, so the
 * two cannot differ on platforms whose default charset is not UTF-8.
 */
private static KV<byte[], Iterable<Mutation>> makeBadWrite(String key) {
  byte[] rowKey = key.getBytes(StandardCharsets.UTF_8);
  // No addColumn call: the Put stays empty on purpose.
  Put put = new Put(rowKey);
  List<Mutation> mutations = new ArrayList<>();
  mutations.add(put);
  return KV.of(rowKey, (Iterable<Mutation>) mutations);
}
/**
 * Applies the read to the test pipeline, asserts that it produces exactly the expected
 * results in any order, and runs the pipeline to completion.
 */
private void runReadTest(HBaseIO.Read read, List<Result> expected) {
  // The step name embeds the table id and key range of the read.
  final String stepName = "Read" + read.getTableId() + "_" + read.getKeyRange();
  PCollection<Result> readResults = p.apply(stepName, read);
  PAssert.that(readResults).containsInAnyOrder(expected);
  p.run().waitUntilFinish();
}
/**
 * Applies the read to the test pipeline, asserts that the number of results equals
 * {@code numElements}, and runs the pipeline to completion.
 */
private void runReadTestLength(HBaseIO.Read read, long numElements) {
  // Both step names share a suffix built from the table id and key range of the read.
  final String suffix = read.getTableId() + "_" + read.getKeyRange();
  PCollection<Result> readResults = p.apply("Read" + suffix, read);
  PCollection<Long> resultCount =
      readResults.apply("Count" + suffix, Count.<Result>globally());
  PAssert.thatSingleton(resultCount).isEqualTo(numElements);
  p.run().waitUntilFinish();
}
}