/*
* Copyright The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.addons.hbase;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.ExecutionEnvironmentFactory;
import org.apache.flink.api.java.LocalEnvironment;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableConfig;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.java.BatchTableEnvironment;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.flink.test.util.TestBaseUtils;
import org.apache.flink.types.Row;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import static org.junit.Assert.assertEquals;
/**
* This class contains integration tests for multiple HBase connectors:
* - TableInputFormat
* - HBaseTableSource
*
* These tests are located in a single test file to avoid unnecessary initializations of the
* HBaseTestingCluster, which takes about half a minute.
*
*/
public class HBaseConnectorITCase extends HBaseTestingClusterAutostarter {
// Name of the HBase table that prepareTable() creates and that all tests read from.
private static final String TEST_TABLE = "testTable";
// Column family 1 and its only qualifier; putRow() writes an int value into it.
private static final String FAMILY1 = "family1";
private static final String F1COL1 = "col1";
// Column family 2; putRow() writes an optional String into col1 and a long into col2.
private static final String FAMILY2 = "family2";
private static final String F2COL1 = "col1";
private static final String F2COL2 = "col2";
// Column family 3; putRow() writes a double (col1), a boolean (col2) and a String (col3).
private static final String FAMILY3 = "family3";
private static final String F3COL1 = "col1";
private static final String F3COL2 = "col2";
private static final String F3COL3 = "col3";
/**
* One-time setup for all tests of this class: calls
* {@code registerHBaseMiniClusterInClasspath()}, creates and fills the test table, and
* finally installs the {@link LimitNetworkBuffersTestEnvironment} as context environment.
*
* @throws IOException propagated from {@link #prepareTable()}
*/
@BeforeClass
public static void activateHBaseCluster() throws IOException {
registerHBaseMiniClusterInClasspath();
prepareTable();
// from here on ExecutionEnvironment.getExecutionEnvironment() yields the memory-limited environment
LimitNetworkBuffersTestEnvironment.setAsContext();
}
/**
* One-time teardown: removes the context environment that
* {@link #activateHBaseCluster()} installed.
*/
@AfterClass
public static void resetExecutionEnvironmentFactory() {
LimitNetworkBuffersTestEnvironment.unsetAsContext();
}
/**
 * Creates the test table with its three column families and fills it with eight rows
 * (row keys 1 to 8). Rows 4 and 8 have no value for {@code family2:col1}.
 *
 * @throws IOException if opening the table or writing the rows fails
 */
private static void prepareTable() throws IOException {
    // create a table
    TableName tableName = TableName.valueOf(TEST_TABLE);

    // column families
    byte[][] families = new byte[][]{
        Bytes.toBytes(FAMILY1),
        Bytes.toBytes(FAMILY2),
        Bytes.toBytes(FAMILY3)
    };

    // split keys: row key 4 is handed to createTable as the only split key
    byte[][] splitKeys = new byte[][]{ Bytes.toBytes(4) };
    createTable(tableName, families, splitKeys);

    // assemble the rows to insert
    List<Put> puts = new ArrayList<>();
    puts.add(putRow(1, 10, "Hello-1", 100L, 1.01, false, "Welt-1"));
    puts.add(putRow(2, 20, "Hello-2", 200L, 2.02, true, "Welt-2"));
    puts.add(putRow(3, 30, "Hello-3", 300L, 3.03, false, "Welt-3"));
    puts.add(putRow(4, 40, null, 400L, 4.04, true, "Welt-4"));
    puts.add(putRow(5, 50, "Hello-5", 500L, 5.05, false, "Welt-5"));
    puts.add(putRow(6, 60, "Hello-6", 600L, 6.06, true, "Welt-6"));
    puts.add(putRow(7, 70, "Hello-7", 700L, 7.07, false, "Welt-7"));
    puts.add(putRow(8, 80, null, 800L, 8.08, true, "Welt-8"));

    // try-with-resources guarantees that the table handle is closed even if the write fails
    try (HTable table = openTable(tableName)) {
        // append rows to table
        table.put(puts);
    }
}
/**
 * Builds the {@link Put} for one test row.
 *
 * @param rowKey row key of the new row
 * @param f1c1 value for {@code family1:col1}
 * @param f2c1 value for {@code family2:col1}; the cell is omitted when this is {@code null}
 * @param f2c2 value for {@code family2:col2}
 * @param f3c1 value for {@code family3:col1}
 * @param f3c2 value for {@code family3:col2}
 * @param f3c3 value for {@code family3:col3}
 * @return the populated put operation
 */
private static Put putRow(int rowKey, int f1c1, String f2c1, long f2c2, double f3c1, boolean f3c2, String f3c3) {
    // encode each family name once instead of once per cell
    final byte[] family1 = Bytes.toBytes(FAMILY1);
    final byte[] family2 = Bytes.toBytes(FAMILY2);
    final byte[] family3 = Bytes.toBytes(FAMILY3);

    final Put row = new Put(Bytes.toBytes(rowKey));

    // family 1
    row.addColumn(family1, Bytes.toBytes(F1COL1), Bytes.toBytes(f1c1));

    // family 2: col1 is optional and simply left out for null values
    if (f2c1 != null) {
        row.addColumn(family2, Bytes.toBytes(F2COL1), Bytes.toBytes(f2c1));
    }
    row.addColumn(family2, Bytes.toBytes(F2COL2), Bytes.toBytes(f2c2));

    // family 3
    row.addColumn(family3, Bytes.toBytes(F3COL1), Bytes.toBytes(f3c1));
    row.addColumn(family3, Bytes.toBytes(F3COL2), Bytes.toBytes(f3c2));
    row.addColumn(family3, Bytes.toBytes(F3COL3), Bytes.toBytes(f3c3));

    return row;
}
// ######## HBaseTableSource tests ############
/**
 * Registers all six columns on an {@link HBaseTableSource}, selects every one of them via SQL
 * and verifies that all eight rows come back, with {@code null} for the missing
 * {@code family2:col1} cells.
 */
@Test
public void testTableSourceFullScan() throws Exception {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    execEnv.setParallelism(4);
    final BatchTableEnvironment batchTableEnv =
        TableEnvironment.getTableEnvironment(execEnv, new TableConfig());

    // expose every column of the three families
    final HBaseTableSource source = new HBaseTableSource(getConf(), TEST_TABLE);
    source.addColumn(FAMILY1, F1COL1, Integer.class);
    source.addColumn(FAMILY2, F2COL1, String.class);
    source.addColumn(FAMILY2, F2COL2, Long.class);
    source.addColumn(FAMILY3, F3COL1, Double.class);
    source.addColumn(FAMILY3, F3COL2, Boolean.class);
    source.addColumn(FAMILY3, F3COL3, String.class);
    batchTableEnv.registerTableSource("hTable", source);

    final String query =
        "SELECT " +
        " h.family1.col1, " +
        " h.family2.col1, " +
        " h.family2.col2, " +
        " h.family3.col1, " +
        " h.family3.col2, " +
        " h.family3.col3 " +
        "FROM hTable AS h";
    final Table queried = batchTableEnv.sql(query);
    final List<Row> rows = batchTableEnv.toDataSet(queried, Row.class).collect();

    final String expected =
        "10,Hello-1,100,1.01,false,Welt-1\n" +
        "20,Hello-2,200,2.02,true,Welt-2\n" +
        "30,Hello-3,300,3.03,false,Welt-3\n" +
        "40,null,400,4.04,true,Welt-4\n" +
        "50,Hello-5,500,5.05,false,Welt-5\n" +
        "60,Hello-6,600,6.06,true,Welt-6\n" +
        "70,Hello-7,700,7.07,false,Welt-7\n" +
        "80,null,800,8.08,true,Welt-8\n";
    TestBaseUtils.compareResultAsText(rows, expected);
}
/**
 * Registers all six columns but selects only {@code family1} and {@code family3}, verifying
 * that a query which leaves out a whole column family still returns the right values.
 */
@Test
public void testTableSourceProjection() throws Exception {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    execEnv.setParallelism(4);
    final BatchTableEnvironment batchTableEnv =
        TableEnvironment.getTableEnvironment(execEnv, new TableConfig());

    // the source knows all columns; the query below only asks for a subset
    final HBaseTableSource source = new HBaseTableSource(getConf(), TEST_TABLE);
    source.addColumn(FAMILY1, F1COL1, Integer.class);
    source.addColumn(FAMILY2, F2COL1, String.class);
    source.addColumn(FAMILY2, F2COL2, Long.class);
    source.addColumn(FAMILY3, F3COL1, Double.class);
    source.addColumn(FAMILY3, F3COL2, Boolean.class);
    source.addColumn(FAMILY3, F3COL3, String.class);
    batchTableEnv.registerTableSource("hTable", source);

    final String query =
        "SELECT " +
        " h.family1.col1, " +
        " h.family3.col1, " +
        " h.family3.col2, " +
        " h.family3.col3 " +
        "FROM hTable AS h";
    final Table queried = batchTableEnv.sql(query);
    final List<Row> rows = batchTableEnv.toDataSet(queried, Row.class).collect();

    final String expected =
        "10,1.01,false,Welt-1\n" +
        "20,2.02,true,Welt-2\n" +
        "30,3.03,false,Welt-3\n" +
        "40,4.04,true,Welt-4\n" +
        "50,5.05,false,Welt-5\n" +
        "60,6.06,true,Welt-6\n" +
        "70,7.07,false,Welt-7\n" +
        "80,8.08,true,Welt-8\n";
    TestBaseUtils.compareResultAsText(rows, expected);
}
/**
 * Registers the columns in a shuffled order and runs {@code SELECT *}. The expected rows list
 * the {@code family2} values first, then {@code family3}, then {@code family1}, i.e. the
 * families appear in the order in which they were first registered.
 */
@Test
public void testTableSourceFieldOrder() throws Exception {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    execEnv.setParallelism(4);
    final BatchTableEnvironment batchTableEnv =
        TableEnvironment.getTableEnvironment(execEnv, new TableConfig());

    final HBaseTableSource source = new HBaseTableSource(getConf(), TEST_TABLE);
    // shuffle order of column registration
    source.addColumn(FAMILY2, F2COL1, String.class);
    source.addColumn(FAMILY3, F3COL1, Double.class);
    source.addColumn(FAMILY1, F1COL1, Integer.class);
    source.addColumn(FAMILY2, F2COL2, Long.class);
    source.addColumn(FAMILY3, F3COL2, Boolean.class);
    source.addColumn(FAMILY3, F3COL3, String.class);
    batchTableEnv.registerTableSource("hTable", source);

    final Table queried = batchTableEnv.sql("SELECT * FROM hTable AS h");
    final List<Row> rows = batchTableEnv.toDataSet(queried, Row.class).collect();

    final String expected =
        "Hello-1,100,1.01,false,Welt-1,10\n" +
        "Hello-2,200,2.02,true,Welt-2,20\n" +
        "Hello-3,300,3.03,false,Welt-3,30\n" +
        "null,400,4.04,true,Welt-4,40\n" +
        "Hello-5,500,5.05,false,Welt-5,50\n" +
        "Hello-6,600,6.06,true,Welt-6,60\n" +
        "Hello-7,700,7.07,false,Welt-7,70\n" +
        "null,800,8.08,true,Welt-8,80\n";
    TestBaseUtils.compareResultAsText(rows, expected);
}
/**
 * Registers both {@code family2} columns as raw {@code byte[]} and decodes them inside the
 * query with the scalar functions {@link ToUTF8} and {@link ToLong}.
 */
@Test
public void testTableSourceReadAsByteArray() throws Exception {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    execEnv.setParallelism(4);
    final BatchTableEnvironment batchTableEnv =
        TableEnvironment.getTableEnvironment(execEnv, new TableConfig());

    // read the family2 cells without any type conversion
    final HBaseTableSource source = new HBaseTableSource(getConf(), TEST_TABLE);
    source.addColumn(FAMILY2, F2COL1, byte[].class);
    source.addColumn(FAMILY2, F2COL2, byte[].class);
    batchTableEnv.registerTableSource("hTable", source);

    // the conversion happens in SQL through these user-defined functions
    batchTableEnv.registerFunction("toUTF8", new ToUTF8());
    batchTableEnv.registerFunction("toLong", new ToLong());

    final String query =
        "SELECT " +
        " toUTF8(h.family2.col1), " +
        " toLong(h.family2.col2) " +
        "FROM hTable AS h";
    final Table queried = batchTableEnv.sql(query);
    final List<Row> rows = batchTableEnv.toDataSet(queried, Row.class).collect();

    final String expected =
        "Hello-1,100\n" +
        "Hello-2,200\n" +
        "Hello-3,300\n" +
        "null,400\n" +
        "Hello-5,500\n" +
        "Hello-6,600\n" +
        "Hello-7,700\n" +
        "null,800\n";
    TestBaseUtils.compareResultAsText(rows, expected);
}
/**
 * Scalar function that turns a raw cell value into a String via {@code Bytes.toString}.
 */
public static class ToUTF8 extends ScalarFunction {

    /**
     * @param bytes raw cell value
     * @return whatever {@code Bytes.toString(bytes)} yields for the given value
     */
    public String eval(byte[] bytes) {
        final String decoded = Bytes.toString(bytes);
        return decoded;
    }
}
/**
 * Scalar function that turns a raw cell value into a long via {@code Bytes.toLong}.
 */
public static class ToLong extends ScalarFunction {

    /**
     * @param bytes raw cell value
     * @return whatever {@code Bytes.toLong(bytes)} yields for the given value
     */
    public long eval(byte[] bytes) {
        final long decoded = Bytes.toLong(bytes);
        return decoded;
    }
}
// ######## TableInputFormat tests ############
class InputFormatForTestTable extends TableInputFormat<Tuple1<Integer>> {
@Override
protected Scan getScanner() {
return new Scan();
}
@Override
protected String getTableName() {
return TEST_TABLE;
}
@Override
protected Tuple1<Integer> mapResultToTuple(Result r) {
return new Tuple1<>(Bytes.toInt(r.getValue(Bytes.toBytes(FAMILY1), Bytes.toBytes(F1COL1))));
}
}
/**
 * Reads {@code family1:col1} of every row through {@link InputFormatForTestTable} and sums the
 * values up. The eight rows hold 10, 20, ..., 80, so the single result must be 360.
 */
@Test
public void testTableInputFormat() throws Exception {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    execEnv.setParallelism(4);

    // adds up the single field of two tuples
    final ReduceFunction<Tuple1<Integer>> sumUp = new ReduceFunction<Tuple1<Integer>>() {
        @Override
        public Tuple1<Integer> reduce(Tuple1<Integer> v1, Tuple1<Integer> v2) throws Exception {
            return Tuple1.of(v1.f0 + v2.f0);
        }
    };

    final DataSet<Tuple1<Integer>> summed = execEnv
        .createInput(new InputFormatForTestTable())
        .reduce(sumUp);

    final List<Tuple1<Integer>> collected = summed.collect();
    assertEquals(1, collected.size());

    final int total = collected.get(0).f0;
    assertEquals(360, total);
}
/**
 * Lets the tests keep calling {@link ExecutionEnvironment#getExecutionEnvironment()} while
 * running with a configuration that caps the memory used for network buffers, because the
 * current defaults are too high for Travis-CI.
 */
private abstract static class LimitNetworkBuffersTestEnvironment extends ExecutionEnvironment {

    /**
     * Installs a factory as context environment that always hands out one and the same
     * {@link LocalEnvironment}, configured with at most 80 MB of network buffer memory.
     */
    public static void setAsContext() {
        final Configuration limitedConfig = new Configuration();
        // the default network buffers size (10% of heap max =~ 150MB) seems too much for this test case
        final long maxNetworkMemory = 80L * 1024 * 1024; // 80 MB
        limitedConfig.setLong(TaskManagerOptions.NETWORK_BUFFERS_MEMORY_MAX, maxNetworkMemory);

        final LocalEnvironment sharedEnv = new LocalEnvironment(limitedConfig);
        final ExecutionEnvironmentFactory factory = new ExecutionEnvironmentFactory() {
            @Override
            public ExecutionEnvironment createExecutionEnvironment() {
                return sharedEnv;
            }
        };
        initializeContextEnvironment(factory);
    }

    /**
     * Removes the context environment installed by {@link #setAsContext()}.
     */
    public static void unsetAsContext() {
        resetContextEnvironment();
    }
}
}