/*******************************************************************************
* Pentaho Big Data
* <p/>
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
* <p/>
* ******************************************************************************
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
******************************************************************************/
package org.pentaho.hbase.shim.common;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.TimestampsFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.pentaho.di.core.logging.KettleLogStore;
import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.hbase.factory.HBaseAdmin;
import org.pentaho.hbase.factory.HBaseClientFactory;
import org.pentaho.hbase.factory.HBaseClientFactoryLocator;
import org.pentaho.hbase.factory.HBasePut;
import org.pentaho.hbase.factory.HBaseTable;
import org.pentaho.hbase.shim.api.ColumnFilter;
import org.pentaho.hbase.shim.api.HBaseValueMeta;
import org.pentaho.hbase.shim.api.Mapping;
import org.pentaho.hbase.shim.spi.HBaseBytesUtilShim;
import org.pentaho.hbase.shim.spi.HBaseConnection;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.NavigableMap;
import java.util.Properties;
import java.util.Set;
/**
* Concrete implementation for Hadoop 20.x.
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
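*
* <p>A minimal usage sketch (host names are illustrative, and it is assumed the key
* constants inherited from {@link HBaseConnection} are visible to callers):</p>
*
* <pre>{@code
* Properties connProps = new Properties();
* connProps.setProperty( ZOOKEEPER_QUORUM_KEY, "zk1.example.com,zk2.example.com" );
* connProps.setProperty( ZOOKEEPER_PORT_KEY, "2181" );
* CommonHBaseConnection conn = new CommonHBaseConnection();
* conn.configureConnection( connProps, new ArrayList<String>() );
* conn.checkHBaseAvailable();
* List<String> tables = conn.listTableNames();
* conn.close();
* }</pre>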
*/
public class CommonHBaseConnection extends HBaseConnection {
private static Class<?> PKG = CommonHBaseConnection.class;
protected Configuration m_config = null;
protected HBaseAdmin m_admin;
protected HBaseClientFactory m_factory;
protected HBaseTable m_sourceTable;
protected Scan m_sourceScan;
protected ResultScanner m_resultSet;
protected Result m_currentResultSetRow;
protected HBaseTable m_targetTable;
protected HBasePut m_currentTargetPut;
protected HBaseBytesUtilShim m_bytesUtil;
protected LogChannelInterface log = KettleLogStore.getLogChannelInterfaceFactory().create( this );
public CommonHBaseConnection() {
try {
getBytesUtil();
} catch ( Exception ex ) {
throw new RuntimeException( ex );
}
}
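/**
* Configure the connection from the supplied properties. Explicit hbase-default / hbase-site
* URLs take precedence over the bundled resource files, and a zookeeper quorum or port from
* the named cluster overrides whatever the site configuration declares.
*
* @param connProps   connection properties (see the key constants in {@link HBaseConnection})
* @param logMessages a list to receive non-fatal problem messages (may be null)
* @throws Exception if the configuration cannot be loaded or the client factory fails
*/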
@Override
public void configureConnection( Properties connProps, List<String> logMessages ) throws Exception {
ClassLoader cl = Thread.currentThread().getContextClassLoader();
Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
try {
String defaultConfig = connProps.getProperty( DEFAULTS_KEY );
String siteConfig = connProps.getProperty( SITE_KEY );
String zookeeperQuorum = connProps.getProperty( ZOOKEEPER_QUORUM_KEY );
String zookeeperPort = connProps.getProperty( ZOOKEEPER_PORT_KEY );
m_config = new Configuration();
try {
if ( !isEmpty( defaultConfig ) ) {
m_config.addResource( stringToURL( defaultConfig ) );
} else {
m_config.addResource( "hbase-default.xml" );
}
if ( !isEmpty( siteConfig ) ) {
m_config.addResource( stringToURL( siteConfig ) );
} else {
m_config.addResource( "hbase-site.xml" );
}
} catch ( MalformedURLException e ) {
throw new IllegalArgumentException(
BaseMessages.getString( PKG, "CommonHBaseConnection.Error.MalformedConfigURL" ) );
}
if ( !isEmpty( zookeeperQuorum ) && !isEmpty( m_config.get( ZOOKEEPER_QUORUM_KEY ) ) ) {
if ( !doZookeeperQuorumInNamedClusterAndConfigMatch( zookeeperQuorum ) ) {
String message = BaseMessages.getString( PKG,
"CommonHBaseConnection.Error.MismatchZookeeperNamedClusterVsConfiguration",
zookeeperQuorum, m_config.get( ZOOKEEPER_QUORUM_KEY ) );
log.logBasic( message );
// don't throw an exception here: the named cluster may use host aliases that differ entirely
// from the host names or IPs in the configuration, and verifying that case would require
// pinging each address, which is too expensive
}
}
if ( !isEmpty( zookeeperQuorum ) ) {
m_config.set( ZOOKEEPER_QUORUM_KEY, zookeeperQuorum );
}
if ( !isEmpty( zookeeperPort ) ) {
try {
int port = Integer.parseInt( zookeeperPort );
m_config.setInt( ZOOKEEPER_PORT_KEY, port );
} catch ( NumberFormatException e ) {
if ( logMessages != null ) {
logMessages.add( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.UnableToParseZookeeperPort" ) );
}
}
}
if ( log.isDebug() ) {
log.logDebug( "Opening HBase connection ..." );
}
m_factory = getHBaseClientFactory( m_config );
m_admin = m_factory.getHBaseAdmin();
} finally {
Thread.currentThread().setContextClassLoader( cl );
}
}
private boolean doZookeeperQuorumInNamedClusterAndConfigMatch( String zookeeperQuorum ) {
return allZookeeperHostsFromNamedClusterInConfigQuorum( zookeeperQuorum )
|| atLeastOneHostFromConfigInNamedClusterZookeeperQuorum( zookeeperQuorum );
}
private boolean allZookeeperHostsFromNamedClusterInConfigQuorum( String zookeeperQuorum ) {
String[] quorum = zookeeperQuorum.toLowerCase().split( "," );
String configQuorum = m_config.get( ZOOKEEPER_QUORUM_KEY ).toLowerCase();
for ( String node : quorum ) {
node = node.trim();
// strip the port if the zookeeper host includes one
if ( node.contains( ":" ) ) {
node = node.substring( 0, node.indexOf( ":" ) );
}
if ( !configQuorum.contains( node ) ) {
return false;
}
}
return true;
}
/**
* If the config uses short host names but the named cluster uses fully qualified ones, check
* whether at least one zookeeper host from the config is contained in the named cluster's
* quorum string.
*
* @param zookeeperQuorum comma-separated zookeeper hosts (with optional ports) from the named cluster
* @return true if at least one host is found
*/
private boolean atLeastOneHostFromConfigInNamedClusterZookeeperQuorum( String zookeeperQuorum ) {
String[] configZookeeperQuorum = m_config.get( ZOOKEEPER_QUORUM_KEY ).toLowerCase().split( "," );
String clientZookeeperQuorum = zookeeperQuorum.toLowerCase();
for ( String node : configZookeeperQuorum ) {
node = node.trim();
// strip the port if the zookeeper host includes one
if ( node.contains( ":" ) ) {
node = node.substring( 0, node.indexOf( ":" ) );
}
if ( clientZookeeperQuorum.contains( node ) ) {
return true;
}
}
return false;
}
protected HBaseClientFactory getHBaseClientFactory( Configuration configuration ) {
return HBaseClientFactoryLocator.getHBaseClientFactory( configuration );
}
@Override
public HBaseBytesUtilShim getBytesUtil() throws Exception {
if ( m_bytesUtil == null ) {
m_bytesUtil = new CommonHBaseBytesUtil();
}
return m_bytesUtil;
}
protected void checkConfiguration() throws Exception {
if ( m_admin == null ) {
throw new Exception(
BaseMessages.getString( PKG, "CommonHBaseConnection.Error.ConnectionHasNotBeenConfigured" ) );
}
}
@Override
public void checkHBaseAvailable() throws Exception {
checkConfiguration();
ClassLoader cl = Thread.currentThread().getContextClassLoader();
Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
try {
org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable( m_config );
} finally {
Thread.currentThread().setContextClassLoader( cl );
}
}
@Override
public List<String> listTableNames() throws Exception {
checkConfiguration();
HTableDescriptor[] tables = m_admin.listTables();
List<String> tableNames = new ArrayList<String>();
for ( HTableDescriptor h : tables ) {
tableNames.add( h.getNameAsString() );
}
return tableNames;
}
@Override
public boolean tableExists( String tableName ) throws Exception {
checkConfiguration();
return m_admin.tableExists( tableName );
}
@Override
public void disableTable( String tableName ) throws Exception {
checkConfiguration();
m_admin.disableTable( tableName );
}
@Override
public void enableTable( String tableName ) throws Exception {
checkConfiguration();
m_admin.enableTable( tableName );
}
@Override
public boolean isTableDisabled( String tableName ) throws Exception {
checkConfiguration();
return m_admin.isTableDisabled( tableName );
}
@Override
public boolean isTableAvailable( String tableName ) throws Exception {
checkConfiguration();
return m_admin.isTableAvailable( tableName );
}
@Override
public void deleteTable( String tableName ) throws Exception {
checkConfiguration();
m_admin.deleteTable( tableName );
}
@Override
public List<String> getTableFamiles( String tableName ) throws Exception {
checkConfiguration();
HTableDescriptor descriptor = m_admin.getTableDescriptor( m_bytesUtil.toBytes( tableName ) );
Collection<HColumnDescriptor> families = descriptor.getFamilies();
List<String> famList = new ArrayList<String>();
for ( HColumnDescriptor h : families ) {
famList.add( h.getNameAsString() );
}
return famList;
}
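/**
* Apply optional creation properties to a column family descriptor. A sketch of the
* recognized keys (the key constants come from {@link HBaseConnection}; the values shown
* are illustrative):
*
* <pre>{@code
* Properties p = new Properties();
* p.setProperty( COL_DESCRIPTOR_MAX_VERSIONS_KEY, "3" );
* p.setProperty( COL_DESCRIPTOR_COMPRESSION_KEY, "GZ" );
* p.setProperty( COL_DESCRIPTOR_IN_MEMORY_KEY, "true" );
* p.setProperty( COL_DESCRIPTOR_BLOCK_SIZE_KEY, "65536" );
* }</pre>
*
* @param h the column descriptor to configure
* @param p the creation properties (may be null)
* @throws Exception if a property value cannot be parsed or applied
*/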
protected void configureColumnDescriptor( HColumnDescriptor h, Properties p ) throws Exception {
if ( p != null ) {
// optional column family creation properties
Set<Object> keys = p.keySet();
for ( Object key : keys ) {
String value = p.getProperty( key.toString() );
if ( key.toString().equals( COL_DESCRIPTOR_MAX_VERSIONS_KEY ) ) {
h.setMaxVersions( Integer.parseInt( value ) );
} else if ( key.toString().equals( COL_DESCRIPTOR_COMPRESSION_KEY ) ) {
// Look up Compression.Algorithm.valueOf() method via reflection, as the API has changed
// between Hadoop 1 and 2.
// Also, valueOf() will throw an IllegalArgumentException if the value is not a legitimate algorithm name
Class<?> compressionAlgorithmClass = getCompressionAlgorithmClass();
Method valueOf = compressionAlgorithmClass.getMethod( "valueOf", String.class );
// Since we don't know the type of the Algorithm class at compile time, we need to
// use reflection here to set the Compression Algorithm for the HColumnDescriptor
Method setCompressionType = h.getClass().getMethod( "setCompressionType", compressionAlgorithmClass );
setCompressionType.invoke( h, valueOf.invoke( null, value ) );
} else if ( key.toString().equals( COL_DESCRIPTOR_IN_MEMORY_KEY ) ) {
boolean result = toBoolean( value );
h.setInMemory( result );
} else if ( key.toString().equals( COL_DESCRIPTOR_BLOCK_CACHE_ENABLED_KEY ) ) {
boolean result = ( toBoolean( value ) );
h.setBlockCacheEnabled( result );
} else if ( key.toString().equals( COL_DESCRIPTOR_BLOCK_SIZE_KEY ) ) {
h.setBlocksize( Integer.parseInt( value ) );
} else if ( key.toString().equals( COL_DESCRIPTOR_TIME_TO_LIVE_KEY ) ) {
h.setTimeToLive( Integer.parseInt( value ) );
} else if ( key.toString().equals( COL_DESCRIPTOR_BLOOM_FILTER_KEY ) ) {
// Again, need to use reflection to get the BloomType class to call valueOf and then setBloomFilterType
Class<?> bloomTypeClass = getBloomTypeClass();
Method valueOf = bloomTypeClass.getMethod( "valueOf", String.class );
Method setBloomFilterType = h.getClass().getMethod( "setBloomFilterType", bloomTypeClass );
setBloomFilterType.invoke( h, valueOf.invoke( null, value ) );
} else if ( key.toString().equals( COL_DESCRIPTOR_SCOPE_KEY ) ) {
h.setScope( Integer.parseInt( value ) );
}
}
}
}
protected void checkSourceTable() throws Exception {
if ( m_sourceTable == null ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.NoSourceTable" ) );
}
}
protected void checkSourceScan() throws Exception {
if ( m_sourceScan == null ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.NoSourceScan" ) );
}
}
@Override
public void createTable( String tableName, List<String> colFamilyNames, Properties creationProps ) throws Exception {
checkConfiguration();
HTableDescriptor tableDescription = m_factory.getHBaseTableDescriptor( tableName );
for ( String familyName : colFamilyNames ) {
HColumnDescriptor c = new HColumnDescriptor( familyName );
configureColumnDescriptor( c, creationProps );
tableDescription.addFamily( c );
}
m_admin.createTable( tableDescription );
}
@Override
public void newSourceTable( String tableName ) throws Exception {
checkConfiguration();
closeSourceTable();
m_sourceTable = m_factory.getHBaseTable( tableName );
}
@Override
public boolean sourceTableRowExists( byte[] rowKey ) throws Exception {
checkConfiguration();
checkSourceTable();
Get g = new Get( rowKey );
Result r = m_sourceTable.get( g );
return ( !r.isEmpty() );
}
@Override
public void newSourceTableScan( byte[] keyLowerBound, byte[] keyUpperBound, int cacheSize ) throws Exception {
checkConfiguration();
checkSourceTable();
closeSourceResultSet();
if ( keyLowerBound != null ) {
if ( keyUpperBound != null ) {
m_sourceScan = new Scan( keyLowerBound, keyUpperBound );
} else {
m_sourceScan = new Scan( keyLowerBound );
}
} else {
m_sourceScan = new Scan();
}
if ( cacheSize > 0 ) {
m_sourceScan.setCaching( cacheSize );
}
}
@Override
public void addColumnToScan( String colFamilyName, String colName, boolean colNameIsBinary ) throws Exception {
checkSourceScan();
m_sourceScan.addColumn( m_bytesUtil.toBytes( colFamilyName ),
( colNameIsBinary ) ? m_bytesUtil.toBytesBinary( colName ) : m_bytesUtil.toBytes( colName ) );
}
/**
* Add a column filter to the list of filters that the scanner will apply to rows server-side.
*
* @param cf the column filter to add
* @param columnMeta the meta data for the column used in the filter to add
* @param matchAny true if the list of filters (if not created yet) should be "match one" (and false if it should be
* "match all")
* @param vars variables to use
* @throws Exception if a problem occurs
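*
* <p>A sketch of building an equality filter; the field name is illustrative, and
* {@code statusColumnMeta} and {@code vars} are assumed to be prepared by the caller:</p>
*
* <pre>{@code
* ColumnFilter cf = new ColumnFilter( "status" );
* cf.setComparisonOperator( ColumnFilter.ComparisonType.EQUAL );
* cf.setConstant( "200" );
* cf.setSignedComparison( true );
* connection.addColumnFilterToScan( cf, statusColumnMeta, vars, false );
* }</pre>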
*/
@Override
public void addColumnFilterToScan( ColumnFilter cf, HBaseValueMeta columnMeta, VariableSpace vars, boolean matchAny )
throws Exception {
checkSourceScan();
ClassLoader cl = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
createEmptyFilterIfNull( matchAny );
FilterList fl = (FilterList) m_sourceScan.getFilter();
ColumnFilter.ComparisonType op = cf.getComparisonOperator();
CompareFilter.CompareOp comp = getCompareOpByComparisonType( op );
String comparisonString = cf.getConstant().trim();
comparisonString = vars.environmentSubstitute( comparisonString );
byte[] comparison = m_bytesUtil.toBytes( comparisonString );
Class<?> comparatorClass = getByteArrayComparableClass();
Object comparator = null;
if ( comp != null ) {
// do the numeric comparison stuff
if ( columnMeta.isNumeric() ) {
if ( !cf.getSignedComparison() && columnMeta.isInteger() ) {
comparatorClass = byte[].class;
}
comparator = getNumericComparator( cf, columnMeta, vars, comparisonString );
} else if ( columnMeta.isDate() ) {
comparator = getDateComparator( cf, vars, comparisonString );
} else if ( columnMeta.isBoolean() ) {
// temporarily encode it so that we can use the utility routine in
// HBaseValueMeta
byte[] tempEncoded = m_bytesUtil.toBytes( comparisonString );
Boolean decodedB = HBaseValueMeta.decodeBoolFromString( tempEncoded, m_bytesUtil );
// skip if we can't parse the comparison value
if ( decodedB == null ) {
return;
}
comparator = getBooleanComparator( decodedB );
}
} else {
comp = CompareFilter.CompareOp.EQUAL;
if ( cf.getComparisonOperator() == ColumnFilter.ComparisonType.SUBSTRING ) {
comparator = new SubstringComparator( comparisonString );
} else if ( cf.getComparisonOperator() == ColumnFilter.ComparisonType.REGEX ) {
comparator = new RegexStringComparator( comparisonString );
} else {
// the remaining comparison type is PREFIX; when the filter targets the key,
// a server-side PrefixFilter is the better choice
if ( columnMeta.isKey() ) {
PrefixFilter scf = new PrefixFilter( comparison );
fl.addFilter( scf );
return;
}
comparator = new BinaryPrefixComparator( comparison );
}
}
if ( comparator != null ) {
Mapping.TupleMapping tupleMapping;
tupleMapping = getTupleMappingByName( cf.getFieldAlias().toUpperCase() );
if ( tupleMapping != null ) {
addFilterByMapping( fl, comp, comparatorClass, comparator, tupleMapping );
return;
}
byte[] family = m_bytesUtil.toBytes( columnMeta.getColumnFamily() );
byte[] qualifier = m_bytesUtil.toBytes( columnMeta.getColumnName() );
Constructor<SingleColumnValueFilter>
scvfCtor =
SingleColumnValueFilter.class
.getConstructor( byte[].class, byte[].class, CompareFilter.CompareOp.class, comparatorClass );
SingleColumnValueFilter scf = scvfCtor.newInstance( family, qualifier, comp, comparator );
scf.setFilterIfMissing( true );
fl.addFilter( scf );
} else {
// no comparator could be built for this comparison type / column combination;
// if the filter targets the key, fall back to a prefix match on the comparison bytes
if ( columnMeta.isKey() ) {
PrefixFilter scf = new PrefixFilter( comparison );
fl.addFilter( scf );
}
}
} finally {
Thread.currentThread().setContextClassLoader( cl );
}
}
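/**
* Build a comparator for a numeric column filter: raw value bytes for unsigned comparisons,
* or an instance of the shim's deserialized numeric comparator (obtained reflectively via
* getDeserializedNumericComparatorClass()) for signed ones. The comparison constant is
* parsed with the filter's format pattern when one is supplied.
*/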
protected Object getNumericComparator( ColumnFilter cf, HBaseValueMeta columnMeta, VariableSpace vars,
String comparisonString ) throws Exception {
DecimalFormat df = new DecimalFormat();
String formatS = vars.environmentSubstitute( cf.getFormat() );
if ( !isEmpty( formatS ) ) {
df.applyPattern( formatS );
}
Number num = df.parse( comparisonString );
Object comparator;
if ( cf.getSignedComparison() ) {
comparator = getSignedComparisonComparator( columnMeta, num );
} else if ( columnMeta.isInteger() ) {
// the caller has already switched the comparator class to byte[].class for this case
if ( !columnMeta.getIsLongOrDouble() ) {
comparator = m_bytesUtil.toBytes( num.intValue() );
} else {
comparator = m_bytesUtil.toBytes( num.longValue() );
}
} else {
if ( !columnMeta.getIsLongOrDouble() ) {
comparator = m_bytesUtil.toBytes( num.floatValue() );
} else {
comparator = m_bytesUtil.toBytes( num.doubleValue() );
}
}
return comparator;
}
protected Object getBooleanComparator( Boolean decodedB )
throws ClassNotFoundException, NoSuchMethodException, InstantiationException, IllegalAccessException,
java.lang.reflect.InvocationTargetException {
Class<?> deserializedBooleanComparatorClass = getDeserializedBooleanComparatorClass();
Constructor<?> ctor = deserializedBooleanComparatorClass.getConstructor( boolean.class );
return ctor.newInstance( decodedB );
}
protected Object getDateComparator( ColumnFilter cf, VariableSpace vars, String comparisonString )
throws ParseException, ClassNotFoundException, NoSuchMethodException, InstantiationException,
IllegalAccessException, java.lang.reflect.InvocationTargetException {
SimpleDateFormat sdf = new SimpleDateFormat();
String formatS = vars.environmentSubstitute( cf.getFormat() );
if ( !isEmpty( formatS ) ) {
sdf.applyPattern( formatS );
}
Object comparator;
Date d = sdf.parse( comparisonString );
long dateAsMillis = d.getTime();
if ( !cf.getSignedComparison() ) {
comparator = m_bytesUtil.toBytes( dateAsMillis );
} else {
// custom comparator for signed comparison
Class<?> deserializedNumericComparatorClass = getDeserializedNumericComparatorClass();
Constructor<?>
ctor =
deserializedNumericComparatorClass.getConstructor( boolean.class, boolean.class, long.class );
comparator = ctor.newInstance( true, true, dateAsMillis );
}
return comparator;
}
protected Object getSignedComparisonComparator( HBaseValueMeta columnMeta, Number num ) throws Exception {
// custom comparator for signed comparison, specific to each shim due to HBase API changes
Class<?> deserializedNumericComparatorClass = getDeserializedNumericComparatorClass();
Object comparator;
if ( columnMeta.isInteger() ) {
Constructor<?>
ctor =
deserializedNumericComparatorClass.getConstructor( boolean.class, boolean.class, long.class );
if ( columnMeta.getIsLongOrDouble() ) {
comparator = ctor.newInstance( columnMeta.isInteger(), columnMeta.getIsLongOrDouble(), num.longValue() );
} else {
comparator = ctor.newInstance( columnMeta.isInteger(), columnMeta.getIsLongOrDouble(), (long) num.intValue() );
}
} else {
Constructor<?>
ctor =
deserializedNumericComparatorClass.getConstructor( boolean.class, boolean.class, double.class );
if ( columnMeta.getIsLongOrDouble() ) {
comparator = ctor.newInstance( columnMeta.isInteger(), columnMeta.getIsLongOrDouble(), num.doubleValue() );
} else {
comparator =
ctor.newInstance( columnMeta.isInteger(), columnMeta.getIsLongOrDouble(), (double) num.floatValue() );
}
}
return comparator;
}
void addFilterByMapping( FilterList fl, CompareFilter.CompareOp comp, Class<?> comparatorClass, Object comparator,
Mapping.TupleMapping tupleMapping ) throws NoSuchMethodException, InstantiationException, IllegalAccessException,
java.lang.reflect.InvocationTargetException {
switch ( tupleMapping ) {
case KEY: {
addFilter( RowFilter.class, fl, comp, comparatorClass, comparator );
return;
}
case FAMILY: {
addFilter( FamilyFilter.class, fl, comp, comparatorClass, comparator );
return;
}
case COLUMN: {
//TODO Check whether ColumnPrefixFilter is faster and a better fit
addFilter( QualifierFilter.class, fl, comp, comparatorClass, comparator );
return;
}
case VALUE: {
addFilter( ValueFilter.class, fl, comp, comparatorClass, comparator );
return;
}
case TIMESTAMP: {
addFilter( TimestampsFilter.class, fl, comp, comparatorClass, comparator );
return;
}
}
}
protected <T extends Filter> void addFilter( Class<T> filterClass, FilterList fl, CompareFilter.CompareOp comp,
Class<?> comparatorClass, Object comparator )
throws NoSuchMethodException, InstantiationException, IllegalAccessException,
java.lang.reflect.InvocationTargetException {
Constructor<T> constructor = filterClass.getConstructor( CompareFilter.CompareOp.class, comparatorClass );
T scf = constructor.newInstance( comp, comparator );
fl.addFilter( scf );
}
protected Mapping.TupleMapping getTupleMappingByName( String name ) {
Mapping.TupleMapping tupleMapping;
try {
tupleMapping = Mapping.TupleMapping.valueOf( name );
} catch ( IllegalArgumentException ignored ) {
tupleMapping = null;
}
return tupleMapping;
}
protected void createEmptyFilterIfNull( boolean matchAny ) {
if ( m_sourceScan.getFilter() == null ) {
// create a new FilterList
FilterList
fl =
new FilterList( matchAny ? FilterList.Operator.MUST_PASS_ONE : FilterList.Operator.MUST_PASS_ALL );
m_sourceScan.setFilter( fl );
}
}
protected CompareFilter.CompareOp getCompareOpByComparisonType( ColumnFilter.ComparisonType op ) {
CompareFilter.CompareOp comp;
switch ( op ) {
case EQUAL:
comp = CompareFilter.CompareOp.EQUAL;
break;
case NOT_EQUAL:
comp = CompareFilter.CompareOp.NOT_EQUAL;
break;
case GREATER_THAN:
comp = CompareFilter.CompareOp.GREATER;
break;
case GREATER_THAN_OR_EQUAL:
comp = CompareFilter.CompareOp.GREATER_OR_EQUAL;
break;
case LESS_THAN:
comp = CompareFilter.CompareOp.LESS;
break;
case LESS_THAN_OR_EQUAL:
comp = CompareFilter.CompareOp.LESS_OR_EQUAL;
break;
default:
comp = null;
break;
}
return comp;
}
// TODO - Override this method if necessary! Older HBase versions use WritableByteArrayComparable
// newer ones (0.95+) use ByteArrayComparable
public Class<?> getByteArrayComparableClass() throws ClassNotFoundException {
return Class.forName( "org.apache.hadoop.hbase.filter.WritableByteArrayComparable" );
}
// TODO - Override this method if necessary!
// Older HBase versions use org.apache.hadoop.hbase.io.hfile.Compression
// Newer ones (0.95+) use org.apache.hadoop.hbase.io.compress.Compression
public Class<?> getCompressionAlgorithmClass() throws ClassNotFoundException {
return Class.forName( "org.apache.hadoop.hbase.io.hfile.Compression$Algorithm" );
}
// TODO - Override this method if necessary!
// Older HBase versions use org.apache.hadoop.hbase.regionserver.StoreFile.BloomType
// Newer ones (0.95+) use org.apache.hadoop.hbase.regionserver.BloomType
public Class<?> getBloomTypeClass() throws ClassNotFoundException {
return Class.forName( "org.apache.hadoop.hbase.regionserver.StoreFile$BloomType" );
}
// TODO - Override this method to return the specified class for each shim.
// Older HBase versions extend WritableByteArrayComparable
// Newer ones (0.95+) extend ByteArrayComparable
public Class<?> getDeserializedNumericComparatorClass() throws ClassNotFoundException {
return Class.forName( "org.pentaho.hbase.shim.common.DeserializedNumericComparator" );
}
// TODO - Override this method to return the specified class for each shim.
// Older HBase versions extend WritableByteArrayComparable
// Newer ones (0.95+) extend ByteArrayComparable
public Class<?> getDeserializedBooleanComparatorClass() throws ClassNotFoundException {
return Class.forName( "org.pentaho.hbase.shim.common.DeserializedBooleanComparator" );
}
protected void checkResultSet() throws Exception {
if ( m_resultSet == null ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.NoCurrentResultSet" ) );
}
}
protected void checkForCurrentResultSetRow() throws Exception {
if ( m_currentResultSetRow == null ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.NoCurrentResultSetRow" ) );
}
}
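/**
* A typical read loop around the scan methods (table, family and column names are
* illustrative):
*
* <pre>{@code
* connection.newSourceTable( "weblogs" );
* connection.newSourceTableScan( null, null, 100 );
* connection.executeSourceTableScan();
* while ( connection.resultSetNextRow() ) {
*   byte[] key = connection.getResultSetCurrentRowKey();
*   byte[] value = connection.getResultSetCurrentRowColumnLatest( "cf", "col", false );
* }
* connection.closeSourceResultSet();
* }</pre>
*/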
@Override
public void executeSourceTableScan() throws Exception {
checkConfiguration();
checkSourceTable();
checkSourceScan();
if ( m_sourceScan.getFilter() != null ) {
if ( ( (FilterList) m_sourceScan.getFilter() ).getFilters().size() == 0 ) {
m_sourceScan.setFilter( null );
}
}
m_resultSet = m_sourceTable.getScanner( m_sourceScan );
}
@Override
public boolean resultSetNextRow() throws Exception {
checkResultSet();
m_currentResultSetRow = m_resultSet.next();
return ( m_currentResultSetRow != null );
}
@Override
public boolean checkForHBaseRow( Object rowToCheck ) {
return ( rowToCheck instanceof Result );
}
@Override
public byte[] getRowKey( Object aRow ) throws Exception {
if ( !checkForHBaseRow( aRow ) ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.ObjectIsNotAnHBaseRow" ) );
}
return ( (Result) aRow ).getRow();
}
@Override
public byte[] getResultSetCurrentRowKey() throws Exception {
checkSourceScan();
checkResultSet();
checkForCurrentResultSetRow();
return getRowKey( m_currentResultSetRow );
}
@Override
public byte[] getRowColumnLatest( Object aRow, String colFamilyName, String colName, boolean colNameIsBinary )
throws Exception {
if ( !checkForHBaseRow( aRow ) ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.ObjectIsNotAnHBaseRow" ) );
}
byte[]
result =
( (Result) aRow ).getValue( m_bytesUtil.toBytes( colFamilyName ),
colNameIsBinary ? m_bytesUtil.toBytesBinary( colName ) : m_bytesUtil.toBytes( colName ) );
return result;
}
@Override
public byte[] getResultSetCurrentRowColumnLatest( String colFamilyName, String colName, boolean colNameIsBinary )
throws Exception {
checkSourceScan();
checkResultSet();
checkForCurrentResultSetRow();
return getRowColumnLatest( m_currentResultSetRow, colFamilyName, colName, colNameIsBinary );
}
@Override
public NavigableMap<byte[], byte[]> getRowFamilyMap( Object aRow, String familyName ) throws Exception {
if ( !checkForHBaseRow( aRow ) ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.ObjectIsNotAnHBaseRow" ) );
}
return ( (Result) aRow ).getFamilyMap( m_bytesUtil.toBytes( familyName ) );
}
@Override
public NavigableMap<byte[], byte[]> getResultSetCurrentRowFamilyMap( String familyName ) throws Exception {
checkSourceScan();
checkResultSet();
checkForCurrentResultSetRow();
return getRowFamilyMap( m_currentResultSetRow, familyName );
}
@Override
public NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> getRowMap( Object aRow )
throws Exception {
if ( !checkForHBaseRow( aRow ) ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.ObjectIsNotAnHBaseRow" ) );
}
return ( (Result) aRow ).getMap();
}
@Override
public NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> getResultSetCurrentRowMap()
throws Exception {
checkSourceScan();
checkResultSet();
checkForCurrentResultSetRow();
return getRowMap( m_currentResultSetRow );
}
protected void checkTargetTable() throws Exception {
if ( m_targetTable == null ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.NoTargetTable" ) );
}
}
protected void checkTargetPut() throws Exception {
if ( m_currentTargetPut == null ) {
throw new Exception( BaseMessages.getString( PKG, "CommonHBaseConnection.Error.NoTargetPut" ) );
}
}
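/**
* A sketch of the full write lifecycle (table, family and column names plus the key and
* value bytes are illustrative):
*
* <pre>{@code
* connection.newTargetTable( "weblogs", null );
* connection.newTargetTablePut( rowKey, true );
* connection.addColumnToTargetPut( "cf", "col", false, valueBytes );
* connection.executeTargetTablePut();
* connection.closeTargetTable(); // flushes buffered commits when auto-flush is off
* }</pre>
*/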
@Override
public void newTargetTable( String tableName, Properties props ) throws Exception {
checkConfiguration();
closeTargetTable();
m_targetTable = m_factory.getHBaseTable( tableName );
if ( props != null ) {
Set<Object> keys = props.keySet();
for ( Object key : keys ) {
String value = props.getProperty( key.toString() );
if ( key.toString().equals( HTABLE_WRITE_BUFFER_SIZE_KEY ) ) {
m_targetTable.setWriteBufferSize( Long.parseLong( value ) );
m_targetTable.setAutoFlush( false );
}
}
}
}
@Override
public boolean targetTableIsAutoFlush() throws Exception {
checkTargetTable();
return m_targetTable.isAutoFlush();
}
@Override
public void newTargetTablePut( byte[] key, boolean writeToWAL ) throws Exception {
checkTargetTable();
m_currentTargetPut = m_factory.getHBasePut( key );
m_currentTargetPut.setWriteToWAL( writeToWAL );
}
@Override
public void executeTargetTablePut() throws Exception {
checkConfiguration();
checkTargetTable();
checkTargetPut();
m_targetTable.put( m_currentTargetPut );
}
@Override
public void executeTargetTableDelete( byte[] rowKey ) throws Exception {
checkConfiguration();
checkTargetTable();
Delete d = new Delete( rowKey );
m_targetTable.delete( d );
}
@Override
public void flushCommitsTargetTable() throws Exception {
checkConfiguration();
checkTargetTable();
m_targetTable.flushCommits();
}
@Override
public void addColumnToTargetPut( String columnFamily, String columnName, boolean colNameIsBinary, byte[] colValue )
throws Exception {
checkTargetTable();
checkTargetPut();
m_currentTargetPut.addColumn( m_bytesUtil.toBytes( columnFamily ),
colNameIsBinary ? m_bytesUtil.toBytesBinary( columnName ) : m_bytesUtil.toBytes( columnName ), colValue );
}
@Override
public void closeTargetTable() throws Exception {
checkConfiguration();
if ( m_targetTable != null ) {
if ( !m_targetTable.isAutoFlush() ) {
flushCommitsTargetTable();
}
m_targetTable.close();
m_targetTable = null;
}
}
@Override
public void closeSourceResultSet() throws Exception {
checkConfiguration();
// An open result set?
if ( m_resultSet != null ) {
m_resultSet.close();
m_resultSet = null;
m_currentResultSetRow = null;
}
}
@Override
public void closeSourceTable() throws Exception {
checkConfiguration();
closeSourceResultSet();
if ( m_sourceTable != null ) {
m_sourceTable.close();
m_sourceTable = null;
}
}
@Override
public boolean isImmutableBytesWritable( Object o ) {
// For this to work the context class loader must be able to load
// ImmutableBytesWritable.class from the same CL as o.getClass() was loaded
// from
return o instanceof ImmutableBytesWritable;
}
@Override
public void close() throws Exception {
if ( log.isDebug() ) {
log.logDebug( "Closing HBase connection ..." );
}
closeTargetTable();
closeSourceResultSet();
closeSourceTable();
closeClientFactory();
}
void closeClientFactory() {
if ( m_factory != null ) {
m_factory.close();
}
m_factory = null;
}
protected boolean toBoolean( String value ) {
String v = value.toLowerCase();
return v.equals( "y" ) || v.equals( "yes" ) || v.equals( "true" );
}
@Override
public void obtainAuthTokenForJob( org.pentaho.hadoop.shim.api.Configuration conf ) throws Exception {
}
}