package com.xavient.dip.spark.writer;

import java.io.Serializable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.spark.JavaHBaseContext;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.api.java.JavaDStream;

import com.xavient.dip.common.AppArgs;
import com.xavient.dip.common.config.DiPConfiguration;

/**
 * Writes a Spark Streaming DStream into HBase using bulk puts.
 * <p>
 * The table name, column family and column qualifiers are read from the
 * application configuration. Each incoming record is expected to be an
 * {@code Object[]} whose element at index 1 is the row key and whose
 * remaining elements map, in order, to the configured column qualifiers.
 */
public class SparkHBaseWriter implements Serializable {

    private static final long serialVersionUID = -4652795987962410281L;

    private String tableName;
    private String columnFamily;
    private String[] columnFields;
    private JavaHBaseContext hbaseContext;

    public SparkHBaseWriter(JavaSparkContext jsc, AppArgs appArgs) {
        super();
        this.tableName = appArgs.getProperty(DiPConfiguration.HBASE_TABLE_NAME);
        this.columnFamily = appArgs.getProperty(DiPConfiguration.HBASE_COL_FAMILY);
        // Column qualifiers are configured as a pipe-separated list.
        this.columnFields = appArgs.getProperty(DiPConfiguration.HBASE_COL_NAMES).split("\\|");
        this.hbaseContext = new JavaHBaseContext(jsc, getConf(appArgs));
    }

    /**
     * Streams the given DStream into the configured HBase table, one Put per record.
     */
    public <T> void write(JavaDStream<T> stream) {
        hbaseContext.streamBulkPut(stream, TableName.valueOf(tableName), record -> {
            Object[] data = (Object[]) record;
            // Element 1 is the row key; elements 2..n hold the column values.
            Put put = new Put(Bytes.toBytes(String.valueOf(data[1])));
            for (int i = 2; i < data.length; i++) {
                put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnFields[i - 2]),
                        Bytes.toBytes(String.valueOf(data[i])));
            }
            return put;
        });
    }

    private static Configuration getConf(AppArgs appArgs) {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.master", appArgs.getProperty(DiPConfiguration.HBASE_MASTER));
        conf.set("timeout", "120000");
        conf.set("hbase.zookeeper.quorum",
                appArgs.getProperty(DiPConfiguration.ZK_HOST) + ":" + appArgs.getProperty(DiPConfiguration.ZK_PORT));
        conf.set("zookeeper.znode.parent", "/hbase-unsecure");
        return conf;
    }
}
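
/*
 * Minimal usage sketch (illustrative only; the driver wiring below is an
 * assumption and not part of this class or repository). It presumes a
 * hypothetical JavaStreamingContext "jssc" and a parsed
 * JavaDStream<Object[]> "records" whose element layout matches what
 * write() expects: index 1 holds the row key and indices 2..n map, in
 * order, to the configured column qualifiers.
 *
 *   JavaSparkContext jsc = jssc.sparkContext();
 *   SparkHBaseWriter writer = new SparkHBaseWriter(jsc, appArgs);
 *   writer.write(records);   // registers the streamBulkPut output operation
 *   jssc.start();
 *   jssc.awaitTermination();
 */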