/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hive.hcatalog.hbase;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.junit.Test;
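/**
 * Tests Hive tables backed by HBaseStorageHandler when they are read and written
 * through Pig's HCatLoader and HCatStorer, cross-checked with direct HBase scans
 * and Hive queries.
 */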
public class TestPigHBaseStorageHandler extends SkeletonHBaseTest {
private static HiveConf hcatConf;
private static Driver driver;
private static String mypath;
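// Column family and qualifiers used for the generated HBase test data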
private final byte[] FAMILY = Bytes.toBytes("testFamily");
private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1");
private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2");
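/**
 * Sets up a HiveConf pointing at the test warehouse, copies the hbase.* properties
 * from the test cluster configuration, and starts a Driver and CLI session.
 */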
public void Initialize() throws Exception {
hcatConf = new HiveConf(this.getClass());
//hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
// HCatSemanticAnalyzer.class.getName());
URI fsuri = getFileSystem().getUri();
Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(),
getTestDir());
hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString());
hcatConf
.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
"org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
//Add hbase properties
for (Map.Entry<String, String> el : getHbaseConf()) {
if (el.getKey().startsWith("hbase.")) {
hcatConf.set(el.getKey(), el.getValue());
}
}
driver = new Driver(hcatConf);
SessionState.start(new CliSessionState(hcatConf));
}
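/** Writes ten generated rows into the given HBase table. */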
private void populateHBaseTable(String tName) throws IOException {
List<Put> myPuts = generatePuts(tName);
HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName));
table.put(myPuts);
table.close();
}
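/** Builds Puts for row keys 1 through 10, writing both qualifiers with an explicit timestamp of 1. */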
private List<Put> generatePuts(String tableName) throws IOException {
List<Put> myPuts = new ArrayList<Put>();
for (int i = 1; i <= 10; i++) {
Put put = new Put(Bytes.toBytes(i));
put.add(FAMILY, QUALIFIER1, 1, Bytes.toBytes("textA-" + i));
put.add(FAMILY, QUALIFIER2, 1, Bytes.toBytes("textB-" + i));
myPuts.add(put);
}
return myPuts;
}
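/** Writes LOOP_SIZE tab-separated rows of (int, float, string) test data to the given file. */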
public static void createTestDataFile(String filename) throws IOException {
FileWriter writer = null;
int LOOP_SIZE = 10;
float f = -100.1f;
try {
File file = new File(filename);
file.deleteOnExit();
writer = new FileWriter(file);
for (int i =1; i <= LOOP_SIZE; i++) {
writer.write(i+ "\t" +(f+i)+ "\t" + "textB-" + i + "\n");
}
} finally {
if (writer != null) {
writer.close();
}
}
}
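/**
 * Verifies that the Pig schema reported by HCatLoader matches the column types
 * declared in the Hive DDL for the HBase-backed table.
 */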
@Test
public void testPigHBaseSchema() throws Exception {
Initialize();
String tableName = newTableName("MyTable");
String databaseName = newTableName("MyDatabase");
//Table name will be lower case unless specified by hbase.table.name property
String hbaseTableName = "testTable";
String db_dir = HCatUtil.makePathASafeFileName(getTestDir() + "/hbasedb");
String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '"
+ db_dir + "'";
String deleteQuery = "DROP TABLE IF EXISTS " + databaseName + "." + tableName;
String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
+ "(key float, testqualifier1 string, testqualifier2 int) STORED BY " +
"'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')"
+ " TBLPROPERTIES ('hbase.table.name'='"+hbaseTableName+"')";
CommandProcessorResponse responseOne = driver.run(deleteQuery);
assertEquals(0, responseOne.getResponseCode());
CommandProcessorResponse responseTwo = driver.run(dbQuery);
assertEquals(0, responseTwo.getResponseCode());
CommandProcessorResponse responseThree = driver.run(tableQuery);
assertEquals(0, responseThree.getResponseCode());
HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
assertTrue(doesTableExist);
PigServer server = new PigServer(ExecType.LOCAL,hcatConf.getAllProperties());
server.registerQuery("A = load '"+databaseName+"."+tableName+"' using org.apache.hive.hcatalog.pig.HCatLoader();");
Schema dumpedASchema = server.dumpSchema("A");
List<FieldSchema> fields = dumpedASchema.getFields();
assertEquals(3, fields.size());
assertEquals(DataType.FLOAT, fields.get(0).type);
assertEquals("key", fields.get(0).alias.toLowerCase());
assertEquals(DataType.CHARARRAY, fields.get(1).type);
assertEquals("testqualifier1", fields.get(1).alias.toLowerCase());
assertEquals(DataType.INTEGER, fields.get(2).type);
assertEquals("testqualifier2", fields.get(2).alias.toLowerCase());
}
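/**
 * Verifies that a Pig filter and projection over the HCatLoader-loaded table
 * return the expected rows and column values.
 */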
@Test
public void testPigFilterProjection() throws Exception {
Initialize();
String tableName = newTableName("MyTable");
String databaseName = newTableName("MyDatabase");
//Table name will be lower case unless specified by hbase.table.name property
String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
String db_dir = HCatUtil.makePathASafeFileName(getTestDir() + "/hbasedb");
String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '"
+ db_dir + "'";
String deleteQuery = "DROP TABLE IF EXISTS " + databaseName + "." + tableName;
String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
+ "(key int, testqualifier1 string, testqualifier2 string) STORED BY " +
"'org.apache.hadoop.hive.hbase.HBaseStorageHandler'" +
" WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')" +
" TBLPROPERTIES ('hbase.table.default.storage.type'='binary')";
CommandProcessorResponse responseOne = driver.run(deleteQuery);
assertEquals(0, responseOne.getResponseCode());
CommandProcessorResponse responseTwo = driver.run(dbQuery);
assertEquals(0, responseTwo.getResponseCode());
CommandProcessorResponse responseThree = driver.run(tableQuery);
assertEquals(0, responseThree.getResponseCode());
HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
assertTrue(doesTableExist);
populateHBaseTable(hbaseTableName);
int index=1;
PigServer server = new PigServer(ExecType.LOCAL,hcatConf.getAllProperties());
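// Load the table through HCatLoader, keep rows with key < 5, and project key and testqualifier2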
server.registerQuery("A = load '"+databaseName+"."+tableName+"' using org.apache.hive.hcatalog.pig.HCatLoader();");
server.registerQuery("B = filter A by key < 5;");
server.registerQuery("C = foreach B generate key,testqualifier2;");
Iterator<Tuple> itr = server.openIterator("C");
//verify that the filter returns 4 rows, each with 2 columns whose contents match the generated data
while(itr.hasNext()){
Tuple t = itr.next();
assertTrue(t.size() == 2);
assertTrue(t.get(0).getClass() == Integer.class);
assertEquals(index,t.get(0));
assertTrue(t.get(1).getClass() == String.class);
assertEquals("textB-"+index,t.get(1));
index++;
}
// keys 1 through 4 satisfy the filter
assertEquals(4, index - 1);
}
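/**
 * Verifies that HCatStorer writes filtered Pig tuples into the HBase-backed table,
 * and that the stored rows are visible through HCatLoader, a direct HBase scan,
 * and a Hive SELECT.
 */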
@Test
public void testPigPopulation() throws Exception {
Initialize();
String tableName = newTableName("MyTable");
String databaseName = newTableName("MyDatabase");
//Table name will be lower case unless specified by hbase.table.name property
String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
String db_dir = HCatUtil.makePathASafeFileName(getTestDir() + "/hbasedb");
String POPTXT_FILE_NAME = db_dir+"testfile.txt";
float f = -100.1f;
String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '"
+ db_dir + "'";
String deleteQuery = "DROP TABLE IF EXISTS " + databaseName + "." + tableName;
String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
+ "(key int, testqualifier1 float, testqualifier2 string) STORED BY " +
"'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')"
+ " TBLPROPERTIES ('hbase.table.default.storage.type'='binary')";
String selectQuery = "SELECT * from "+databaseName.toLowerCase()+"."+tableName.toLowerCase();
CommandProcessorResponse responseOne = driver.run(deleteQuery);
assertEquals(0, responseOne.getResponseCode());
CommandProcessorResponse responseTwo = driver.run(dbQuery);
assertEquals(0, responseTwo.getResponseCode());
CommandProcessorResponse responseThree = driver.run(tableQuery);
assertEquals(0, responseThree.getResponseCode());
HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
assertTrue(doesTableExist);
createTestDataFile(POPTXT_FILE_NAME);
PigServer server = new PigServer(ExecType.LOCAL,hcatConf.getAllProperties());
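// Load the generated file, keep keys 3 through 7, and store them through HCatStorer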
server.registerQuery("A = load '"+POPTXT_FILE_NAME+"' using PigStorage() as (key:int, testqualifier1:float, testqualifier2:chararray);");
server.registerQuery("B = filter A by (key > 2) AND (key < 8) ;");
server.registerQuery("store B into '"+databaseName.toLowerCase()+"."+tableName.toLowerCase()+"' using org.apache.hive.hcatalog.pig.HCatStorer();");
server.registerQuery("C = load '"+databaseName.toLowerCase()+"."+tableName.toLowerCase()+"' using org.apache.hive.hcatalog.pig.HCatLoader();");
// The schema read back through HCatLoader should match the Hive table definition
Schema dumpedCSchema = server.dumpSchema("C");
List<FieldSchema> fields = dumpedCSchema.getFields();
assertEquals(3, fields.size());
assertEquals(DataType.INTEGER, fields.get(0).type);
assertEquals("key", fields.get(0).alias.toLowerCase());
assertEquals(DataType.FLOAT, fields.get(1).type);
assertEquals("testqualifier1", fields.get(1).alias.toLowerCase());
assertEquals(DataType.CHARARRAY, fields.get(2).type);
assertEquals("testqualifier2", fields.get(2).alias.toLowerCase());
//Scan the HBase table directly and check that only the 5 stored rows (keys 3 through 7) are present with the expected values
Configuration conf = new Configuration(getHbaseConf());
HTable table = new HTable(conf, hbaseTableName);
Scan scan = new Scan();
scan.addFamily(Bytes.toBytes("testFamily"));
byte[] familyNameBytes = Bytes.toBytes("testFamily");
ResultScanner scanner = table.getScanner(scan);
int index=3;
int count=0;
for(Result result: scanner) {
//key is correct
assertEquals(index,Bytes.toInt(result.getRow()));
//first column exists
assertTrue(result.containsColumn(familyNameBytes,Bytes.toBytes("testQualifier1")));
//value is correct
assertEquals((index+f),Bytes.toFloat(result.getValue(familyNameBytes,Bytes.toBytes("testQualifier1"))),0);
//second column exists
assertTrue(result.containsColumn(familyNameBytes,Bytes.toBytes("testQualifier2")));
//value is correct
assertEquals(("textB-"+index).toString(),Bytes.toString(result.getValue(familyNameBytes,Bytes.toBytes("testQualifier2"))));
index++;
count++;
}
// 5 rows should be returned
assertEquals(5, count);
//Check that Hive returns the same rows through a SELECT over the table
CommandProcessorResponse responseSelect = driver.run(selectQuery);
assertEquals(0, responseSelect.getResponseCode());
ArrayList<String> result = new ArrayList<String>();
driver.getResults(result);
assertEquals(5, result.size());
Iterator<String> itr = result.iterator();
for (int i = 3; i <= 7; i++) {
String[] tokens = itr.next().split("\\s+");
assertEquals(i, Integer.parseInt(tokens[0]));
assertEquals(i + f, Float.parseFloat(tokens[1]), 0);
assertEquals("textB-" + i, tokens[2]);
}
//delete the table from the database
CommandProcessorResponse responseFour = driver.run(deleteQuery);
assertEquals(0, responseFour.getResponseCode());
}
}