/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.blur.hive; import static org.junit.Assert.assertEquals; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.lang.reflect.Field; import java.net.ServerSocket; import java.sql.Connection; import java.sql.Date; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import org.apache.blur.MiniCluster; import org.apache.blur.mapreduce.lib.BlurColumn; import org.apache.blur.mapreduce.lib.BlurRecord; import org.apache.blur.thirdparty.thrift_0_9_0.TException; import org.apache.blur.thrift.BlurClient; import org.apache.blur.thrift.generated.Blur.Iface; import org.apache.blur.thrift.generated.BlurException; import org.apache.blur.thrift.generated.BlurQuery; import org.apache.blur.thrift.generated.BlurResults; import org.apache.blur.thrift.generated.ColumnDefinition; import org.apache.blur.thrift.generated.Query; import org.apache.blur.thrift.generated.TableDescriptor; import org.apache.blur.utils.BlurConstants; import org.apache.blur.utils.GCWatcher; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.jdbc.HiveDriver; import org.apache.hive.service.server.HiveServer2; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import com.google.common.base.Splitter; public class BlurSerDeTest { public static final File WAREHOUSE = new File("./target/tmp/warehouse"); public static final String COLUMN_SEP = new String(new char[] { 1 }); public static final String ITEM_SEP = new String(new char[] { 2 }); public static final File DERBY_FILE = new File("derby.log"); public static final File METASTORE_DB_FILE = new File("metastore_db"); private static final String FAM = "fam0"; private static final String YYYYMMDD = "yyyyMMdd"; private static final String YYYY_MM_DD = "yyyy-MM-dd"; private static final String TEST = "test"; private static final File TMPDIR = new File(System.getProperty("blur.tmp.dir", "./target/tmp_BlurSerDeTest")); private static MiniCluster miniCluster; private static boolean externalProcesses = false; @BeforeClass public static void startCluster() throws IOException { System.setProperty("hadoop.log.dir", "./target/tmp_BlurSerDeTest_hadoop_log"); GCWatcher.init(0.60); LocalFileSystem localFS = FileSystem.getLocal(new Configuration()); File testDirectory = new File(TMPDIR, "blur-SerDe-test").getAbsoluteFile(); testDirectory.mkdirs(); Path directory = new Path(testDirectory.getPath()); FsPermission dirPermissions = localFS.getFileStatus(directory).getPermission(); FsAction userAction = dirPermissions.getUserAction(); FsAction groupAction = dirPermissions.getGroupAction(); FsAction otherAction = dirPermissions.getOtherAction(); StringBuilder builder = new StringBuilder(); builder.append(userAction.ordinal()); builder.append(groupAction.ordinal()); builder.append(otherAction.ordinal()); String dirPermissionNum = builder.toString(); System.setProperty("dfs.datanode.data.dir.perm", dirPermissionNum); testDirectory.delete(); miniCluster = new MiniCluster(); miniCluster.startBlurCluster(new File(testDirectory, "cluster").getAbsolutePath(), 2, 3, true, externalProcesses); miniCluster.startMrMiniCluster(); } @AfterClass public static void shutdownCluster() throws IOException { miniCluster.stopMrMiniCluster(); miniCluster.shutdownBlurCluster(); } private String _mrWorkingPath; @Before public void setup() throws BlurException, TException, IOException { _mrWorkingPath = miniCluster.getFileSystemUri().toString() + "/mrworkingpath"; String controllerConnectionStr = miniCluster.getControllerConnectionStr(); Iface client = BlurClient.getClient(controllerConnectionStr); List<String> tableList = client.tableList(); if (!tableList.contains(TEST)) { TableDescriptor tableDescriptor = new TableDescriptor(); tableDescriptor.setName(TEST); tableDescriptor.setShardCount(1); tableDescriptor.setTableUri(miniCluster.getFileSystemUri().toString() + "/blur/tables/test"); tableDescriptor.putToTableProperties(BlurConstants.BLUR_BULK_UPDATE_WORKING_PATH, _mrWorkingPath); client.createTable(tableDescriptor); Map<String, String> props = new HashMap<String, String>(); props.put("dateFormat", YYYYMMDD); client.addColumnDefinition(TEST, cd(false, FAM, "string-col-single", "string")); client.addColumnDefinition(TEST, cd(false, FAM, "text-col-single", "text")); client.addColumnDefinition(TEST, cd(false, FAM, "stored-col-single", "stored")); client.addColumnDefinition(TEST, cd(false, FAM, "double-col-single", "double")); client.addColumnDefinition(TEST, cd(false, FAM, "float-col-single", "float")); client.addColumnDefinition(TEST, cd(false, FAM, "long-col-single", "long")); client.addColumnDefinition(TEST, cd(false, FAM, "int-col-single", "int")); client.addColumnDefinition(TEST, cd(false, FAM, "date-col-single", "date", props)); client.addColumnDefinition(TEST, cd(false, FAM, "geo-col-single", "geo-pointvector")); client.addColumnDefinition(TEST, cd(true, FAM, "string-col-multi", "string")); client.addColumnDefinition(TEST, cd(true, FAM, "text-col-multi", "text")); client.addColumnDefinition(TEST, cd(true, FAM, "stored-col-multi", "stored")); client.addColumnDefinition(TEST, cd(true, FAM, "double-col-multi", "double")); client.addColumnDefinition(TEST, cd(true, FAM, "float-col-multi", "float")); client.addColumnDefinition(TEST, cd(true, FAM, "long-col-multi", "long")); client.addColumnDefinition(TEST, cd(true, FAM, "int-col-multi", "int")); client.addColumnDefinition(TEST, cd(true, FAM, "date-col-multi", "date", props)); } rmr(WAREHOUSE); rmr(METASTORE_DB_FILE); rmr(DERBY_FILE); } @After public void teardown() { rmr(WAREHOUSE); rmr(METASTORE_DB_FILE); rmr(DERBY_FILE); } public static void rmr(File file) { if (!file.exists()) { return; } if (file.isDirectory()) { for (File f : file.listFiles()) { rmr(f); } } file.delete(); } private ColumnDefinition cd(boolean multiValue, String family, String columnName, String type) { return cd(multiValue, family, columnName, type, null); } private ColumnDefinition cd(boolean multiValue, String family, String columnName, String type, Map<String, String> props) { ColumnDefinition columnDefinition = new ColumnDefinition(family, columnName, null, false, type, props, false); columnDefinition.setMultiValueField(multiValue); return columnDefinition; } @Test public void test1() throws SerDeException { long now = System.currentTimeMillis(); Date date = new Date(now); SimpleDateFormat simpleDateFormat = new SimpleDateFormat(YYYYMMDD); BlurSerDe blurSerDe = new BlurSerDe(); Configuration conf = new Configuration(); Properties tbl = new Properties(); tbl.put(BlurSerDe.TABLE, TEST); tbl.put(BlurSerDe.FAMILY, FAM); tbl.put(BlurSerDe.ZK, miniCluster.getZkConnectionString()); blurSerDe.initialize(conf, tbl); ObjectInspector objectInspector = blurSerDe.getObjectInspector(); Object[] row = new Object[19]; int c = 0; row[c++] = "rowid"; row[c++] = "recordid"; row[c++] = new Object[] { date, date }; row[c++] = date; row[c++] = new Object[] { 1234.5678, 4321.5678 }; row[c++] = 1234.5678; row[c++] = new Object[] { 1234.567f, 4321.567f }; row[c++] = 1234.567f; row[c++] = new Object[] { 1.0f, 2.0f }; row[c++] = new Object[] { 12345678, 87654321 }; row[c++] = 12345678; row[c++] = new Object[] { 12345678l, 87654321l }; row[c++] = 12345678l; row[c++] = new Object[] { "stored input1", "stored input2" }; row[c++] = "stored input"; row[c++] = new Object[] { "string input1", "string input2" }; row[c++] = "string input"; row[c++] = new Object[] { "text input1", "text input2" }; row[c++] = "text input"; BlurRecord blurRecord = (BlurRecord) blurSerDe.serialize(row, objectInspector); assertEquals("rowid", blurRecord.getRowId()); assertEquals("recordid", blurRecord.getRecordId()); Map<String, List<String>> columns = toMap(blurRecord.getColumns()); assertEquals(list("string input"), columns.get("string-col-single")); assertEquals(list("string input1", "string input2"), columns.get("string-col-multi")); assertEquals(list("text input"), columns.get("text-col-single")); assertEquals(list("text input1", "text input2"), columns.get("text-col-multi")); assertEquals(list("stored input"), columns.get("stored-col-single")); assertEquals(list("stored input1", "stored input2"), columns.get("stored-col-multi")); assertEquals(list("1234.5678"), columns.get("double-col-single")); assertEquals(list("1234.5678", "4321.5678"), columns.get("double-col-multi")); assertEquals(list("1234.567"), columns.get("float-col-single")); assertEquals(list("1234.567", "4321.567"), columns.get("float-col-multi")); assertEquals(list("12345678"), columns.get("long-col-single")); assertEquals(list("12345678", "87654321"), columns.get("long-col-multi")); assertEquals(list("12345678"), columns.get("int-col-single")); assertEquals(list("12345678", "87654321"), columns.get("int-col-multi")); assertEquals(list(simpleDateFormat.format(date)), columns.get("date-col-single")); assertEquals(list(simpleDateFormat.format(date), simpleDateFormat.format(date)), columns.get("date-col-multi")); assertEquals(list("1.0,2.0"), columns.get("geo-col-single")); } @Test public void test2() throws SQLException, ClassNotFoundException, IOException, BlurException, TException, InterruptedException { int totalRecords = runLoad(true); Iface client = BlurClient.getClientFromZooKeeperConnectionStr(miniCluster.getZkConnectionString()); BlurQuery blurQuery = new BlurQuery(); Query query = new Query(); query.setQuery("*"); blurQuery.setQuery(query); BlurResults results = client.query(TEST, blurQuery); assertEquals(totalRecords, results.getTotalResults()); } @Test public void test3() throws Exception { int totalRecords = runLoad(false); Iface client = BlurClient.getClientFromZooKeeperConnectionStr(miniCluster.getZkConnectionString()); BlurQuery blurQuery = new BlurQuery(); Query query = new Query(); query.setQuery("*"); blurQuery.setQuery(query); BlurResults results = client.query(TEST, blurQuery); assertEquals(totalRecords, results.getTotalResults()); } private int runLoad(boolean disableMrUpdate) throws IOException, InterruptedException, ClassNotFoundException, SQLException { Configuration configuration = miniCluster.getMRConfiguration(); writeSiteFiles(configuration); HiveConf hiveConf = new HiveConf(configuration, getClass()); hiveConf.set("hive.server2.thrift.port", "0"); HiveServer2 hiveServer2 = new HiveServer2(); hiveServer2.init(hiveConf); hiveServer2.start(); int port = waitForStartupAndGetPort(hiveServer2); Class.forName(HiveDriver.class.getName()); String userName = UserGroupInformation.getCurrentUser().getShortUserName(); Connection connection = DriverManager.getConnection("jdbc:hive2://localhost:" + port, userName, ""); UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); run(connection, "set blur.user.name=" + currentUser.getUserName()); run(connection, "set blur.mr.update.disabled=" + disableMrUpdate); run(connection, "set hive.metastore.warehouse.dir=" + WAREHOUSE.toURI().toString()); run(connection, "create database if not exists testdb"); run(connection, "use testdb"); run(connection, "CREATE TABLE if not exists testtable ROW FORMAT SERDE 'org.apache.blur.hive.BlurSerDe' " + "WITH SERDEPROPERTIES ( 'blur.zookeeper.connection'='" + miniCluster.getZkConnectionString() + "', " + "'blur.table'='" + TEST + "', 'blur.family'='" + FAM + "' ) " + "STORED BY 'org.apache.blur.hive.BlurHiveStorageHandler'"); run(connection, "desc testtable"); String createLoadTable = buildCreateLoadTable(connection); run(connection, createLoadTable); File dbDir = new File(WAREHOUSE, "testdb.db"); File tableDir = new File(dbDir, "loadtable"); int totalRecords = 100; generateData(tableDir, totalRecords); run(connection, "select * from loadtable"); run(connection, "set " + BlurSerDe.BLUR_BLOCKING_APPLY + "=true"); run(connection, "insert into table testtable select * from loadtable"); connection.close(); hiveServer2.stop(); return totalRecords; } private void writeSiteFiles(Configuration configuration) throws FileNotFoundException, IOException { String name = BlurHiveMRLoaderOutputCommitter.MAPRED_SITE_XML; if (miniCluster.useYarn()) { name = BlurHiveMRLoaderOutputCommitter.YARN_SITE_XML; } String classPath = System.getProperty("java.class.path"); for (String path : Splitter.on(":").split(classPath)) { File file = new File(path); if (file.getName().equals("test-classes")) { writeFile(new File(file, name), configuration); return; } } } private void writeFile(File file, Configuration configuration) throws FileNotFoundException, IOException { FileOutputStream outputStream = new FileOutputStream(file); configuration.writeXml(outputStream); outputStream.close(); } private void generateData(File file, int totalRecords) throws IOException { SimpleDateFormat simpleDateFormat = new SimpleDateFormat(YYYY_MM_DD); file.mkdirs(); PrintWriter print = new PrintWriter(new File(file, "data")); Date date = new Date(System.currentTimeMillis()); for (int i = 0; i < totalRecords; i++) { // rowid print.print("rowid" + i); print.print(COLUMN_SEP); // recordid print.print("recordid" + i); print.print(COLUMN_SEP); { // date_col_multi print.print(simpleDateFormat.format(date)); print.print(ITEM_SEP); print.print(simpleDateFormat.format(date)); } print.print(COLUMN_SEP); // date_col_single print.print(simpleDateFormat.format(date)); print.print(COLUMN_SEP); { // double_col_multi print.print("1.0"); print.print(ITEM_SEP); print.print("2.0"); } print.print(COLUMN_SEP); // double_col_single print.print("3.0"); print.print(COLUMN_SEP); { // float_col_multi print.print("4.0"); print.print(ITEM_SEP); print.print("5.0"); } print.print(COLUMN_SEP); // float_col_single print.print("6.0"); print.print(COLUMN_SEP); // geo_col_single print.print("10.0"); print.print(ITEM_SEP); print.print("10.0"); print.print(COLUMN_SEP); { // int_col_multi print.print("1"); print.print(ITEM_SEP); print.print("2"); } print.print(COLUMN_SEP); // int_col_single print.print("3"); print.print(COLUMN_SEP); { // long_col_multi print.print("4"); print.print(ITEM_SEP); print.print("5"); } print.print(COLUMN_SEP); // long_col_single print.print("6"); print.print(COLUMN_SEP); { // stored_col_multi print.print("stored_1"); print.print(ITEM_SEP); print.print("stored_2"); } print.print(COLUMN_SEP); // stored_col_single print.print("stored_3"); print.print(COLUMN_SEP); { // string_col_multi print.print("string_1"); print.print(ITEM_SEP); print.print("string_2"); } print.print(COLUMN_SEP); // string_col_single print.print("string_3"); print.print(COLUMN_SEP); { // text_col_multi print.print("text_1"); print.print(ITEM_SEP); print.print("text_2"); } print.print(COLUMN_SEP); // text_col_single print.print("text_3"); print.println(); } print.close(); } private String buildCreateLoadTable(Connection connection) throws SQLException { StringBuilder builder = new StringBuilder("create TABLE if not exists loadtable ("); Statement statement = connection.createStatement(); if (statement.execute("desc testtable")) { ResultSet resultSet = statement.getResultSet(); boolean first = true; while (resultSet.next()) { if (!first) { builder.append(", "); } Object name = resultSet.getObject(1); Object type = resultSet.getObject(2); builder.append(name.toString()); builder.append(' '); builder.append(type.toString()); first = false; } builder.append(")"); return builder.toString(); } throw new RuntimeException("Can't build create table script."); } public static void run(Connection connection, String sql) throws SQLException { System.out.println("Running:" + sql); Statement statement = connection.createStatement(); if (statement.execute(sql)) { ResultSet resultSet = statement.getResultSet(); while (resultSet.next()) { ResultSetMetaData metaData = resultSet.getMetaData(); int columnCount = metaData.getColumnCount(); for (int i = 1; i <= columnCount; i++) { System.out.print(resultSet.getObject(i) + "\t"); } System.out.println(); } } statement.close(); } private List<String> list(String... sarray) { List<String> list = new ArrayList<String>(); for (String s : sarray) { list.add(s); } return list; } private Map<String, List<String>> toMap(List<BlurColumn> columns) { Map<String, List<String>> map = new HashMap<String, List<String>>(); for (BlurColumn blurColumn : columns) { String name = blurColumn.getName(); List<String> list = map.get(name); if (list == null) { map.put(name, list = new ArrayList<String>()); } list.add(blurColumn.getValue()); } return map; } @SuppressWarnings("resource") private int waitForStartupAndGetPort(HiveServer2 hiveServer2) throws InterruptedException { while (true) { // thriftCLIService->server->serverTransport_->serverSocket_ Thread.sleep(100); Object o1 = getObject(hiveServer2, "thriftCLIService"); if (o1 == null) { continue; } Object o2 = getObject(o1, "server"); if (o2 == null) { continue; } Object o3 = getObject(o2, "serverTransport_"); if (o3 == null) { continue; } Object o4 = getObject(o3, "serverSocket_"); if (o4 == null) { continue; } ServerSocket socket = (ServerSocket) o4; return socket.getLocalPort(); } } private Object getObject(Object o, String field) { return getObject(o, field, o.getClass()); } private Object getObject(Object o, String field, Class<? extends Object> clazz) { try { Field declaredField = clazz.getDeclaredField(field); return getObject(o, declaredField); } catch (NoSuchFieldException e) { return getObject(o, field, clazz.getSuperclass()); } catch (SecurityException e) { throw new RuntimeException(e); } } private Object getObject(Object o, Field field) { field.setAccessible(true); try { return field.get(o); } catch (Exception e) { throw new RuntimeException(e); } } }