package com.mongodb.hadoop.hive;

import com.mongodb.DBCollection;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.hadoop.testutils.BaseHadoopTest;
import com.mongodb.hadoop.util.MongoClientURIBuilder;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.zeroturnaround.exec.ProcessExecutor;

import java.io.File;
import java.net.URISyntaxException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import static java.lang.String.format;

/**
 * Base class for Hive integration tests. Provides helpers for creating
 * HDFS-, BSON-, and MongoDB-backed Hive tables and for moving test data
 * between them over a HiveServer2 JDBC connection.
 */
public class HiveTest extends BaseHadoopTest {
    public static final String HDFS_BACKED_TABLE = "hdfs_backed";
    public static final String BSON_BACKED_TABLE = "bson_backed";
    public static final String MONGO_BACKED_TABLE = "mongo_backed";
    public static final String MONGO_COLLECTION = "hive_accessible";
    public static final String TEST_SCHEMA = "(id INT, name STRING, age INT)";
    public static final String HIVE_TABLE_TYPE = "textfile";
    public static final String SERDE_PROPERTIES = "'mongo.columns.mapping'='{\"id\":\"_id\"}'";
    public static final String BSON_HDFS_TEST_PATH = "hdfs://localhost:8020/user/hive/warehouse/bson_test_files/";

    private static final Logger LOG = LoggerFactory.getLogger(HiveTest.class);

    private static Connection connection;

    private final MongoClientURI mongoTestURI;
    private MongoClient mongoClient;

    public HiveTest() {
        mongoTestURI = authCheck(new MongoClientURIBuilder()
                                     .collection("mongo_hadoop", MONGO_COLLECTION)
        ).build();
    }

    /** Minimal ColumnInfo with an INT type, for tests that need column metadata. */
    public class SimpleMockColumnInfo extends ColumnInfo {
        public SimpleMockColumnInfo(final String colName) {
            super(colName, TypeInfoFactory.intTypeInfo, "test_tab", false);
        }
    }

    @BeforeClass
    public static void setupHive() throws SQLException, ClassNotFoundException {
        // A host-less "jdbc:hive2://" URL connects to an embedded HiveServer2.
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        connection = DriverManager.getConnection("jdbc:hive2://", "", "");
    }

    @AfterClass
    public static void tearDownHive() throws SQLException {
        if (connection != null) {
            connection.close();
        }
    }

    protected MongoClient getMongoClient() {
        if (mongoClient == null) {
            try {
                mongoClient = new MongoClient(getInputUri());
            } catch (final Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
        return mongoClient;
    }

    protected MongoClientURI getInputUri() {
        return mongoTestURI;
    }

    protected void createHDFSHiveTable(final String name, final String schema, final String delimiter,
                                       final String type) throws SQLException {
        execute(format("CREATE TABLE %s %s\n"
                       + "ROW FORMAT DELIMITED\n"
                       + "FIELDS TERMINATED BY '%s'\n"
                       + "STORED AS %s", name, schema, delimiter, type));
    }

    protected void dropTable(final String tblName) {
        try {
            execute(format("DROP TABLE %s", tblName));
        } catch (SQLException e) {
            LOG.error("Could not drop table " + tblName + ": " + e.getMessage());
        }
    }

    protected void execute(final String command) throws SQLException {
        if (LOG.isInfoEnabled()) {
            LOG.info(format("Executing Hive command: %s", command.replace("\n", "\n\t")));
        }
        // try-with-resources ensures the Statement is closed even on failure.
        try (Statement statement = connection.createStatement()) {
            statement.execute(command);
        }
    }

    protected Results query(final String queryStr) {
        final Results results = new Results();
        try (Statement statement = connection.createStatement()) {
            if (LOG.isInfoEnabled()) {
                LOG.info(format("Executing Hive query: %s", queryStr.replace("\n", "\n\t")));
            }
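            // executeQuery() throws SQLException for statements that produce
            // no ResultSet (e.g. DDL); the catch below logs that case and an
            // empty Results object is returned.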
            final ResultSet resultSet = statement.executeQuery(queryStr);
            results.process(resultSet);
        } catch (SQLException e) {
            LOG.error("SQL query <" + queryStr + "> threw SQLException; "
                      + "it may not have returned a ResultSet.", e);
        }
        return results;
    }

    protected Results performTwoTableJOIN(final String firstTable, final String secondTable) {
        return getAllDataFromTable(format("%s JOIN %s", firstTable, secondTable));
    }

    protected String getPath(final String resource) {
        try {
            return new File(getClass().getResource("/" + resource).toURI()).getAbsolutePath();
        } catch (final URISyntaxException e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    protected Results getAllDataFromTable(final String table) {
        return query("SELECT * FROM " + table);
    }

    protected void loadDataIntoHDFSHiveTable() throws SQLException {
        createEmptyHDFSHiveTable();
        execute(format("LOAD DATA LOCAL INPATH '%s'\n"
                       + "INTO TABLE %s", getPath("test_data.txt"), HDFS_BACKED_TABLE));
    }

    public void createEmptyHDFSHiveTable() throws SQLException {
        dropTable(HDFS_BACKED_TABLE);
        createHDFSHiveTable(HDFS_BACKED_TABLE, TEST_SCHEMA, "\\t", HIVE_TABLE_TYPE);
    }

    protected void loadDataIntoMongoDBHiveTable(final boolean withSerDeProps) throws SQLException {
        createMongoBackedTable(withSerDeProps);
        transferData(HDFS_BACKED_TABLE, MONGO_BACKED_TABLE);
    }

    public void createMongoBackedTable(final boolean withSerDeProps) throws SQLException {
        dropTable(MONGO_BACKED_TABLE);
        final MongoClientURI uri = authCheck(new MongoClientURIBuilder()
                                                 .collection("mongo_hadoop", MONGO_COLLECTION)
        ).build();
        execute(format("CREATE TABLE %s %s\n"
                       + "STORED BY '%s'\n"
                       + (withSerDeProps ? format("WITH SERDEPROPERTIES(%s)\n", SERDE_PROPERTIES) : "")
                       + "TBLPROPERTIES ('mongo.uri'='%s')",
                       MONGO_BACKED_TABLE, TEST_SCHEMA, MongoStorageHandler.class.getName(), uri));
    }

    protected void transferData(final String from, final String to) throws SQLException {
        execute(format("INSERT OVERWRITE TABLE %s "
                       + "SELECT * FROM %s", to, from));
    }

    protected DBCollection getCollection(final String collection) {
        return getMongoClient().getDB("mongo_hadoop").getCollection(collection);
    }

    protected void loadIntoHDFS(final String localPath, final String hdfsPath) {
        try {
            // Create the target directory, then copy the local file into it.
            new ProcessExecutor(HADOOP_HOME + "/bin/hadoop", "fs", "-mkdir", "-p", hdfsPath)
                .redirectOutput(System.out)
                .execute();
            new ProcessExecutor(HADOOP_HOME + "/bin/hadoop", "fs", "-put", localPath, hdfsPath)
                .directory(PROJECT_HOME)
                .redirectOutput(System.out)
                .execute();
        } catch (final Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }
}