/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.hive.hcatalog.pig; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.StorageFormats; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.processors.HiveCommand; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Shell; import org.apache.hive.hcatalog.HcatTestUtils; import org.apache.hive.hcatalog.common.HCatUtil; import org.apache.hive.hcatalog.data.HCatRecord; import org.apache.hive.hcatalog.data.Pair; import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.data.Tuple; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assume.assumeTrue; @RunWith(Parameterized.class) public class TestHCatLoaderEncryption { private static final AtomicInteger salt = new AtomicInteger(new Random().nextInt()); private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoaderEncryption.class); private final String TEST_DATA_DIR = HCatUtil.makePathASafeFileName(System.getProperty ("java.io.tmpdir") + File.separator + TestHCatLoaderEncryption.class.getCanonicalName() + "-" + System.currentTimeMillis() + "_" + salt.getAndIncrement()); private final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; private final String BASIC_FILE_NAME = TEST_DATA_DIR + "/basic.input.data"; private static final String BASIC_TABLE = "junit_unparted_basic"; private static final String ENCRYPTED_TABLE = "encrypted_table"; private static final String SECURITY_KEY_PROVIDER_URI_NAME = "dfs.encryption.key.provider.uri"; private HadoopShims.MiniDFSShim dfs = null; private HadoopShims.HdfsEncryptionShim hes = null; private final String[] testOnlyCommands = new String[]{"crypto"}; private Driver driver; private Map<Integer, Pair<Integer, String>> basicInputData; private static List<HCatRecord> readRecords = new ArrayList<HCatRecord>(); private static final Map<String, Set<String>> DISABLED_STORAGE_FORMATS = new HashMap<String, Set<String>>() {{ put(IOConstants.PARQUETFILE, new HashSet<String>() {{ add("testReadDataBasic"); add("testReadPartitionedBasic"); add("testProjectionsBasic"); add("testReadDataFromEncryptedHiveTable"); }}); }}; private String storageFormat; @Parameterized.Parameters public static Collection<Object[]> generateParameters() { return StorageFormats.names(); } public TestHCatLoaderEncryption(String storageFormat) { this.storageFormat = storageFormat; } private void dropTable(String tablename) throws IOException, CommandNeedRetryException { dropTable(tablename, driver); } static void dropTable(String tablename, Driver driver) throws IOException, CommandNeedRetryException { driver.run("drop table if exists " + tablename); } private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { createTable(tablename, schema, partitionedBy, driver, storageFormat); } static void createTable(String tablename, String schema, String partitionedBy, Driver driver, String storageFormat) throws IOException, CommandNeedRetryException { String createTable; createTable = "create table " + tablename + "(" + schema + ") "; if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { createTable = createTable + "partitioned by (" + partitionedBy + ") "; } createTable = createTable + "stored as " +storageFormat; executeStatementOnDriver(createTable, driver); } private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { createTable(tablename, schema, null); } /** * Execute Hive CLI statement * @param cmd arbitrary statement to execute */ static void executeStatementOnDriver(String cmd, Driver driver) throws IOException, CommandNeedRetryException { LOG.debug("Executing: " + cmd); CommandProcessorResponse cpr = driver.run(cmd); if(cpr.getResponseCode() != 0) { throw new IOException("Failed to execute \"" + cmd + "\". Driver returned " + cpr.getResponseCode() + " Error: " + cpr.getErrorMessage()); } } @Before public void setup() throws Exception { File f = new File(TEST_WAREHOUSE_DIR); if (f.exists()) { FileUtil.fullyDelete(f); } if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR); } HiveConf hiveConf = new HiveConf(this.getClass()); hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); hiveConf .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); String s = hiveConf.get("hdfs.minidfs.basedir"); if(s == null || s.length() <= 0) { //return System.getProperty("test.build.data", "build/test/data") + "/dfs/"; hiveConf.set("hdfs.minidfs.basedir", System.getProperty("test.build.data", "build/test/data") + "_" + System.currentTimeMillis() + "_" + salt.getAndIncrement() + "/dfs/"); } driver = new Driver(hiveConf); initEncryptionShim(hiveConf); String encryptedTablePath = TEST_WAREHOUSE_DIR + "/encryptedTable"; SessionState.start(new CliSessionState(hiveConf)); SessionState.get().out = System.out; createTable(BASIC_TABLE, "a int, b string"); createTableInSpecifiedPath(ENCRYPTED_TABLE, "a int, b string", encryptedTablePath, driver); associateEncryptionZoneWithPath(encryptedTablePath); int LOOP_SIZE = 3; String[] input = new String[LOOP_SIZE * LOOP_SIZE]; basicInputData = new HashMap<Integer, Pair<Integer, String>>(); int k = 0; for (int i = 1; i <= LOOP_SIZE; i++) { String si = i + ""; for (int j = 1; j <= LOOP_SIZE; j++) { String sj = "S" + j + "S"; input[k] = si + "\t" + sj; basicInputData.put(k, new Pair<Integer, String>(i, sj)); k++; } } HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); int i = 0; server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);", ++i); server.registerQuery("store A into '" + ENCRYPTED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i); server.executeBatch(); } void initEncryptionShim(HiveConf conf) throws IOException { FileSystem fs; HadoopShims shims = ShimLoader.getHadoopShims(); conf.set(SECURITY_KEY_PROVIDER_URI_NAME, getKeyProviderURI()); int numberOfDataNodes = 4; dfs = shims.getMiniDfs(conf, numberOfDataNodes, true, null); fs = dfs.getFileSystem(); // set up a java key provider for encrypted hdfs cluster hes = shims.createHdfsEncryptionShim(fs, conf); } public static String ensurePathEndsInSlash(String path) { if (path == null) { throw new NullPointerException("Path cannot be null"); } if (path.endsWith(File.separator)) { return path; } else { return path + File.separator; } } private void associateEncryptionZoneWithPath(String path) throws SQLException, CommandNeedRetryException { LOG.info(this.storageFormat + ": associateEncryptionZoneWithPath"); assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); enableTestOnlyCmd(SessionState.get().getConf()); CommandProcessor crypto = getTestCommand("crypto"); if (crypto == null) return; checkExecutionResponse(crypto.run("CREATE_KEY --keyName key_128 --bitLength 128")); checkExecutionResponse(crypto.run("CREATE_ZONE --keyName key_128 --path " + path)); } private void checkExecutionResponse(CommandProcessorResponse response) { int rc = response.getResponseCode(); if (rc != 0) { SessionState.get().out.println(response); } assertEquals("Crypto command failed with the exit code" + rc, 0, rc); } private void removeEncryptionZone() throws SQLException, CommandNeedRetryException { LOG.info(this.storageFormat + ": removeEncryptionZone"); enableTestOnlyCmd(SessionState.get().getConf()); CommandProcessor crypto = getTestCommand("crypto"); if (crypto == null) { return; } checkExecutionResponse(crypto.run("DELETE_KEY --keyName key_128")); } private CommandProcessor getTestCommand(final String commandName) throws SQLException { HiveCommand testCommand = HiveCommand.find(new String[]{commandName}, HiveCommand.ONLY_FOR_TESTING); if (testCommand == null) { return null; } return CommandProcessorFactory .getForHiveCommandInternal(new String[]{commandName}, SessionState.get().getConf(), testCommand.isOnlyForTesting()); } private void enableTestOnlyCmd(HiveConf conf){ StringBuilder securityCMDs = new StringBuilder(conf.getVar(HiveConf.ConfVars.HIVE_SECURITY_COMMAND_WHITELIST)); for(String c : testOnlyCommands){ securityCMDs.append(","); securityCMDs.append(c); } conf.set(HiveConf.ConfVars.HIVE_SECURITY_COMMAND_WHITELIST.toString(), securityCMDs.toString()); } private String getKeyProviderURI() { // Use the target directory if it is not specified String HIVE_ROOT = ensurePathEndsInSlash(System.getProperty("hive.root")); String keyDir = HIVE_ROOT + "ql/target/"; // put the jks file in the current test path only for test purpose return "jceks://file" + new Path(keyDir, "test.jks").toUri(); } @Test public void testReadDataFromEncryptedHiveTableByPig() throws IOException { assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); PigServer server = new PigServer(ExecType.LOCAL); server.registerQuery("X = load '" + ENCRYPTED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); Iterator<Tuple> XIter = server.openIterator("X"); int numTuplesRead = 0; while (XIter.hasNext()) { Tuple t = XIter.next(); assertEquals(2, t.size()); assertNotNull(t.get(0)); assertNotNull(t.get(1)); assertTrue(t.get(0).getClass() == Integer.class); assertTrue(t.get(1).getClass() == String.class); assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); assertEquals(t.get(1), basicInputData.get(numTuplesRead).second); numTuplesRead++; } assertEquals("failed with storage format: " + this.storageFormat, basicInputData.size(), numTuplesRead); } @Test public void testReadDataFromEncryptedHiveTableByHCatMR() throws Exception { assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); readRecords.clear(); Configuration conf = new Configuration(); Job job = new Job(conf, "hcat mapreduce read encryption test"); job.setJarByClass(this.getClass()); job.setMapperClass(TestHCatLoaderEncryption.MapRead.class); // input/output settings job.setInputFormatClass(HCatInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, ENCRYPTED_TABLE, null); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(0); FileSystem fs = new LocalFileSystem(); String pathLoc = TEST_DATA_DIR + "/testHCatMREncryptionOutput"; Path path = new Path(pathLoc); if (fs.exists(path)) { fs.delete(path, true); } TextOutputFormat.setOutputPath(job, new Path(pathLoc)); job.waitForCompletion(true); int numTuplesRead = 0; for (HCatRecord hCatRecord : readRecords) { assertEquals(2, hCatRecord.size()); assertNotNull(hCatRecord.get(0)); assertNotNull(hCatRecord.get(1)); assertTrue(hCatRecord.get(0).getClass() == Integer.class); assertTrue(hCatRecord.get(1).getClass() == String.class); assertEquals(hCatRecord.get(0), basicInputData.get(numTuplesRead).first); assertEquals(hCatRecord.get(1), basicInputData.get(numTuplesRead).second); numTuplesRead++; } assertEquals("failed HCat MR read with storage format: " + this.storageFormat, basicInputData.size(), numTuplesRead); } public static class MapRead extends Mapper<WritableComparable, HCatRecord, BytesWritable, Text> { @Override public void map(WritableComparable key, HCatRecord value, Context context) throws IOException, InterruptedException { try { readRecords.add(value); } catch (Exception e) { LOG.error("error when read record.", e); throw new IOException(e); } } } @After public void tearDown() throws Exception { try { if (driver != null) { dropTable(BASIC_TABLE); dropTable(ENCRYPTED_TABLE); removeEncryptionZone(); } } finally { FileUtils.deleteDirectory(new File(TEST_DATA_DIR)); if (dfs != null) { dfs.shutdown(); } } } static void createTableInSpecifiedPath(String tableName, String schema, String path, Driver driver) throws IOException, CommandNeedRetryException { String createTableStr; createTableStr = "create table " + tableName + "(" + schema + ") location \'" + path + "\'"; executeStatementOnDriver(createTableStr, driver); } }