/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.mapreduce;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hive.hcatalog.NoExitSecurityManager;
import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestHCatPartitionPublish {
  private static Configuration mrConf = null;
  private static FileSystem fs = null;
  private static MiniMRCluster mrCluster = null;
  private static boolean isServerRunning = false;
  private static int msPort;
  private static HiveConf hcatConf;
  private static HiveMetaStoreClient msc;
  private static SecurityManager securityManager;
  private static Configuration conf = new Configuration(true);

  public static File handleWorkDir() throws IOException {
    String testDir = System.getProperty("test.data.dir", "./");
    testDir = testDir + "/test_hcat_partitionpublish_"
        + Math.abs(new Random().nextLong()) + "/";
    File workDir = new File(new File(testDir).getCanonicalPath());
    FileUtil.fullyDelete(workDir);
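    // fullyDelete above clears any residue from earlier runs so each test
    // run starts from an empty scratch directory.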
    workDir.mkdirs();
    return workDir;
  }

  @BeforeClass
  public static void setup() throws Exception {
    File workDir = handleWorkDir();
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem");
    fs = FileSystem.get(conf);
    System.setProperty("hadoop.log.dir",
        new File(workDir, "/logs").getAbsolutePath());
    // LocalJobRunner does not work with the mapreduce OutputCommitter, so a
    // MiniMRCluster is needed instead. See MAPREDUCE-2350.
    mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null,
        new JobConf(conf));
    mrConf = mrCluster.createJobConf();

    if (isServerRunning) {
      return;
    }
    msPort = MetaStoreUtils.findFreePort();
    MetaStoreUtils.startMetaStore(msPort,
        ShimLoader.getHadoopThriftAuthBridge());
    // Give the metastore thread time to come up before clients connect.
    Thread.sleep(10000);
    isServerRunning = true;
    securityManager = System.getSecurityManager();
    System.setSecurityManager(new NoExitSecurityManager());

    hcatConf = new HiveConf(TestHCatPartitionPublish.class);
    hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:"
        + msPort);
    hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
    hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3);
    hcatConf.setTimeVar(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, 120,
        TimeUnit.SECONDS);
    hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
        HCatSemanticAnalyzer.class.getName());
    hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    msc = new HiveMetaStoreClient(hcatConf);
    System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
    System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
  }

  @AfterClass
  public static void tearDown() throws IOException {
    if (mrCluster != null) {
      mrCluster.shutdown();
    }
    System.setSecurityManager(securityManager);
    isServerRunning = false;
  }

  @Test
  public void testPartitionPublish() throws Exception {
    String dbName = "default";
    String tableName = "testHCatPartitionedTable";
    createTable(null, tableName);

    Map<String, String> partitionMap = new HashMap<String, String>();
    partitionMap.put("part1", "p1value1");
    partitionMap.put("part0", "p0value1");

    ArrayList<HCatFieldSchema> hcatTableColumns = new ArrayList<HCatFieldSchema>();
    for (FieldSchema fs : getTableColumns()) {
      hcatTableColumns.add(HCatSchemaUtils.getHCatFieldSchema(fs));
    }

    runMRCreateFail(dbName, tableName, partitionMap, hcatTableColumns);
    // The failed job must not publish the partition to the metastore ...
    List<String> ptns = msc.listPartitionNames(dbName, tableName, (short) 10);
    Assert.assertEquals(0, ptns.size());
    Table table = msc.getTable(dbName, tableName);
    Assert.assertNotNull(table);
    // ... and must not leave the partition directory behind on the filesystem.
    Path path = new Path(table.getSd().getLocation()
        + "/part1=p1value1/part0=p0value1");
    Assert.assertFalse(path.getFileSystem(conf).exists(path));
  }

  void runMRCreateFail(String dbName, String tableName,
      Map<String, String> partitionValues, List<HCatFieldSchema> columns)
      throws Exception {
    Job job = new Job(mrConf, "hcat mapreduce write fail test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(TestHCatPartitionPublish.MapFail.class);

    // input/output settings
    job.setInputFormatClass(TextInputFormat.class);
    Path path = new Path(fs.getWorkingDirectory(),
        "mapred/testHCatMapReduceInput");
    // The write count does not matter, as the map will fail in its first call.
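    // MapFail (below) throws from its first map() call, so the job is
    // guaranteed to fail regardless of the input size.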
    createInputFile(path, 5);
    TextInputFormat.setInputPaths(job, path);
    job.setOutputFormatClass(HCatOutputFormat.class);
    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName,
        partitionValues);
    HCatOutputFormat.setOutput(job, outputJobInfo);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    HCatOutputFormat.setSchema(job, new HCatSchema(columns));
    boolean success = job.waitForCompletion(true);
    Assert.assertFalse(success);
  }

  private void createInputFile(Path path, int rowCount) throws IOException {
    if (fs.exists(path)) {
      fs.delete(path, true);
    }
    FSDataOutputStream os = fs.create(path);
    for (int i = 0; i < rowCount; i++) {
      os.writeChars(i + "\n");
    }
    os.close();
  }

  public static class MapFail extends
      Mapper<LongWritable, Text, BytesWritable, HCatRecord> {

    @Override
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      throw new IOException("Exception to mimic job failure.");
    }
  }

  private void createTable(String dbName, String tableName) throws Exception {
    String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME
        : dbName;
    try {
      msc.dropTable(databaseName, tableName);
    } catch (Exception e) {
      // Ignored: dropTable can fail with NoSuchObjectException when the
      // table does not exist yet.
    }
    Table tbl = new Table();
    tbl.setDbName(databaseName);
    tbl.setTableName(tableName);
    tbl.setTableType("MANAGED_TABLE");
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(getTableColumns());
    tbl.setPartitionKeys(getPartitionKeys());
    tbl.setSd(sd);
    sd.setBucketCols(new ArrayList<String>(2));
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setName(tbl.getTableName());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT,
        "1");
    sd.getSerdeInfo().setSerializationLib(ColumnarSerDe.class.getName());
    sd.setInputFormat(RCFileInputFormat.class.getName());
    sd.setOutputFormat(RCFileOutputFormat.class.getName());
    Map<String, String> tableParams = new HashMap<String, String>();
    tbl.setParameters(tableParams);
    msc.createTable(tbl);
  }

  protected List<FieldSchema> getPartitionKeys() {
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    // Partition keys are deliberately defined in unsorted order.
    fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, ""));
    fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, ""));
    return fields;
  }

  protected List<FieldSchema> getTableColumns() {
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""));
    fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""));
    return fields;
  }
}