/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.io.IOContextMap;
import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
import org.apache.hadoop.hive.ql.plan.CollectDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.junit.Assert;
import org.junit.Test;
/**
 * TestOperators.
 */
public class TestOperators extends TestCase {

  // this is our row to test expressions on
  protected InspectableObject[] r;

  @Override
  protected void setUp() {
    r = new InspectableObject[5];
    ArrayList<String> names = new ArrayList<String>(3);
    names.add("col0");
    names.add("col1");
    names.add("col2");
    ArrayList<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>(3);
    objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    for (int i = 0; i < 5; i++) {
      ArrayList<String> data = new ArrayList<String>();
      data.add("" + i);
      data.add("" + (i + 1));
      data.add("" + (i + 2));
      try {
        r[i] = new InspectableObject();
        r[i].o = data;
        r[i].oi = ObjectInspectorFactory.getStandardStructObjectInspector(names, objectInspectors);
      } catch (Throwable e) {
        throw new RuntimeException(e);
      }
    }
  }

  private void testTaskIds(String[] taskIds, String expectedAttemptId, String expectedTaskId) {
    Configuration conf = new JobConf(TestOperators.class);
    for (String one : taskIds) {
      conf.set("mapred.task.id", one);
      String attemptId = Utilities.getTaskId(conf);
      assertEquals(expectedAttemptId, attemptId);
      assertEquals(expectedTaskId, Utilities.getTaskIdFromFilename(attemptId));
      assertEquals(expectedTaskId, Utilities.getTaskIdFromFilename(attemptId + ".gz"));
      assertEquals(expectedTaskId, Utilities.getTaskIdFromFilename(
          Utilities.toTempPath(new Path(attemptId + ".gz")).toString()));
    }
  }

  /**
   * More checks need to be added here. Currently this only exercises the
   * basic file-naming utilities. The old test was deactivated as part of
   * HIVE-405.
   */
  public void testFileSinkOperator() throws Throwable {
    try {
      testTaskIds(new String[] {
          "attempt_200707121733_0003_m_000005_0",
          "attempt_local_0001_m_000005_0",
          "task_200709221812_0001_m_000005_0",
          "task_local_0001_m_000005_0"
      }, "000005_0", "000005");

      testTaskIds(new String[] {
          "job_local_0001_map_000005",
          "job_local_0001_reduce_000005",
      }, "000005", "000005");

      testTaskIds(new String[] {"1234567"}, "1234567", "1234567");

      assertEquals("000005",
          Utilities.getTaskIdFromFilename("/mnt/dev005/task_local_0001_m_000005_0"));

      System.out.println("FileSink Operator ok");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
    }
  }
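
  /**
   * A minimal illustrative check of the naming scheme above, assuming only
   * the behavior already asserted in testTaskIds: in an attempt id such as
   * "attempt_200707121733_0003_m_000005_0" the trailing "000005_0" is the
   * task-id/attempt-number pair, and getTaskIdFromFilename() strips the
   * attempt suffix, keeping "000005".
   */
  public void testTaskIdExtractionExample() {
    // same mapping as exercised in testFileSinkOperator: "000005_0" -> "000005"
    assertEquals("000005", Utilities.getTaskIdFromFilename("000005_0"));
  }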
  /**
   * When ScriptOperator runs an external script, it passes the job
   * configuration to it as environment variables. Environment variables have
   * system-dependent size limits, so the job configuration properties must be
   * checked (and possibly truncated) first. This test checks that behavior.
   */
  public void testScriptOperatorEnvVarsProcessing() throws Throwable {
    try {
      ScriptOperator scriptOperator = new ScriptOperator(new CompilationOpContext());

      // environment variable names
      assertEquals("a_b_c", scriptOperator.safeEnvVarName("a.b.c"));
      assertEquals("a_b_c", scriptOperator.safeEnvVarName("a-b-c"));

      // short environment variable values
      assertEquals("value", scriptOperator.safeEnvVarValue("value", "name", false));
      assertEquals("value", scriptOperator.safeEnvVarValue("value", "name", true));

      // long environment variable values
      char[] array = new char[20 * 1024 + 1];
      Arrays.fill(array, 'a');
      String hugeEnvVar = new String(array);
      assertEquals(20 * 1024 + 1, hugeEnvVar.length());
      assertEquals(20 * 1024 + 1, scriptOperator.safeEnvVarValue(hugeEnvVar, "name", false).length());
      assertEquals(20 * 1024, scriptOperator.safeEnvVarValue(hugeEnvVar, "name", true).length());

      // full test
      Configuration hconf = new JobConf(ScriptOperator.class);
      hconf.set("name", hugeEnvVar);
      Map<String, String> env = new HashMap<String, String>();

      HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV, false);
      scriptOperator.addJobConfToEnvironment(hconf, env);
      assertEquals(20 * 1024 + 1, env.get("name").length());

      HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV, true);
      scriptOperator.addJobConfToEnvironment(hconf, env);
      assertEquals(20 * 1024, env.get("name").length());

      System.out.println("Script Operator Environment Variables processing ok");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
    }
  }

  public void testScriptOperatorBlacklistedEnvVarsProcessing() {
    ScriptOperator scriptOperator = new ScriptOperator(new CompilationOpContext());

    Configuration hconf = new JobConf(ScriptOperator.class);
    Map<String, String> env = new HashMap<String, String>();

    HiveConf.setVar(hconf, HiveConf.ConfVars.HIVESCRIPT_ENV_BLACKLIST, "foobar");
    hconf.set("foobar", "foobar");
    hconf.set("barfoo", "barfoo");

    scriptOperator.addJobConfToEnvironment(hconf, env);

    Assert.assertFalse(env.containsKey("foobar"));
    Assert.assertTrue(env.containsKey("barfoo"));
  }
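
  /**
   * Illustrative sketch: per safeEnvVarName() above, dots and dashes in a
   * configuration property name are rewritten to underscores before the
   * property is exported to the script's environment. The property name here
   * is hypothetical, chosen only to show the mapping on a realistic key.
   */
  public void testSafeEnvVarNameExample() {
    ScriptOperator scriptOperator = new ScriptOperator(new CompilationOpContext());
    assertEquals("hive_script_operator_env_blacklist",
        scriptOperator.safeEnvVarName("hive.script.operator.env.blacklist"));
  }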
  public void testScriptOperator() throws Throwable {
    try {
      System.out.println("Testing Script Operator");
      // col1
      ExprNodeDesc exprDesc1 = new ExprNodeColumnDesc(
          TypeInfoFactory.stringTypeInfo, "col1", "", false);

      // concat(col0, '1')
      ExprNodeDesc expr1 = new ExprNodeColumnDesc(
          TypeInfoFactory.stringTypeInfo, "col0", "", false);
      ExprNodeDesc expr2 = new ExprNodeConstantDesc("1");
      ExprNodeDesc exprDesc2 = TypeCheckProcFactory.DefaultExprProcessor
          .getFuncExprNodeDesc("concat", expr1, expr2);

      // select operator to project these two columns
      ArrayList<ExprNodeDesc> earr = new ArrayList<ExprNodeDesc>();
      earr.add(exprDesc1);
      earr.add(exprDesc2);
      ArrayList<String> outputCols = new ArrayList<String>();
      for (int i = 0; i < earr.size(); i++) {
        outputCols.add("_col" + i);
      }
      SelectDesc selectCtx = new SelectDesc(earr, outputCols);
      Operator<SelectDesc> op = OperatorFactory.get(new CompilationOpContext(), SelectDesc.class);
      op.setConf(selectCtx);

      // scriptOperator to echo the output of the select
      TableDesc scriptOutput = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "a,b");
      TableDesc scriptInput = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "a,b");
      ScriptDesc sd = new ScriptDesc("cat", scriptOutput, TextRecordWriter.class,
          scriptInput, TextRecordReader.class, TextRecordReader.class,
          PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key"));
      Operator<ScriptDesc> sop = OperatorFactory.getAndMakeChild(sd, op);

      // Collect operator to observe the output of the script
      CollectDesc cd = new CollectDesc(Integer.valueOf(10));
      CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, sop);

      op.initialize(new JobConf(TestOperators.class), new ObjectInspector[]{r[0].oi});

      // evaluate on rows
      for (int i = 0; i < 5; i++) {
        op.process(r[i].o, 0);
      }
      op.close(false);

      InspectableObject io = new InspectableObject();
      for (int i = 0; i < 5; i++) {
        cdop.retrieve(io);
        System.out.println("[" + i + "] io.o=" + io.o);
        System.out.println("[" + i + "] io.oi=" + io.oi);
        StructObjectInspector soi = (StructObjectInspector) io.oi;
        assertNotNull(soi);
        StructField a = soi.getStructFieldRef("a");
        StructField b = soi.getStructFieldRef("b");
        assertEquals("" + (i + 1), ((PrimitiveObjectInspector) a
            .getFieldObjectInspector()).getPrimitiveJavaObject(soi
            .getStructFieldData(io.o, a)));
        assertEquals(i + "1", ((PrimitiveObjectInspector) b
            .getFieldObjectInspector()).getPrimitiveJavaObject(soi
            .getStructFieldData(io.o, b)));
      }

      System.out.println("Script Operator ok");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
    }
  }
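
  // Data-flow note for testScriptOperator above: for input row i the select
  // emits (col1, concat(col0, "1")) = (i + 1, i + "1"); the "cat" script is a
  // pass-through, so those are exactly the values the collect operator sees.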
  public void testMapOperator() throws Throwable {
    try {
      System.out.println("Testing Map Operator");
      // initialize configuration
      JobConf hconf = new JobConf(TestOperators.class);
      hconf.set(MRJobConfig.MAP_INPUT_FILE, "hdfs:///testDir/testFile");
      IOContextMap.get(hconf).setInputPath(new Path("hdfs:///testDir/testFile"));

      // initialize pathToAliases
      ArrayList<String> aliases = new ArrayList<String>();
      aliases.add("a");
      aliases.add("b");
      LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
      pathToAliases.put(new Path("hdfs:///testDir"), aliases);

      // initialize pathToTableInfo
      // Default: treat the table as a single column "col"
      TableDesc td = Utilities.defaultTd;
      PartitionDesc pd = new PartitionDesc(td, null);
      LinkedHashMap<Path, org.apache.hadoop.hive.ql.plan.PartitionDesc> pathToPartitionInfo =
          new LinkedHashMap<>();
      pathToPartitionInfo.put(new Path("hdfs:///testDir"), pd);

      // initialize aliasToWork
      CompilationOpContext ctx = new CompilationOpContext();
      CollectDesc cd = new CollectDesc(Integer.valueOf(1));
      CollectOperator cdop1 = (CollectOperator) OperatorFactory.get(ctx, CollectDesc.class);
      cdop1.setConf(cd);
      CollectOperator cdop2 = (CollectOperator) OperatorFactory.get(ctx, CollectDesc.class);
      cdop2.setConf(cd);
      LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork =
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
      aliasToWork.put("a", cdop1);
      aliasToWork.put("b", cdop2);

      // initialize mapredWork
      MapredWork mrwork = new MapredWork();
      mrwork.getMapWork().setPathToAliases(pathToAliases);
      mrwork.getMapWork().setPathToPartitionInfo(pathToPartitionInfo);
      mrwork.getMapWork().setAliasToWork(aliasToWork);

      // get map operator and initialize it
      MapOperator mo = new MapOperator(new CompilationOpContext());
      mo.initializeAsRoot(hconf, mrwork.getMapWork());

      Text tw = new Text();
      InspectableObject io1 = new InspectableObject();
      InspectableObject io2 = new InspectableObject();
      for (int i = 0; i < 5; i++) {
        String answer = "[[" + i + ", " + (i + 1) + ", " + (i + 2) + "]]";
        tw.set("" + i + "\u0001" + (i + 1) + "\u0001" + (i + 2));
        mo.process(tw);
        cdop1.retrieve(io1);
        cdop2.retrieve(io2);
        System.out.println("io1.o.toString() = " + io1.o.toString());
        System.out.println("io2.o.toString() = " + io2.o.toString());
        System.out.println("answer = " + answer);
        assertEquals(answer, io1.o.toString());
        assertEquals(answer, io2.o.toString());
      }

      System.out.println("Map Operator ok");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
    }
  }

  @Test
  public void testFetchOperatorContextQuoting() throws Exception {
    JobConf conf = new JobConf();
    ArrayList<Path> list = new ArrayList<Path>();
    list.add(new Path("hdfs://nn.example.com/fi\tl\\e\t1"));
    list.add(new Path("hdfs://nn.example.com/file\t2"));
    list.add(new Path("file:/file3"));
    FetchOperator.setFetchOperatorContext(conf, list);
    String[] parts = conf.get(FetchOperator.FETCH_OPERATOR_DIRECTORY_LIST).split("\t");
    assertEquals(3, parts.length);
    assertEquals("hdfs://nn.example.com/fi\\tl\\\\e\\t1", parts[0]);
    assertEquals("hdfs://nn.example.com/file\\t2", parts[1]);
    assertEquals("file:/file3", parts[2]);
  }
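
  // Quoting note for testFetchOperatorContextQuoting above: the directory
  // list is stored as a single tab-separated string, so within each path a
  // backslash is escaped to "\\" and a literal tab to "\t"; splitting on the
  // unescaped tab then recovers exactly one entry per path.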
  /**
   * A custom input format that checks to make sure that the fetch operator
   * sets the required attributes.
   */
  public static class CustomInFmt extends TextInputFormat {

    @Override
    public InputSplit[] getSplits(JobConf job, int splits) throws IOException {

      // ensure that the table properties were copied
      assertEquals("val1", job.get("myprop1"));
      assertEquals("val2", job.get("myprop2"));

      // ensure that both of the partitions are in the complete list
      String[] dirs = job.get("hive.complete.dir.list").split("\t");
      assertEquals(2, dirs.length);
      assertTrue(dirs[0].endsWith("/state=CA"));
      assertTrue(dirs[1].endsWith("/state=OR"));
      return super.getSplits(job, splits);
    }
  }

  @Test
  public void testFetchOperatorContext() throws Exception {
    HiveConf conf = new HiveConf();
    conf.set("hive.support.concurrency", "false");
    conf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
    conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
        "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    SessionState.start(conf);
    String cmd = "create table fetchOp (id int, name string) " +
        "partitioned by (state string) " +
        "row format delimited fields terminated by '|' " +
        "stored as " +
        "inputformat 'org.apache.hadoop.hive.ql.exec.TestOperators$CustomInFmt' " +
        "outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' " +
        "tblproperties ('myprop1'='val1', 'myprop2' = 'val2')";
    Driver driver = new Driver();
    driver.init();
    CommandProcessorResponse response = driver.run(cmd);
    assertEquals(0, response.getResponseCode());
    List<Object> result = new ArrayList<Object>();

    cmd = "load data local inpath '../data/files/employee.dat' " +
        "overwrite into table fetchOp partition (state='CA')";
    driver.init();
    response = driver.run(cmd);
    assertEquals(0, response.getResponseCode());

    cmd = "load data local inpath '../data/files/employee2.dat' " +
        "overwrite into table fetchOp partition (state='OR')";
    driver.init();
    response = driver.run(cmd);
    assertEquals(0, response.getResponseCode());

    cmd = "select * from fetchOp";
    driver.init();
    driver.setMaxRows(500);
    response = driver.run(cmd);
    assertEquals(0, response.getResponseCode());
    driver.getResults(result);
    assertEquals(20, result.size());
    driver.close();
  }
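
  // Sizing note for the LLAP test below: the adjusted noconditional task size
  // follows adjusted = base + base * oversubscriptionFactor * slots, where
  // slots is bounded by the executors available per query. For example, with
  // a 1 GB base, factor 0.2, and 3 slots: 1 GB + 1 GB * 0.2 * 3 = 1.6 GB.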
  @Test
  public void testNoConditionalTaskSizeForLlap() {
    ConvertJoinMapJoin convertJoinMapJoin = new ConvertJoinMapJoin();
    long defaultNoConditionalTaskSize = 1024L * 1024L * 1024L;
    HiveConf hiveConf = new HiveConf();

    // execution mode not set, so the default size is returned unchanged
    assertEquals(defaultNoConditionalTaskSize,
        convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf)
            .getAdjustedNoConditionalTaskSize());

    hiveConf.set(HiveConf.ConfVars.HIVE_EXECUTION_MODE.varname, "llap");
    // default executors is 4, max slots is 3, so 3 * 20% of the noconditional
    // task size will be oversubscribed
    hiveConf.set(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_OVERSUBSCRIBE_FACTOR.varname, "0.2");
    double fraction = hiveConf.getFloatVar(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_OVERSUBSCRIBE_FACTOR);
    int maxSlots = 3;
    long expectedSize = (long) (defaultNoConditionalTaskSize +
        (defaultNoConditionalTaskSize * fraction * maxSlots));
    assertEquals(expectedSize,
        convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf)
            .getAdjustedNoConditionalTaskSize());

    // when num executors is less than max executors per query (which is not
    // the expected case), the default executor count is chosen, so 4 * 20% of
    // the noconditional task size will be oversubscribed
    int chosenSlots = hiveConf.getIntVar(HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
    hiveConf.set(HiveConf.ConfVars.LLAP_MEMORY_OVERSUBSCRIPTION_MAX_EXECUTORS_PER_QUERY.varname, "5");
    expectedSize = (long) (defaultNoConditionalTaskSize +
        (defaultNoConditionalTaskSize * fraction * chosenSlots));
    assertEquals(expectedSize,
        convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf)
            .getAdjustedNoConditionalTaskSize());

    // disable memory checking
    hiveConf.set(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_MONITOR_CHECK_INTERVAL.varname, "0");
    assertFalse(
        convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf)
            .doMemoryMonitoring());

    // an invalid inflation factor also disables memory monitoring
    hiveConf.set(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_MONITOR_CHECK_INTERVAL.varname, "10000");
    hiveConf.set(HiveConf.ConfVars.HIVE_HASH_TABLE_INFLATION_FACTOR.varname, "0.0f");
    assertFalse(
        convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf)
            .doMemoryMonitoring());
  }
}