/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.optimizer; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFPower; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.BeforeClass; import org.junit.Test; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; public class TestColumnPrunerProcCtx { // struct<a:boolean,b:double> private static TypeInfo col1Type; // double private static TypeInfo col2Type; // struct<col1:struct<a:boolean,b:double>,col2:double> private static TypeInfo col3Type; @BeforeClass public static void setup(){ List<String> ns = new ArrayList<>(); ns.add("a"); ns.add("b"); List<TypeInfo> tis = new ArrayList<>(); TypeInfo aType = TypeInfoFactory.booleanTypeInfo; TypeInfo bType = TypeInfoFactory.doubleTypeInfo; tis.add(aType); tis.add(bType); col1Type = TypeInfoFactory.getStructTypeInfo(ns, tis); col2Type = TypeInfoFactory.doubleTypeInfo; List<String> names = new ArrayList<>(); names.add("col1"); names.add("col2"); List<TypeInfo> typeInfos = new ArrayList<>(); typeInfos.add(col1Type); typeInfos.add(col2Type); col3Type = TypeInfoFactory.getStructTypeInfo(names, typeInfos); } // Test select root.col1.a from root:struct<col1:struct<a:boolean,b:double>,col2:double> @Test public void testGetSelectNestedColPathsFromChildren1() { ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null); ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc col1 = new ExprNodeFieldDesc(col1Type, colDesc, "col1", false); ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.booleanTypeInfo, col1, "a", false); final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths); List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths); compareTestResults(groups, "root.col1.a"); } // Test select root.col1 from root:struct<col1:struct<a:boolean,b:double>,col2:double> @Test public void testGetSelectNestedColPathsFromChildren2() { ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null); ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(col1Type, colDesc, "col1", false); final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths); List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths); compareTestResults(groups, "root.col1"); } // Test select root.col2 from root:struct<col1:struct<a:boolean,b:double>,col2:double> @Test public void testGetSelectNestedColPathsFromChildren3() { ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null); ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(col1Type, colDesc, "col2", false); final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths); List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths); compareTestResults(groups, "root.col2"); } // Test select root from root:struct<col1:struct<a:boolean,b:double>,col2:double> @Test public void testGetSelectNestedColPathsFromChildren4() { ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null); ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(colDesc), paths); List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths); compareTestResults(groups, "root"); } // Test select named_struct from named_struct:struct<a:boolean,b:double> @Test public void testGetSelectNestedColPathsFromChildren5(){ ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null); ExprNodeConstantDesc constADesc = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, "a"); ExprNodeConstantDesc constBDesc = new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, "b"); List<ExprNodeDesc> list = new ArrayList<>(); list.add(constADesc); list.add(constBDesc); GenericUDF udf = mock(GenericUDF.class); ExprNodeDesc funcDesc = new ExprNodeGenericFuncDesc(col1Type, udf, "named_struct", list); ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, funcDesc, "foo", false); final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0")); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths); List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths); // Return empty result since only constant Desc exists assertEquals(0, groups.size()); } // Test select abs(root.col1.b) from table test(root struct<col1:struct<a:boolean,b:double>, // col2:double>); @Test public void testGetSelectNestedColPathsFromChildren6(){ ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null); ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc col1 = new ExprNodeFieldDesc(col1Type, colDesc, "col1", false); ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, col1, "b", false); final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0")); GenericUDF udf = mock(GenericUDFBridge.class); List<ExprNodeDesc> list = new ArrayList<>(); list.add(fieldDesc); ExprNodeDesc funcDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.binaryTypeInfo, udf, "abs", list); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(funcDesc), paths); List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths); compareTestResults(groups, "root.col1.b"); } // Test select pow(root.col1.b, root.col2) from table test(root // struct<col1:struct<a:boolean,b:double>, col2:double>); @Test public void testGetSelectNestedColPathsFromChildren7(){ ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null); ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc col1 = new ExprNodeFieldDesc(col1Type, colDesc, "col1", false); ExprNodeDesc fieldDesc1 = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, col1, "b", false); colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false); ExprNodeDesc col2 = new ExprNodeFieldDesc(col2Type, colDesc, "col2", false); final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0")); GenericUDF udf = mock(GenericUDFPower.class); List<ExprNodeDesc> list = new ArrayList<>(); list.add(fieldDesc1); list.add(col2); ExprNodeDesc funcDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, udf, "pow", list); SelectOperator selectOperator = buildSelectOperator(Arrays.asList(funcDesc), paths); List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths); compareTestResults(groups, "root.col1.b", "root.col2"); } @Test public void testFieldNodeFromString() { FieldNode fn = FieldNode.fromPath("s.a.b"); assertEquals("s", fn.getFieldName()); assertEquals(1, fn.getNodes().size()); FieldNode childFn = fn.getNodes().get(0); assertEquals("a", childFn.getFieldName()); assertEquals(1, childFn.getNodes().size()); assertEquals("b", childFn.getNodes().get(0).getFieldName()); } @Test public void testMergeFieldNode() { FieldNode fn1 = FieldNode.fromPath("s.a.b"); FieldNode fn2 = FieldNode.fromPath("s.a"); assertEquals(fn2, FieldNode.mergeFieldNode(fn1, fn2)); assertEquals(fn2, FieldNode.mergeFieldNode(fn2, fn1)); fn1 = FieldNode.fromPath("s.a"); fn2 = FieldNode.fromPath("p.b"); assertNull(FieldNode.mergeFieldNode(fn1, fn2)); fn1 = FieldNode.fromPath("s.a.b"); fn2 = FieldNode.fromPath("s.a.c"); FieldNode fn = FieldNode.mergeFieldNode(fn1, fn2); assertEquals("s", fn.getFieldName()); FieldNode childFn = fn.getNodes().get(0); assertEquals("a", childFn.getFieldName()); assertEquals(2, childFn.getNodes().size()); assertEquals("b", childFn.getNodes().get(0).getFieldName()); assertEquals("c", childFn.getNodes().get(1).getFieldName()); } private void compareTestResults(List<FieldNode> fieldNodes, String... paths) { List<String> expectedPaths = new ArrayList<>(); for (FieldNode fn : fieldNodes) { expectedPaths.addAll(fn.toPaths()); } assertEquals("Expected paths to have length " + expectedPaths + ", but got " + paths.length, expectedPaths.size(), paths.length); for (int i = 0; i < expectedPaths.size(); ++i) { assertEquals("Element at index " + i + " doesn't match", expectedPaths.get(i), paths[i]); } } private SelectOperator buildSelectOperator( List<ExprNodeDesc> colList, List<FieldNode> outputCols) { SelectOperator selectOperator = mock(SelectOperator.class); SelectDesc selectDesc = new SelectDesc(colList, ColumnPrunerProcCtx.toColumnNames(outputCols)); selectDesc.setSelStarNoCompute(false); when(selectOperator.getConf()).thenReturn(selectDesc); return selectOperator; } }