/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.accumulo.predicate; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; import org.apache.hadoop.hive.accumulo.columns.ColumnMapper; import org.apache.hadoop.hive.accumulo.predicate.compare.CompareOp; import org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare; import org.apache.hadoop.hive.accumulo.predicate.compare.Equal; import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThan; import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual; import org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare; import org.apache.hadoop.hive.accumulo.predicate.compare.LessThan; import org.apache.hadoop.hive.accumulo.predicate.compare.LessThanOrEqual; import org.apache.hadoop.hive.accumulo.predicate.compare.LongCompare; import org.apache.hadoop.hive.accumulo.predicate.compare.NotEqual; import org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison; import org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare; import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; import org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.index.IndexSearchCondition; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaIntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; import com.google.common.base.Joiner; import com.google.common.collect.Lists; public class TestAccumuloPredicateHandler { private AccumuloPredicateHandler handler = AccumuloPredicateHandler.getInstance(); private JobConf conf; private ColumnMapper columnMapper; @Before public void setup() throws TooManyAccumuloColumnsException { FunctionRegistry.getFunctionNames(); conf = new JobConf(); List<String> columnNames = Arrays.asList("field1", "rid"); List<TypeInfo> columnTypes = Arrays.<TypeInfo> asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,string"); String columnMappingStr = "cf:f1,:rowID"; conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes); } @Test public void testGetRowIDSearchCondition() { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "hi"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); List<IndexSearchCondition> sConditions = handler.getSearchConditions(conf); assertEquals(sConditions.size(), 1); } @Test() public void testRangeEqual() throws SerDeException { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); Collection<Range> ranges = handler.getRanges(conf, columnMapper); assertEquals(ranges.size(), 1); Range range = ranges.iterator().next(); assertTrue(range.isStartKeyInclusive()); assertFalse(range.isEndKeyInclusive()); assertTrue(range.contains(new Key(new Text("aaa")))); assertTrue(range.afterEndKey(new Key(new Text("aab")))); assertTrue(range.beforeStartKey(new Key(new Text("aa")))); } @Test() public void testRangeGreaterThan() throws SerDeException { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); Collection<Range> ranges = handler.getRanges(conf, columnMapper); assertEquals(ranges.size(), 1); Range range = ranges.iterator().next(); assertTrue(range.isStartKeyInclusive()); assertFalse(range.isEndKeyInclusive()); assertFalse(range.contains(new Key(new Text("aaa")))); assertFalse(range.afterEndKey(new Key(new Text("ccccc")))); assertTrue(range.contains(new Key(new Text("aab")))); assertTrue(range.beforeStartKey(new Key(new Text("aa")))); assertTrue(range.beforeStartKey(new Key(new Text("aaa")))); } @Test public void rangeGreaterThanOrEqual() throws SerDeException { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); Collection<Range> ranges = handler.getRanges(conf, columnMapper); assertEquals(ranges.size(), 1); Range range = ranges.iterator().next(); assertTrue(range.isStartKeyInclusive()); assertFalse(range.isEndKeyInclusive()); assertTrue(range.contains(new Key(new Text("aaa")))); assertFalse(range.afterEndKey(new Key(new Text("ccccc")))); assertTrue(range.contains(new Key(new Text("aab")))); assertTrue(range.beforeStartKey(new Key(new Text("aa")))); } @Test public void rangeLessThan() throws SerDeException { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); Collection<Range> ranges = handler.getRanges(conf, columnMapper); assertEquals(ranges.size(), 1); Range range = ranges.iterator().next(); assertTrue(range.isStartKeyInclusive()); assertFalse(range.isEndKeyInclusive()); assertFalse(range.contains(new Key(new Text("aaa")))); assertTrue(range.afterEndKey(new Key(new Text("ccccc")))); assertTrue(range.contains(new Key(new Text("aa")))); assertTrue(range.afterEndKey(new Key(new Text("aab")))); assertTrue(range.afterEndKey(new Key(new Text("aaa")))); } @Test public void rangeLessThanOrEqual() throws SerDeException { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); Collection<Range> ranges = handler.getRanges(conf, columnMapper); assertEquals(ranges.size(), 1); Range range = ranges.iterator().next(); assertTrue(range.isStartKeyInclusive()); assertFalse(range.isEndKeyInclusive()); assertTrue(range.contains(new Key(new Text("aaa")))); assertTrue(range.afterEndKey(new Key(new Text("ccccc")))); assertTrue(range.contains(new Key(new Text("aa")))); assertTrue(range.afterEndKey(new Key(new Text("aab")))); assertFalse(range.afterEndKey(new Key(new Text("aaa")))); } @Test public void testDisjointRanges() throws SerDeException { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children); assertNotNull(node); ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb"); List<ExprNodeDesc> children2 = Lists.newArrayList(); children2.add(column2); children2.add(constant2); ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2); assertNotNull(node2); List<ExprNodeDesc> bothFilters = Lists.newArrayList(); bothFilters.add(node); bothFilters.add(node2); ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters); String filterExpr = SerializationUtilities.serializeExpression(both); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); Collection<Range> ranges = handler.getRanges(conf, columnMapper); // Impossible to get ranges for row <= 'aaa' and row >= 'bbb' assertEquals(0, ranges.size()); } @Test public void testMultipleRanges() throws SerDeException { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children); assertNotNull(node); ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb"); List<ExprNodeDesc> children2 = Lists.newArrayList(); children2.add(column2); children2.add(constant2); ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children2); assertNotNull(node2); List<ExprNodeDesc> bothFilters = Lists.newArrayList(); bothFilters.add(node); bothFilters.add(node2); ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters); String filterExpr = SerializationUtilities.serializeExpression(both); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); List<Range> ranges = handler.getRanges(conf, columnMapper); assertEquals(1, ranges.size()); Range range = ranges.get(0); assertEquals(new Range(new Key("aaa"), true, new Key("bbb"), false), range); } @Test public void testPushdownTuple() throws SerDeException, NoSuchPrimitiveComparisonException, NoSuchCompareOpException { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field1", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); List<IndexSearchCondition> sConditions = handler.getSearchConditions(conf); assertEquals(sConditions.size(), 1); IndexSearchCondition sc = sConditions.get(0); PushdownTuple tuple = new PushdownTuple(sConditions.get(0), handler.getPrimitiveComparison(sc .getColumnDesc().getTypeString(), sc), handler.getCompareOp(sc.getComparisonOp(), sc)); byte[] expectedVal = new byte[4]; ByteBuffer.wrap(expectedVal).putInt(5); assertArrayEquals(tuple.getConstVal(), expectedVal); assertEquals(tuple.getcOpt().getClass(), Equal.class); assertEquals(tuple.getpCompare().getClass(), IntCompare.class); } @Test(expected = NoSuchPrimitiveComparisonException.class) public void testPushdownColumnTypeNotSupported() throws SerDeException, NoSuchPrimitiveComparisonException, NoSuchCompareOpException { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.floatTypeInfo, "field1", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, 5.5f); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); List<IndexSearchCondition> sConditions = handler.getSearchConditions(conf); assertEquals(sConditions.size(), 1); IndexSearchCondition sc = sConditions.get(0); handler.getPrimitiveComparison(sc.getColumnDesc().getTypeString(), sc); } @Test public void testPushdownComparisonOptNotSupported() { try { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPNotNull(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); List<IndexSearchCondition> sConditions = handler.getSearchConditions(conf); assertEquals(sConditions.size(), 1); IndexSearchCondition sc = sConditions.get(0); new PushdownTuple(sc, handler.getPrimitiveComparison(sc.getColumnDesc().getTypeString(), sc), handler.getCompareOp(sc.getComparisonOp(), sc)); fail("Should fail: compare op not registered for index analyzer. Should leave undesirable residual predicate"); } catch (RuntimeException e) { assertTrue(e.getMessage().contains("Unexpected residual predicate: field1 is not null")); } catch (Exception e) { fail(StringUtils.stringifyException(e)); } } @Test public void testIteratorIgnoreRowIDFields() { ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children); assertNotNull(node); ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb"); List<ExprNodeDesc> children2 = Lists.newArrayList(); children2.add(column2); children2.add(constant2); ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2); assertNotNull(node2); List<ExprNodeDesc> bothFilters = Lists.newArrayList(); bothFilters.add(node); bothFilters.add(node2); ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters); String filterExpr = SerializationUtilities.serializeExpression(both); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); try { List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper); assertEquals(iterators.size(), 0); } catch (SerDeException e) { StringUtils.stringifyException(e); } } @Test public void testIgnoreIteratorPushdown() throws TooManyAccumuloColumnsException { // Override what's placed in the Configuration by setup() conf = new JobConf(); List<String> columnNames = Arrays.asList("field1", "field2", "rid"); List<TypeInfo> columnTypes = Arrays.<TypeInfo> asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo); conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string"); String columnMappingStr = "cf:f1,cf:f2,:rowID"; conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes); ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children); assertNotNull(node); ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false); ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5); List<ExprNodeDesc> children2 = Lists.newArrayList(); children2.add(column2); children2.add(constant2); ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2); assertNotNull(node2); List<ExprNodeDesc> bothFilters = Lists.newArrayList(); bothFilters.add(node); bothFilters.add(node2); ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters); String filterExpr = SerializationUtilities.serializeExpression(both); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); conf.setBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, false); try { List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper); assertEquals(iterators.size(), 0); } catch (Exception e) { fail(StringUtils.stringifyException(e)); } } @Test public void testCreateIteratorSettings() throws Exception { // Override what's placed in the Configuration by setup() conf = new JobConf(); List<String> columnNames = Arrays.asList("field1", "field2", "rid"); List<TypeInfo> columnTypes = Arrays.<TypeInfo> asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo); conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string"); conf.set(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName()); String columnMappingStr = "cf:f1,cf:f2,:rowID"; conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes); ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children); assertNotNull(node); ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false); ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5); List<ExprNodeDesc> children2 = Lists.newArrayList(); children2.add(column2); children2.add(constant2); ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2); assertNotNull(node2); List<ExprNodeDesc> bothFilters = Lists.newArrayList(); bothFilters.add(node); bothFilters.add(node2); ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters); String filterExpr = SerializationUtilities.serializeExpression(both); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper); assertEquals(iterators.size(), 2); IteratorSetting is1 = iterators.get(0); IteratorSetting is2 = iterators.get(1); boolean foundQual = false; boolean foundPCompare = false; boolean foundCOpt = false; boolean foundConst = false; for (Map.Entry<String,String> option : is1.getOptions().entrySet()) { String optKey = option.getKey(); if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) { foundQual = true; assertEquals(option.getValue(), "cf:f1"); } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) { foundConst = true; assertEquals(option.getValue(), new String(Base64.encodeBase64("aaa".getBytes()))); } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) { foundCOpt = true; assertEquals(option.getValue(), LessThanOrEqual.class.getName()); } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) { foundPCompare = true; assertEquals(option.getValue(), StringCompare.class.getName()); } } assertTrue(foundConst & foundCOpt & foundPCompare & foundQual); foundQual = false; foundPCompare = false; foundCOpt = false; foundConst = false; for (Map.Entry<String,String> option : is2.getOptions().entrySet()) { String optKey = option.getKey(); if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) { foundQual = true; assertEquals(option.getValue(), "cf:f2"); } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) { foundConst = true; byte[] intVal = new byte[4]; ByteBuffer.wrap(intVal).putInt(5); assertEquals(option.getValue(), new String(Base64.encodeBase64(intVal))); } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) { foundCOpt = true; assertEquals(option.getValue(), GreaterThan.class.getName()); } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) { foundPCompare = true; assertEquals(option.getValue(), IntCompare.class.getName()); } } assertTrue(foundConst & foundCOpt & foundPCompare & foundQual); } @Test public void testBasicOptLookup() throws NoSuchCompareOpException { boolean foundEqual = false; boolean foundNotEqual = false; boolean foundGreaterThanOrEqual = false; boolean foundGreaterThan = false; boolean foundLessThanOrEqual = false; boolean foundLessThan = false; for (String opt : handler.cOpKeyset()) { Class<? extends CompareOp> compOpt = handler.getCompareOpClass(opt); if (compOpt.getName().equals(Equal.class.getName())) { foundEqual = true; } else if (compOpt.getName().equals(NotEqual.class.getName())) { foundNotEqual = true; } else if (compOpt.getName().equals(GreaterThan.class.getName())) { foundGreaterThan = true; } else if (compOpt.getName().equals(GreaterThanOrEqual.class.getName())) { foundGreaterThanOrEqual = true; } else if (compOpt.getName().equals(LessThan.class.getName())) { foundLessThan = true; } else if (compOpt.getName().equals(LessThanOrEqual.class.getName())) { foundLessThanOrEqual = true; } } assertTrue("Did not find Equal comparison op", foundEqual); assertTrue("Did not find NotEqual comparison op", foundNotEqual); assertTrue("Did not find GreaterThan comparison op", foundGreaterThan); assertTrue("Did not find GreaterThanOrEqual comparison op", foundGreaterThanOrEqual); assertTrue("Did not find LessThan comparison op", foundLessThan); assertTrue("Did not find LessThanOrEqual comparison op", foundLessThanOrEqual); } @Test(expected = NoSuchCompareOpException.class) public void testNoOptFound() throws NoSuchCompareOpException { handler.getCompareOpClass("blah"); } @Test public void testPrimitiveComparsionLookup() throws NoSuchPrimitiveComparisonException { boolean foundLong = false; boolean foundString = false; boolean foundInt = false; boolean foundDouble = false; for (String type : handler.pComparisonKeyset()) { Class<? extends PrimitiveComparison> pCompare = handler.getPrimitiveComparisonClass(type); if (pCompare.getName().equals(DoubleCompare.class.getName())) { foundDouble = true; } else if (pCompare.getName().equals(LongCompare.class.getName())) { foundLong = true; } else if (pCompare.getName().equals(IntCompare.class.getName())) { foundInt = true; } else if (pCompare.getName().equals(StringCompare.class.getName())) { foundString = true; } } assertTrue("Did not find DoubleCompare op", foundDouble); assertTrue("Did not find LongCompare op", foundLong); assertTrue("Did not find IntCompare op", foundInt); assertTrue("Did not find StringCompare op", foundString); } @Test public void testRowRangeIntersection() throws SerDeException { // rowId >= 'f' ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f"); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(column); children.add(constant); ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children); assertNotNull(node); // rowId <= 'm' ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m"); List<ExprNodeDesc> children2 = Lists.newArrayList(); children2.add(column2); children2.add(constant2); ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children2); assertNotNull(node2); List<ExprNodeDesc> bothFilters = Lists.newArrayList(); bothFilters.add(node); bothFilters.add(node2); ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters); String filterExpr = SerializationUtilities.serializeExpression(both); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); // Should make ['f', 'm\0') List<Range> ranges = handler.getRanges(conf, columnMapper); assertEquals(1, ranges.size()); assertEquals(new Range(new Key("f"), true, new Key("m\0"), false), ranges.get(0)); } @Test public void testRowRangeGeneration() throws SerDeException { List<String> columnNames = Arrays.asList("key", "column"); List<TypeInfo> columnTypes = Arrays.<TypeInfo> asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,string"); String columnMappingStr = ":rowID,cf:f1"; conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes); // 100 < key ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 100); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(constant); children.add(column); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); // Should make (100, +inf) List<Range> ranges = handler.getRanges(conf, columnMapper); Assert.assertEquals(1, ranges.size()); Assert.assertEquals(new Range(new Text("100"), false, null, false), ranges.get(0)); } @Test public void testBinaryRangeGeneration() throws Exception { List<String> columnNames = Arrays.asList("key", "column"); List<TypeInfo> columnTypes = Arrays.<TypeInfo> asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo); conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); conf.set(serdeConstants.LIST_COLUMN_TYPES, "int,string"); String columnMappingStr = ":rowID#b,cf:f1"; conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes); int intValue = 100; // Make binary integer value in the bytearray ByteArrayOutputStream baos = new ByteArrayOutputStream(); JavaIntObjectInspector intOI = (JavaIntObjectInspector) PrimitiveObjectInspectorFactory .getPrimitiveJavaObjectInspector(TypeInfoFactory .getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME)); LazyUtils.writePrimitive(baos, intValue, intOI); // 100 < key ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", null, false); ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, intValue); List<ExprNodeDesc> children = Lists.newArrayList(); children.add(constant); children.add(column); ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children); assertNotNull(node); String filterExpr = SerializationUtilities.serializeExpression(node); conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); // Should make (100, +inf) List<Range> ranges = handler.getRanges(conf, columnMapper); Assert.assertEquals(1, ranges.size()); Assert.assertEquals(new Range(new Text(baos.toByteArray()), false, null, false), ranges.get(0)); } @Test public void testNullRangeGeneratorOutput() throws SerDeException { // The AccumuloRangeGenerator produces an Object (due to the limitations of the // traversal interface) which requires interpretation of that Object into Ranges. // Changes in the return object from the AccumuloRangeGenerator must also represent // a change in the AccumuloPredicateHandler. AccumuloPredicateHandler mockHandler = Mockito.mock(AccumuloPredicateHandler.class); ExprNodeDesc root = Mockito.mock(ExprNodeDesc.class); String hiveRowIdColumnName = "rid"; Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); Mockito.when(mockHandler.generateRanges(conf, columnMapper, hiveRowIdColumnName, root)).thenReturn(null); Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); // A null result from AccumuloRangeGenerator is all ranges Assert.assertEquals(Arrays.asList(new Range()), mockHandler.getRanges(conf, columnMapper)); } @Test public void testEmptyListRangeGeneratorOutput() throws SerDeException { // The AccumuloRangeGenerator produces an Object (due to the limitations of the // traversal interface) which requires interpretation of that Object into Ranges. // Changes in the return object from the AccumuloRangeGenerator must also represent // a change in the AccumuloPredicateHandler. AccumuloPredicateHandler mockHandler = Mockito.mock(AccumuloPredicateHandler.class); ExprNodeDesc root = Mockito.mock(ExprNodeDesc.class); String hiveRowIdColumnName = "rid"; Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); Mockito.when(mockHandler.generateRanges(conf, columnMapper, hiveRowIdColumnName, root)) .thenReturn(Collections.emptyList()); Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); // A null result from AccumuloRangeGenerator is all ranges Assert.assertEquals(Collections.emptyList(), mockHandler.getRanges(conf, columnMapper)); } @Test public void testSingleRangeGeneratorOutput() throws SerDeException { // The AccumuloRangeGenerator produces an Object (due to the limitations of the // traversal interface) which requires interpretation of that Object into Ranges. // Changes in the return object from the AccumuloRangeGenerator must also represent // a change in the AccumuloPredicateHandler. AccumuloPredicateHandler mockHandler = Mockito.mock(AccumuloPredicateHandler.class); ExprNodeDesc root = Mockito.mock(ExprNodeDesc.class); String hiveRowIdColumnName = "rid"; Range r = new Range("a"); Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); Mockito.when(mockHandler.generateRanges(conf, columnMapper, hiveRowIdColumnName, root)).thenReturn(r); Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); // A null result from AccumuloRangeGenerator is all ranges Assert.assertEquals(Collections.singletonList(r), mockHandler.getRanges(conf, columnMapper)); } @Test public void testManyRangesGeneratorOutput() throws SerDeException { // The AccumuloRangeGenerator produces an Object (due to the limitations of the // traversal interface) which requires interpretation of that Object into Ranges. // Changes in the return object from the AccumuloRangeGenerator must also represent // a change in the AccumuloPredicateHandler. AccumuloPredicateHandler mockHandler = Mockito.mock(AccumuloPredicateHandler.class); ExprNodeDesc root = Mockito.mock(ExprNodeDesc.class); String hiveRowIdColumnName = "rid"; Range r1 = new Range("a"), r2 = new Range("z"); Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); Mockito.when(mockHandler.generateRanges(conf, columnMapper, hiveRowIdColumnName, root)) .thenReturn(Arrays.asList(r1, r2)); Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); // A null result from AccumuloRangeGenerator is all ranges Assert.assertEquals(Arrays.asList(r1, r2), mockHandler.getRanges(conf, columnMapper)); } }