/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.operator.filter;

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.request.BrokerRequest;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.request.FilterQueryTree;
import com.linkedin.pinot.common.utils.request.RequestUtils;
import com.linkedin.pinot.core.common.BlockDocIdIterator;
import com.linkedin.pinot.core.common.BlockDocIdSet;
import com.linkedin.pinot.core.common.Constants;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.readers.FileFormat;
import com.linkedin.pinot.core.data.readers.RecordReader;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.operator.blocks.BaseFilterBlock;
import com.linkedin.pinot.core.operator.filter.BaseFilterOperator;
import com.linkedin.pinot.core.plan.FilterPlanNode;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.core.segment.index.loader.Loaders;
import com.linkedin.pinot.pql.parsers.Pql2Compiler;
import com.linkedin.pinot.util.TestUtils;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;


/**
 * Unit test for the optimization of predicates that always evaluate to false during filter tree creation.
 */
public class FilterTreeOptimizationTest {
  // Fixed: logger was accidentally created for TransformExpressionOperatorTest (copy-paste error).
  private static final Logger LOGGER = LoggerFactory.getLogger(FilterTreeOptimizationTest.class);

  private static final String SEGMENT_DIR_NAME =
      System.getProperty("java.io.tmpdir") + File.separator + "filterOptimization";
  private static final String SEGMENT_NAME = "filterOptSeg";
  private static final String TABLE_NAME = "filterOptTable";

  private static final int NUM_ROWS = 1000;
  private static final String[] DIMENSIONS = new String[]{"dim_0", "dim_1", "dim_2", "dim_3"};
  private static final int MAX_DIMENSION_VALUES = 100;

  private IndexSegment _indexSegment;
  private Pql2Compiler _compiler;

  @BeforeClass
  public void setup()
      throws Exception {
    Schema schema = buildSchema(DIMENSIONS);
    buildSegment(SEGMENT_DIR_NAME, SEGMENT_NAME, schema);
    _indexSegment = Loaders.IndexSegment.load(new File(SEGMENT_DIR_NAME, SEGMENT_NAME), ReadMode.heap);
    _compiler = new Pql2Compiler();
  }

  @AfterClass
  public void tearDown()
      throws IOException {
    FileUtils.deleteDirectory(new File(SEGMENT_DIR_NAME));
  }

  @Test
  public void test() {
    // No alwaysFalse predicates.
    testQuery(String.format("select count(*) from %s where (%s = 'dim_0_1' AND %s <> 'dim_1_2')", TABLE_NAME,
        DIMENSIONS[0], DIMENSIONS[1]));

    // (nonAlwaysFalse AND (nonAlwaysFalse AND alwaysFalse))
    testQuery(String.format("select count(*) from %s where (%s = 'dim_0_1' AND (%s <> 'dim_1_2' AND %s = 'x'))",
        TABLE_NAME, DIMENSIONS[0], DIMENSIONS[1], DIMENSIONS[2]));

    // (alwaysFalse OR (alwaysFalse OR alwaysFalse))
    testQuery(String.format("select count(*) from %s where (%s = 'x' OR (%s = 'y' OR %s = 'z'))", TABLE_NAME,
        DIMENSIONS[0], DIMENSIONS[1], DIMENSIONS[2]));

    // ((alwaysFalse AND nonAlwaysFalse) OR alwaysFalse)
    testQuery(String.format("select count(*) from %s where ((%s = 'x' AND %s = 'dim_1_5') OR %s = 'z')", TABLE_NAME,
        DIMENSIONS[0], DIMENSIONS[1], DIMENSIONS[2]));
  }

  /**
   * Helper method to perform the actual testing for the given query.
   * <ul>
   *   <li> Constructs the filter operator tree with and without the alwaysFalse optimization. </li>
   *   <li> Asserts that the docIds returned by the two operators are identical. </li>
   * </ul>
   *
   * @param query Query to run
   */
  private void testQuery(String query) {
    BrokerRequest brokerRequest = _compiler.compileToBrokerRequest(query);
    FilterQueryTree filterQueryTree = RequestUtils.generateFilterQueryTree(brokerRequest);

    // Build the same physical filter tree twice: without (expected) and with (actual) the optimization.
    BaseFilterOperator expectedOperator =
        FilterPlanNode.constructPhysicalOperator(filterQueryTree, _indexSegment, false);
    BaseFilterOperator actualOperator =
        FilterPlanNode.constructPhysicalOperator(filterQueryTree, _indexSegment, true);

    BaseFilterBlock expectedBlock;
    while ((expectedBlock = expectedOperator.getNextBlock()) != null) {
      BaseFilterBlock actualBlock = actualOperator.getNextBlock();
      Assert.assertNotNull(actualBlock);

      final BlockDocIdSet expectedDocIdSet = expectedBlock.getBlockDocIdSet();
      final BlockDocIdIterator expectedIterator = expectedDocIdSet.iterator();

      final BlockDocIdSet actualDocIdSet = actualBlock.getBlockDocIdSet();
      final BlockDocIdIterator actualIterator = actualDocIdSet.iterator();

      // Advance both iterators in lock-step and compare every value, including the terminating EOF.
      // (The previous version short-circuited out of the loop when either iterator hit EOF, which
      // could miss the case where one iterator had exactly one more docId than the other.)
      int expectedDocId;
      int actualDocId;
      do {
        expectedDocId = expectedIterator.next();
        actualDocId = actualIterator.next();
        Assert.assertEquals(actualDocId, expectedDocId);
      } while (expectedDocId != Constants.EOF);
    }
  }
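
  // Note on what "alwaysFalse" means in this test: the segment built below only contains values
  // of the form '<dimensionName>_<0..99>' (e.g. 'dim_0_1'), so a predicate such as dim_0 = 'x'
  // references a value absent from the dictionary and can never match any document. With the
  // optimization enabled, such predicates (and AND sub-trees containing them) are expected to be
  // pruned at plan time, while still producing exactly the same docIds as the unoptimized plan.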
  /**
   * Helper method to build a segment.
   *
   * @param segmentDirName Name of segment directory
   * @param segmentName Name of segment
   * @param schema Schema for segment
   * @return RecordReader containing the data the segment was built from
   * @throws Exception
   */
  private RecordReader buildSegment(String segmentDirName, String segmentName, Schema schema)
      throws Exception {
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setOutDir(segmentDirName);
    config.setFormat(FileFormat.AVRO);
    config.setTableName(TABLE_NAME);
    config.setSegmentName(segmentName);

    // Generate NUM_ROWS rows, where each dimension cycles through MAX_DIMENSION_VALUES distinct
    // values of the form '<dimensionName>_<i>'.
    final List<GenericRow> data = new ArrayList<>();
    for (int row = 0; row < NUM_ROWS; row++) {
      HashMap<String, Object> map = new HashMap<>();
      for (String dimensionName : DIMENSIONS) {
        map.put(dimensionName, dimensionName + '_' + (row % MAX_DIMENSION_VALUES));
      }

      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      data.add(genericRow);
    }

    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    RecordReader reader = new TestUtils.GenericRowRecordReader(schema, data);
    driver.init(config, reader);
    driver.build();

    LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
    return reader;
  }

  /**
   * Helper method to build a schema containing single-valued STRING dimensions with the given names.
   *
   * @param dimensions Dimension names
   * @return Schema containing the given dimensions
   */
  private static Schema buildSchema(String[] dimensions) {
    Schema schema = new Schema();
    for (String dimension : dimensions) {
      DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimension, FieldSpec.DataType.STRING, true);
      schema.addField(dimensionFieldSpec);
    }
    return schema;
  }
}