/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.planner;

import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.util.BitSets;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.physical.base.FileGroupScan;
import org.apache.drill.exec.physical.base.GroupScan;
import org.apache.drill.exec.planner.logical.DrillRel;
import org.apache.drill.exec.planner.logical.DrillScanRel;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.store.dfs.FileSelection;
import org.apache.drill.exec.store.dfs.FormatSelection;
import org.apache.drill.exec.store.dfs.MetadataContext;
import org.apache.drill.exec.store.parquet.ParquetGroupScan;
import org.apache.drill.exec.vector.ValueVector;

import com.google.common.collect.Lists;

import java.io.IOException;
import java.util.BitSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * PartitionDescriptor that describes partitions based on column names instead of directory structure.
 */
public class ParquetPartitionDescriptor extends AbstractPartitionDescriptor {

  private final List<SchemaPath> partitionColumns;
  private final DrillScanRel scanRel;

  public ParquetPartitionDescriptor(PlannerSettings settings, DrillScanRel scanRel) {
    ParquetGroupScan scan = (ParquetGroupScan) scanRel.getGroupScan();
    this.partitionColumns = scan.getPartitionColumns();
    this.scanRel = scanRel;
  }

  @Override
  public int getPartitionHierarchyIndex(String partitionName) {
    throw new UnsupportedOperationException();
  }

  @Override
  public boolean isPartitionName(String name) {
    // Wrap the name in a SchemaPath before the lookup: partitionColumns holds
    // SchemaPath instances, so contains(name) with a raw String is always false.
    return partitionColumns.contains(SchemaPath.getSimplePath(name));
  }

  @Override
  public Integer getIdIfValid(String name) {
    SchemaPath schemaPath = SchemaPath.getSimplePath(name);
    int id = partitionColumns.indexOf(schemaPath);
    if (id == -1) {
      return null;
    }
    return id;
  }

  @Override
  public int getMaxHierarchyLevel() {
    return partitionColumns.size();
  }

  /**
   * Clones the current group scan with a new file selection restricted to the files
   * that survived pruning.
   */
  private GroupScan createNewGroupScan(List<String> newFiles, String cacheFileRoot,
      boolean wasAllPartitionsPruned, MetadataContext metaContext) throws IOException {
    final FileSelection newSelection = FileSelection.create(null, newFiles, getBaseTableLocation(),
        cacheFileRoot, wasAllPartitionsPruned);
    newSelection.setMetaContext(metaContext);
    final FileGroupScan newScan = ((FileGroupScan) scanRel.getGroupScan()).clone(newSelection);
    return newScan;
  }

  @Override
  public void populatePartitionVectors(ValueVector[] vectors, List<PartitionLocation> partitions,
      BitSet partitionColumnBitSet, Map<Integer, String> fieldNameMap) {
    // Write one record per partition location, filling only the columns that are
    // set in partitionColumnBitSet.
    int record = 0;
    for (PartitionLocation partitionLocation : partitions) {
      for (int partitionColumnIndex : BitSets.toIter(partitionColumnBitSet)) {
        SchemaPath column = SchemaPath.getSimplePath(fieldNameMap.get(partitionColumnIndex));
        ((ParquetGroupScan) scanRel.getGroupScan()).populatePruningVector(vectors[partitionColumnIndex],
            record, column, partitionLocation.getEntirePartitionLocation());
      }
      record++;
    }

    for (ValueVector v : vectors) {
      if (v == null) {
        continue;
      }
      v.getMutator().setValueCount(partitions.size());
    }
  }

  @Override
  public TypeProtos.MajorType getVectorType(SchemaPath column, PlannerSettings plannerSettings) {
    return ((ParquetGroupScan) scanRel.getGroupScan()).getTypeForColumn(column);
  }

  @Override
  public String getBaseTableLocation() {
    final FormatSelection origSelection = (FormatSelection) scanRel.getDrillTable().getSelection();
    return origSelection.getSelection().selectionRoot;
  }

  @Override
  protected void createPartitionSublists() {
    // Wrap every file of the scan in a partition location and split the result
    // into batches of PARTITION_BATCH_SIZE for incremental processing.
    Set<String> fileLocations = ((ParquetGroupScan) scanRel.getGroupScan()).getFileSet();
    List<PartitionLocation> locations = new LinkedList<>();
    for (String file : fileLocations) {
      locations.add(new ParquetPartitionLocation(file));
    }
    locationSuperList = Lists.partition(locations, PartitionDescriptor.PARTITION_BATCH_SIZE);
    sublistsCreated = true;
  }

  @Override
  public TableScan createTableScan(List<PartitionLocation> newPartitionLocation, String cacheFileRoot,
      boolean wasAllPartitionsPruned, MetadataContext metaContext) throws Exception {
    List<String> newFiles = Lists.newArrayList();
    for (final PartitionLocation location : newPartitionLocation) {
      newFiles.add(location.getEntirePartitionLocation());
    }

    final GroupScan newGroupScan = createNewGroupScan(newFiles, cacheFileRoot, wasAllPartitionsPruned, metaContext);

    return new DrillScanRel(scanRel.getCluster(),
        scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
        scanRel.getTable(),
        newGroupScan,
        scanRel.getRowType(),
        scanRel.getColumns(),
        true /*filter pushdown*/);
  }

  @Override
  public TableScan createTableScan(List<PartitionLocation> newPartitionLocation,
      boolean wasAllPartitionsPruned) throws Exception {
    return createTableScan(newPartitionLocation, null, wasAllPartitionsPruned, null);
  }
}
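// A minimal usage sketch (as a comment, to keep this file compilable). It assumes
// a planner rule already holds the DrillScanRel ("scanRel"), the PlannerSettings
// ("settings"), and the partition locations that survived filter evaluation
// ("keptLocations"); those names, and the column name "year", are illustrative
// only. Everything else is limited to methods declared in this class.
//
//   ParquetPartitionDescriptor descriptor = new ParquetPartitionDescriptor(settings, scanRel);
//   Integer id = descriptor.getIdIfValid("year");   // null when "year" is not a partition column
//   if (id != null) {
//     TypeProtos.MajorType type =
//         descriptor.getVectorType(SchemaPath.getSimplePath("year"), settings);
//     // ... build pruning vectors via populatePartitionVectors(...), evaluate the
//     // filter, then rebuild the scan over the surviving files:
//     TableScan prunedScan = descriptor.createTableScan(keptLocations, false /* wasAllPartitionsPruned */);
//   }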