/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.hive;

import java.lang.reflect.Constructor;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.impl.BatchCreator;
import org.apache.drill.exec.physical.impl.ScanBatch;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.store.RecordReader;
import org.apache.drill.exec.util.ImpersonationUtil;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.security.UserGroupInformation;

import com.google.common.collect.Lists;

@SuppressWarnings("unused")
public class HiveScanBatchCreator implements BatchCreator<HiveSubScan> {
  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveScanBatchCreator.class);

  /**
   * Maps each Hive native input format (ORC, Avro, RCFile, Parquet and Text)
   * to its dedicated reader class. Any input format not listed here falls
   * back to the default reader.
   */
  static final Map<String, Class<? extends HiveAbstractReader>> readerMap = new HashMap<>();

  static {
    readerMap.put(OrcInputFormat.class.getCanonicalName(), HiveOrcReader.class);
    readerMap.put(AvroContainerInputFormat.class.getCanonicalName(), HiveAvroReader.class);
    readerMap.put(RCFileInputFormat.class.getCanonicalName(), HiveRCFileReader.class);
    readerMap.put(MapredParquetInputFormat.class.getCanonicalName(), HiveParquetReader.class);
    readerMap.put(TextInputFormat.class.getCanonicalName(), HiveTextReader.class);
  }

  /**
   * Creates one {@link RecordReader} per input split (or a single reader with
   * no split when the table is empty) and wraps them in a {@link ScanBatch}.
   */
  @Override
  public ScanBatch getBatch(FragmentContext context, HiveSubScan config, List<RecordBatch> children)
      throws ExecutionSetupException {
    List<RecordReader> readers = Lists.newArrayList();
    HiveTableWithColumnCache table = config.getTable();
    List<InputSplit> splits = config.getInputSplits();
    List<HivePartition> partitions = config.getPartitions();
    boolean hasPartitions = (partitions != null && partitions.size() > 0);
    int i = 0;
    final UserGroupInformation proxyUgi = ImpersonationUtil.createProxyUgi(config.getUserName(),
        context.getQueryUserName());

    final HiveConf hiveConf = config.getHiveConf();

    final String formatName = table.getSd().getInputFormat();
    Class<? extends HiveAbstractReader> readerClass = HiveDefaultReader.class;
    if (readerMap.containsKey(formatName)) {
      readerClass = readerMap.get(formatName);
    }
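    // All reader classes are expected to share the same constructor signature,
    // so the class selected above can be instantiated reflectively, once per
    // input split.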
    Constructor<? extends HiveAbstractReader> readerConstructor;
    try {
      readerConstructor = readerClass.getConstructor(HiveTableWithColumnCache.class, HivePartition.class,
          InputSplit.class, List.class, FragmentContext.class, HiveConf.class,
          UserGroupInformation.class);
      for (InputSplit split : splits) {
        readers.add(readerConstructor.newInstance(table,
            (hasPartitions ? partitions.get(i++) : null), split, config.getColumns(), context, hiveConf, proxyUgi));
      }

      // A scan over an empty table still needs one reader so that the schema
      // is propagated downstream.
      if (readers.size() == 0) {
        readers.add(readerConstructor.newInstance(
            table, null, null, config.getColumns(), context, hiveConf, proxyUgi));
      }
    } catch (Exception e) {
      // Swallowing the exception here would hand an empty reader list to the
      // ScanBatch and surface a confusing error later; fail setup instead.
      logger.error("Failed to create reader {}", readerClass.getName(), e);
      throw new ExecutionSetupException(
          String.format("Failed to create reader %s", readerClass.getName()), e);
    }
    return new ScanBatch(config, context, readers.iterator());
  }
}