/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.io.orc; import java.io.IOException; import java.nio.ByteBuffer; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.TreeMap; import java.util.concurrent.Callable; import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.Metastore; import org.apache.hadoop.hive.metastore.Metastore.SplitInfos; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.AcidUtils.AcidBaseFileInfo; import org.apache.hadoop.hive.ql.io.AcidUtils.AcidOperationalProperties; import org.apache.hadoop.hive.ql.io.AcidUtils.Directory; import org.apache.hadoop.hive.ql.io.AcidUtils.ParsedDelta; import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat; import org.apache.hadoop.hive.ql.io.BatchToRowReader; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.HdfsUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.LlapWrappableInputFormatInterface; import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.ql.io.SelfDescribingInputFormatInterface; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader; import org.apache.hadoop.hive.ql.io.SyntheticFileId; import 
org.apache.hadoop.hive.ql.io.orc.ExternalCache.ExternalFooterCachesByConf; import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; import org.apache.hive.common.util.Ref; import org.apache.orc.ColumnStatistics; import org.apache.orc.OrcProto; import org.apache.orc.OrcUtils; import org.apache.orc.StripeInformation; import org.apache.orc.StripeStatistics; import org.apache.orc.TypeDescription; import org.apache.orc.impl.InStream; import org.apache.orc.impl.OrcTail; import org.apache.orc.impl.SchemaEvolution; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.protobuf.CodedInputStream; /** * A MapReduce/Hive input format for ORC files. * <p> * This class implements both the classic InputFormat, which stores the rows * directly, and AcidInputFormat, which stores a series of events with the * following schema: * <pre> * class AcidEvent<ROW> { * enum ACTION {INSERT, UPDATE, DELETE} * ACTION operation; * long originalTransaction; * int bucket; * long rowId; * long currentTransaction; * ROW row; * } * </pre> * Each AcidEvent object corresponds to an update event. The * originalTransaction, bucket, and rowId are the unique identifier for the row. * The operation and currentTransaction are the operation and the transaction * that added this event. Insert and update events include the entire row, while * delete events have null for row. 
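 * <p>
 * A minimal usage sketch of the non-ACID path through the classic
 * {@code mapred} API (the table path and job setup below are illustrative
 * assumptions, not part of this class):
 * <pre>
 *   JobConf job = new JobConf();
 *   FileInputFormat.setInputPaths(job, new Path("/warehouse/example_table"));
 *   OrcInputFormat inputFormat = new OrcInputFormat();
 *   for (InputSplit split : inputFormat.getSplits(job, 1)) {
 *     org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> reader =
 *         inputFormat.getRecordReader(split, job, Reporter.NULL);
 *     NullWritable key = reader.createKey();
 *     OrcStruct row = reader.createValue();
 *     while (reader.next(key, row)) {
 *       // process row
 *     }
 *     reader.close();
 *   }
 * </pre>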
 */
public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
    InputFormatChecker, VectorizedInputFormatInterface, LlapWrappableInputFormatInterface,
    SelfDescribingInputFormatInterface, AcidInputFormat<NullWritable, OrcStruct>,
    CombineHiveInputFormat.AvoidSplitCombination, BatchToRowInputFormat {

  static enum SplitStrategyKind {
    HYBRID,
    BI,
    ETL
  }

  private static final Logger LOG = LoggerFactory.getLogger(OrcInputFormat.class);
  private static final boolean isDebugEnabled = LOG.isDebugEnabled();
  static final HadoopShims SHIMS = ShimLoader.getHadoopShims();

  private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
  private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
  private static final int DEFAULT_ETL_FILE_THRESHOLD = 100;

  /**
   * When picking the hosts for a split that crosses block boundaries,
   * drop any host that has fewer than MIN_INCLUDED_LOCATION of the
   * number of bytes available on the host with the most.
   * If host1 has 10MB of the split, host2 has 20MB, and host3 has 18MB, the
   * split will contain host2 (100% of host2) and host3 (90% of host2). Host1,
   * with 50%, will be dropped.
   */
  private static final double MIN_INCLUDED_LOCATION = 0.80;

  @Override
  public boolean shouldSkipCombine(Path path,
                                   Configuration conf) throws IOException {
    return (conf.get(AcidUtils.CONF_ACID_KEY) != null) || AcidUtils.isAcid(path, conf);
  }

  /**
   * We can derive whether a split is ACID or not from the flags encoded in OrcSplit.
   * If the file split is not an instance of OrcSplit, then it is definitely not ACID.
   * If the file split is an instance of OrcSplit and the flags contain hasBase or deltas,
   * then it is definitely ACID.
   * Otherwise, fall back to the configuration object/table property.
   * @param conf the job configuration
   * @param inputSplit the split being read
   * @return true if the split should be read as an ACID split
   */
  public boolean isAcidRead(Configuration conf, InputSplit inputSplit) {
    if (!(inputSplit instanceof OrcSplit)) {
      return false;
    }

    /*
     * If OrcSplit.isAcid returns true, we know for sure it is ACID.
     */
    // if (((OrcSplit) inputSplit).isAcid()) {
    //   return true;
    // }

    /*
     * Fallback for the case when OrcSplit flags do not contain hasBase and deltas
     */
    return HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
  }

  private static class OrcRecordReader
      implements org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct>,
      StatsProvidingRecordReader {
    private final RecordReader reader;
    private final long offset;
    private final long length;
    private final int numColumns;
    private float progress = 0.0f;
    private final Reader file;
    private final SerDeStats stats;

    OrcRecordReader(Reader file, Configuration conf,
                    FileSplit split) throws IOException {
      List<OrcProto.Type> types = file.getTypes();
      this.file = file;
      numColumns = (types.size() == 0) ?
0 : types.get(0).getSubtypesCount(); this.offset = split.getStart(); this.length = split.getLength(); this.reader = createReaderFromFile(file, conf, offset, length); this.stats = new SerDeStats(); } @Override public boolean next(NullWritable key, OrcStruct value) throws IOException { if (reader.hasNext()) { reader.next(value); progress = reader.getProgress(); return true; } else { return false; } } @Override public NullWritable createKey() { return NullWritable.get(); } @Override public OrcStruct createValue() { return new OrcStruct(numColumns); } @Override public long getPos() throws IOException { return offset + (long) (progress * length); } @Override public void close() throws IOException { reader.close(); } @Override public float getProgress() throws IOException { return progress; } @Override public SerDeStats getStats() { stats.setRawDataSize(file.getRawDataSize()); stats.setRowCount(file.getNumberOfRows()); return stats; } } /** * Get the root column for the row. In ACID format files, it is offset by * the extra metadata columns. * @param isOriginal is the file in the original format? * @return the column number for the root of row. */ public static int getRootColumn(boolean isOriginal) { return isOriginal ? 0 : (OrcRecordUpdater.ROW + 1); } public static void raiseAcidTablesMustBeReadWithAcidReaderException(Configuration conf) throws IOException { String hiveInputFormat = HiveConf.getVar(conf, ConfVars.HIVEINPUTFORMAT); if (hiveInputFormat.equals(HiveInputFormat.class.getName())) { throw new IOException(ErrorMsg.ACID_TABLES_MUST_BE_READ_WITH_ACID_READER.getErrorCodedMsg()); } else { throw new IOException(ErrorMsg.ACID_TABLES_MUST_BE_READ_WITH_HIVEINPUTFORMAT.getErrorCodedMsg()); } } public static RecordReader createReaderFromFile(Reader file, Configuration conf, long offset, long length ) throws IOException { boolean isTransactionalTableScan = HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); if (isTransactionalTableScan) { raiseAcidTablesMustBeReadWithAcidReaderException(conf); } /** * Do we have schema on read in the configuration variables? */ TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE); Reader.Options options = new Reader.Options().range(offset, length); options.schema(schema); boolean isOriginal = isOriginal(file); if (schema == null) { schema = file.getSchema(); } List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema); options.include(genIncludedColumns(schema, conf)); setSearchArgument(options, types, conf, isOriginal); return file.rowsOptions(options); } public static boolean isOriginal(Reader file) { return !file.hasMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME); } public static boolean[] genIncludedColumns(TypeDescription readerSchema, List<Integer> included) { boolean[] result = new boolean[readerSchema.getMaximumId() + 1]; if (included == null) { Arrays.fill(result, true); return result; } result[0] = true; List<TypeDescription> children = readerSchema.getChildren(); for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) { if (included.contains(columnNumber)) { TypeDescription child = children.get(columnNumber); for(int col = child.getId(); col <= child.getMaximumId(); ++col) { result[col] = true; } } } return result; } /** * Reverses genIncludedColumns; produces the table columns indexes from ORC included columns. * @param readerSchema The ORC reader schema for the table. * @param included The included ORC columns. * @param isFullColumnMatch Whether full column match should be enforced (i.e. 
whether to expect * that all the sub-columns or a complex type column should be included or excluded * together in the included array. If false, any sub-column being included for a complex * type is sufficient for the entire complex column to be included in the result. * @return The list of table column indexes. */ public static List<Integer> genIncludedColumnsReverse( TypeDescription readerSchema, boolean[] included, boolean isFullColumnMatch) { assert included != null; List<Integer> result = new ArrayList<>(); List<TypeDescription> children = readerSchema.getChildren(); for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) { TypeDescription child = children.get(columnNumber); int id = child.getId(); int maxId = child.getMaximumId(); if (id >= included.length || maxId >= included.length) { throw new AssertionError("Inconsistent includes: " + included.length + " elements; found column ID " + id); } boolean isIncluded = included[id]; for (int col = id + 1; col <= maxId; ++col) { if (isFullColumnMatch && included[col] != isIncluded) { throw new AssertionError("Inconsistent includes: root column IDs are [" + id + ", " + maxId + "]; included[" + col + "] = " + included[col] + ", which is different " + " from the previous IDs of the same root column."); } isIncluded = isIncluded || included[col]; } if (isIncluded) { result.add(columnNumber); } } return result; } /** * Take the configuration and figure out which columns we need to include. * @param readerSchema the types for the reader * @param conf the configuration */ public static boolean[] genIncludedColumns(TypeDescription readerSchema, Configuration conf) { if (!ColumnProjectionUtils.isReadAllColumns(conf)) { List<Integer> included = ColumnProjectionUtils.getReadColumnIDs(conf); return genIncludedColumns(readerSchema, included); } else { return null; } } public static String[] getSargColumnNames(String[] originalColumnNames, List<OrcProto.Type> types, boolean[] includedColumns, boolean isOriginal) { int rootColumn = getRootColumn(isOriginal); String[] columnNames = new String[types.size() - rootColumn]; int i = 0; // The way this works is as such. originalColumnNames is the equivalent on getNeededColumns // from TSOP. They are assumed to be in the same order as the columns in ORC file, AND they are // assumed to be equivalent to the columns in includedColumns (because it was generated from // the same column list at some point in the past), minus the subtype columns. Therefore, when // we go thru all the top level ORC file columns that are included, in order, they match // originalColumnNames. This way, we do not depend on names stored inside ORC for SARG leaf // column name resolution (see mapSargColumns method). for(int columnId: types.get(rootColumn).getSubtypesList()) { if (includedColumns == null || includedColumns[columnId - rootColumn]) { // this is guaranteed to be positive because types only have children // ids greater than their own id. 
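      // Illustrative (hypothetical) example: for an ACID file, rootColumn is
      // OrcRecordUpdater.ROW + 1, so with originalColumnNames = {"id", "name"} the two
      // included top-level subtypes of the row struct are mapped, in file order, to
      // "id" and "name"; excluded top-level columns are simply left null here.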
columnNames[columnId - rootColumn] = originalColumnNames[i++]; } } return columnNames; } static void setSearchArgument(Reader.Options options, List<OrcProto.Type> types, Configuration conf, boolean isOriginal) { String neededColumnNames = getNeededColumnNamesString(conf); if (neededColumnNames == null) { LOG.debug("No ORC pushdown predicate - no column names"); options.searchArgument(null, null); return; } SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf); if (sarg == null) { LOG.debug("No ORC pushdown predicate"); options.searchArgument(null, null); return; } if (LOG.isInfoEnabled()) { LOG.info("ORC pushdown predicate: " + sarg); } options.searchArgument(sarg, getSargColumnNames( neededColumnNames.split(","), types, options.getInclude(), isOriginal)); } static boolean canCreateSargFromConf(Configuration conf) { if (getNeededColumnNamesString(conf) == null) { LOG.debug("No ORC pushdown predicate - no column names"); return false; } if (!ConvertAstToSearchArg.canCreateFromConf(conf)) { LOG.debug("No ORC pushdown predicate"); return false; } return true; } private static String[] extractNeededColNames( List<OrcProto.Type> types, Configuration conf, boolean[] include, boolean isOriginal) { String colNames = getNeededColumnNamesString(conf); if (colNames == null) { return null; } return extractNeededColNames(types, colNames, include, isOriginal); } private static String[] extractNeededColNames( List<OrcProto.Type> types, String columnNamesString, boolean[] include, boolean isOriginal) { return getSargColumnNames(columnNamesString.split(","), types, include, isOriginal); } static String getNeededColumnNamesString(Configuration conf) { return conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR); } static String getSargColumnIDsString(Configuration conf) { return conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true) ? null : conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR); } @Override public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files ) throws IOException { if (Utilities.getUseVectorizedInputFileFormat(conf)) { return new VectorizedOrcInputFormat().validateInput(fs, conf, files); } if (files.size() <= 0) { return false; } for (FileStatus file : files) { // 0 length files cannot be ORC files if (file.getLen() == 0) { return false; } try { OrcFile.createReader(file.getPath(), OrcFile.readerOptions(conf).filesystem(fs).maxLength(file.getLen())); } catch (IOException e) { return false; } } return true; } /** * Get the list of input {@link Path}s for the map-reduce job. * * @param conf The configuration of the job * @return the list of input {@link Path}s for the map-reduce job. */ static Path[] getInputPaths(Configuration conf) throws IOException { String dirs = conf.get("mapred.input.dir"); if (dirs == null) { throw new IOException("Configuration mapred.input.dir is not defined."); } String [] list = StringUtils.split(dirs); Path[] result = new Path[list.length]; for (int i = 0; i < list.length; i++) { result[i] = new Path(StringUtils.unEscapeString(list[i])); } return result; } /** * The global information about the split generation that we pass around to * the different worker threads. */ static class Context { private final Configuration conf; // We store all caches in variables to change the main one based on config. // This is not thread safe between different split generations (and wasn't anyway). 
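    // footerCache points at whichever cache this split generation uses: localCache (an
    // in-process footer cache shared across queries) or metaCache (the metastore-backed
    // ExternalCache, which itself is layered on top of localCache).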
private FooterCache footerCache; private static LocalCache localCache; private static ExternalCache metaCache; static ExecutorService threadPool = null; private final int numBuckets; private final int splitStrategyBatchMs; private final long maxSize; private final long minSize; private final int etlFileThreshold; private final boolean footerInSplits; private final boolean cacheStripeDetails; private final boolean forceThreadpool; private final AtomicInteger cacheHitCounter = new AtomicInteger(0); private final AtomicInteger numFilesCounter = new AtomicInteger(0); private final ValidTxnList transactionList; private SplitStrategyKind splitStrategyKind; private final SearchArgument sarg; private final AcidOperationalProperties acidOperationalProperties; Context(Configuration conf) throws IOException { this(conf, 1, null); } Context(Configuration conf, final int minSplits) throws IOException { this(conf, minSplits, null); } @VisibleForTesting Context(Configuration conf, final int minSplits, ExternalFooterCachesByConf efc) throws IOException { this.conf = conf; this.forceThreadpool = HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST); this.sarg = ConvertAstToSearchArg.createFromConf(conf); minSize = HiveConf.getLongVar(conf, ConfVars.MAPREDMINSPLITSIZE, DEFAULT_MIN_SPLIT_SIZE); maxSize = HiveConf.getLongVar(conf, ConfVars.MAPREDMAXSPLITSIZE, DEFAULT_MAX_SPLIT_SIZE); String ss = conf.get(ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname); if (ss == null || ss.equals(SplitStrategyKind.HYBRID.name())) { splitStrategyKind = SplitStrategyKind.HYBRID; } else { LOG.info("Enforcing " + ss + " ORC split strategy"); splitStrategyKind = SplitStrategyKind.valueOf(ss); } footerInSplits = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS); numBuckets = Math.max(conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0), 0); splitStrategyBatchMs = HiveConf.getIntVar(conf, ConfVars.HIVE_ORC_SPLIT_DIRECTORY_BATCH_MS); LOG.debug("Number of buckets specified by conf file is " + numBuckets); long cacheMemSize = HiveConf.getSizeVar( conf, ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE); int numThreads = HiveConf.getIntVar(conf, ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS); boolean useSoftReference = HiveConf.getBoolVar( conf, ConfVars.HIVE_ORC_CACHE_USE_SOFT_REFERENCES); cacheStripeDetails = (cacheMemSize > 0); this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits; synchronized (Context.class) { if (threadPool == null) { threadPool = Executors.newFixedThreadPool(numThreads, new ThreadFactoryBuilder().setDaemon(true) .setNameFormat("ORC_GET_SPLITS #%d").build()); } // TODO: local cache is created once, so the configs for future queries will not be honored. if (cacheStripeDetails) { // Note that there's no FS check here; we implicitly only use metastore cache for // HDFS, because only HDFS would return fileIds for us. If fileId is extended using // size/mod time/etc. for other FSes, we might need to check FSes explicitly because // using such an aggregate fileId cache is not bulletproof and should be disable-able. 
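        // The metastore-backed cache is only considered when explicitly enabled, and is
        // currently forced off again below because the feature is not fully supported yet.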
boolean useExternalCache = HiveConf.getBoolVar( conf, HiveConf.ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED); if (useExternalCache) { if (LOG.isDebugEnabled()) { LOG.debug( "Turning off hive.orc.splits.ms.footer.cache.enabled since it is not fully supported yet"); } useExternalCache = false; } if (localCache == null) { localCache = new LocalCache(numThreads, cacheMemSize, useSoftReference); } if (useExternalCache) { if (metaCache == null) { metaCache = new ExternalCache(localCache, efc == null ? new MetastoreExternalCachesByConf() : efc); } assert conf instanceof HiveConf; metaCache.configure((HiveConf)conf); } // Set footer cache for current split generation. See field comment - not thread safe. // TODO: we should be able to enable caches separately footerCache = useExternalCache ? metaCache : localCache; } } String value = conf.get(ValidTxnList.VALID_TXNS_KEY); transactionList = value == null ? new ValidReadTxnList() : new ValidReadTxnList(value); // Determine the transactional_properties of the table from the job conf stored in context. // The table properties are copied to job conf at HiveInputFormat::addSplitsForGroup(), // & therefore we should be able to retrieve them here and determine appropriate behavior. // Note that this will be meaningless for non-acid tables & will be set to null. boolean isTableTransactional = conf.getBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, false); String transactionalProperties = conf.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES); this.acidOperationalProperties = isTableTransactional ? AcidOperationalProperties.parseString(transactionalProperties) : null; } @VisibleForTesting static int getCurrentThreadPoolSize() { synchronized (Context.class) { return (threadPool instanceof ThreadPoolExecutor) ? ((ThreadPoolExecutor)threadPool).getPoolSize() : ((threadPool == null) ? 0 : -1); } } @VisibleForTesting public static void resetThreadPool() { synchronized (Context.class) { threadPool = null; } } @VisibleForTesting public static void clearLocalCache() { if (localCache == null) return; localCache.clear(); } } /** * The full ACID directory information needed for splits; no more calls to HDFS needed. * We could just live with AcidUtils.Directory but... * 1) That doesn't have base files for the base-directory case. * 2) We save fs for convenience to avoid getting it twice. 
*/ @VisibleForTesting static final class AcidDirInfo { public AcidDirInfo(FileSystem fs, Path splitPath, Directory acidInfo, List<AcidBaseFileInfo> baseFiles, List<ParsedDelta> parsedDeltas) { this.splitPath = splitPath; this.acidInfo = acidInfo; this.baseFiles = baseFiles; this.fs = fs; this.parsedDeltas = parsedDeltas; } final FileSystem fs; final Path splitPath; final AcidUtils.Directory acidInfo; final List<AcidBaseFileInfo> baseFiles; final List<ParsedDelta> parsedDeltas; } @VisibleForTesting interface SplitStrategy<T> { List<T> getSplits() throws IOException; } @VisibleForTesting static final class SplitInfo extends ACIDSplitStrategy { private final Context context; private final FileSystem fs; private final HdfsFileStatusWithId fileWithId; private final OrcTail orcTail; private final List<OrcProto.Type> readerTypes; private final boolean isOriginal; private final List<DeltaMetaData> deltas; private final boolean hasBase; private final ByteBuffer ppdResult; SplitInfo(Context context, FileSystem fs, HdfsFileStatusWithId fileWithId, OrcTail orcTail, List<OrcProto.Type> readerTypes, boolean isOriginal, List<DeltaMetaData> deltas, boolean hasBase, Path dir, boolean[] covered, ByteBuffer ppdResult) throws IOException { super(dir, context.numBuckets, deltas, covered, context.acidOperationalProperties); this.context = context; this.fs = fs; this.fileWithId = fileWithId; this.orcTail = orcTail; this.readerTypes = readerTypes; this.isOriginal = isOriginal; this.deltas = deltas; this.hasBase = hasBase; this.ppdResult = ppdResult; } @VisibleForTesting public SplitInfo(Context context, FileSystem fs, FileStatus fileStatus, OrcTail orcTail, List<OrcProto.Type> readerTypes, boolean isOriginal, ArrayList<DeltaMetaData> deltas, boolean hasBase, Path dir, boolean[] covered) throws IOException { this(context, fs, AcidUtils.createOriginalObj(null, fileStatus), orcTail, readerTypes, isOriginal, deltas, hasBase, dir, covered, null); } } /** * ETL strategy is used when spending little more time in split generation is acceptable * (split generation reads and caches file footers). */ static final class ETLSplitStrategy implements SplitStrategy<SplitInfo>, Callable<Void> { private static final int ETL_COMBINE_FILE_LIMIT = 500; private static class ETLDir { public ETLDir(Path dir, FileSystem fs, int fileCount) { this.dir = dir; this.fs = fs; this.fileCount = fileCount; } private final int fileCount; private final Path dir; private final FileSystem fs; } Context context; final List<ETLDir> dirs; List<HdfsFileStatusWithId> files; private final List<DeltaMetaData> deltas; private final boolean[] covered; final boolean isOriginal; final List<OrcProto.Type> readerTypes; // References to external fields for async SplitInfo generation. 
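    // When the footer cache can block (or the test flag forces the thread pool),
    // generateSplitWork() stashes the shared result lists in these refs and submits this
    // strategy to Context.threadPool; otherwise runGetSplitsSync() fills the lists directly
    // on the calling thread.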
private List<Future<List<OrcSplit>>> splitFuturesRef = null; private List<OrcSplit> splitsRef = null; private final UserGroupInformation ugi; private final boolean allowSyntheticFileIds; public ETLSplitStrategy(Context context, FileSystem fs, Path dir, List<HdfsFileStatusWithId> children, List<OrcProto.Type> readerTypes, boolean isOriginal, List<DeltaMetaData> deltas, boolean[] covered, UserGroupInformation ugi, boolean allowSyntheticFileIds) { assert !children.isEmpty(); this.context = context; this.dirs = Lists.newArrayList(new ETLDir(dir, fs, children.size())); this.files = children; this.isOriginal = isOriginal; this.readerTypes = readerTypes; this.deltas = deltas; this.covered = covered; this.ugi = ugi; this.allowSyntheticFileIds = allowSyntheticFileIds; } @Override public List<SplitInfo> getSplits() throws IOException { List<SplitInfo> result = new ArrayList<>(files.size()); // Force local cache if we have deltas. FooterCache cache = context.cacheStripeDetails ? ((deltas == null || deltas.isEmpty()) ? context.footerCache : Context.localCache) : null; if (cache != null) { OrcTail[] orcTails = new OrcTail[files.size()]; ByteBuffer[] ppdResults = null; if (cache.hasPpd()) { ppdResults = new ByteBuffer[files.size()]; } try { cache.getAndValidate(files, isOriginal, orcTails, ppdResults); } catch (HiveException e) { throw new IOException(e); } int dirIx = -1, fileInDirIx = -1, filesInDirCount = 0; ETLDir dir = null; for (int i = 0; i < files.size(); ++i) { if ((++fileInDirIx) == filesInDirCount) { dir = dirs.get(++dirIx); filesInDirCount = dir.fileCount; } OrcTail orcTail = orcTails[i]; ByteBuffer ppdResult = ppdResults == null ? null : ppdResults[i]; HdfsFileStatusWithId file = files.get(i); if (orcTail != null) { // Cached copy is valid context.cacheHitCounter.incrementAndGet(); } // Ignore files eliminated by PPD, or of 0 length. if (ppdResult != FooterCache.NO_SPLIT_AFTER_PPD && file.getFileStatus().getLen() > 0) { result.add(new SplitInfo(context, dir.fs, file, orcTail, readerTypes, isOriginal, deltas, true, dir.dir, covered, ppdResult)); } } } else { int dirIx = -1, fileInDirIx = -1, filesInDirCount = 0; ETLDir dir = null; for (HdfsFileStatusWithId file : files) { if ((++fileInDirIx) == filesInDirCount) { dir = dirs.get(++dirIx); filesInDirCount = dir.fileCount; } // ignore files of 0 length if (file.getFileStatus().getLen() > 0) { result.add(new SplitInfo(context, dir.fs, file, null, readerTypes, isOriginal, deltas, true, dir.dir, covered, null)); } } } return result; } @Override public String toString() { if (dirs.size() == 1) { return ETLSplitStrategy.class.getSimpleName() + " strategy for " + dirs.get(0).dir; } else { StringBuilder sb = new StringBuilder(ETLSplitStrategy.class.getSimpleName() + " strategy for "); boolean isFirst = true; for (ETLDir dir : dirs) { if (!isFirst) sb.append(", "); isFirst = false; sb.append(dir.dir); } return sb.toString(); } } enum CombineResult { YES, // Combined, all good. NO_AND_CONTINUE, // Don't combine with that, but may combine with others. NO_AND_SWAP // Don't combine with with that, and make that a base for new combines. // We may add NO_AND_STOP in future where combine is impossible and other should not be base. } public CombineResult combineWith(FileSystem fs, Path dir, List<HdfsFileStatusWithId> otherFiles, boolean isOriginal) { if ((files.size() + otherFiles.size()) > ETL_COMBINE_FILE_LIMIT || this.isOriginal != isOriginal) { return (files.size() > otherFiles.size()) ? 
CombineResult.NO_AND_SWAP : CombineResult.NO_AND_CONTINUE; } // All good, combine the base/original only ETL strategies. files.addAll(otherFiles); dirs.add(new ETLDir(dir, fs, otherFiles.size())); return CombineResult.YES; } public Future<Void> generateSplitWork(Context context, List<Future<List<OrcSplit>>> splitFutures, List<OrcSplit> splits) throws IOException { if ((context.cacheStripeDetails && context.footerCache.isBlocking()) || context.forceThreadpool) { this.splitFuturesRef = splitFutures; this.splitsRef = splits; return Context.threadPool.submit(this); } else { runGetSplitsSync(splitFutures, splits, null); return null; } } @Override public Void call() throws IOException { if (ugi == null) { runGetSplitsSync(splitFuturesRef, splitsRef, null); return null; } try { return ugi.doAs(new PrivilegedExceptionAction<Void>() { @Override public Void run() throws Exception { runGetSplitsSync(splitFuturesRef, splitsRef, ugi); return null; } }); } catch (InterruptedException e) { throw new IOException(e); } } private void runGetSplitsSync(List<Future<List<OrcSplit>>> splitFutures, List<OrcSplit> splits, UserGroupInformation ugi) throws IOException { UserGroupInformation tpUgi = ugi == null ? UserGroupInformation.getCurrentUser() : ugi; List<SplitInfo> splitInfos = getSplits(); List<Future<List<OrcSplit>>> localListF = null; List<OrcSplit> localListS = null; for (SplitInfo splitInfo : splitInfos) { SplitGenerator sg = new SplitGenerator(splitInfo, tpUgi, allowSyntheticFileIds); if (!sg.isBlocking()) { if (localListS == null) { localListS = new ArrayList<>(splits.size()); } // Already called in doAs, so no need to doAs here. localListS.addAll(sg.call()); } else { if (localListF == null) { localListF = new ArrayList<>(splits.size()); } localListF.add(Context.threadPool.submit(sg)); } } if (localListS != null) { synchronized (splits) { splits.addAll(localListS); } } if (localListF != null) { synchronized (splitFutures) { splitFutures.addAll(localListF); } } } } /** * BI strategy is used when the requirement is to spend less time in split generation * as opposed to query execution (split generation does not read or cache file footers). 
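   * <p>
   * A minimal sketch, assuming a writable job Configuration, of forcing this strategy
   * (HYBRID is used when the property is unset):
   * <pre>
   *   conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
   * </pre>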
 */
  static final class BISplitStrategy extends ACIDSplitStrategy {
    private final List<HdfsFileStatusWithId> fileStatuses;
    private final boolean isOriginal;
    private final List<DeltaMetaData> deltas;
    private final FileSystem fs;
    private final Path dir;
    private final boolean allowSyntheticFileIds;

    public BISplitStrategy(Context context, FileSystem fs, Path dir,
        List<HdfsFileStatusWithId> fileStatuses, boolean isOriginal,
        List<DeltaMetaData> deltas, boolean[] covered, boolean allowSyntheticFileIds) {
      super(dir, context.numBuckets, deltas, covered, context.acidOperationalProperties);
      this.fileStatuses = fileStatuses;
      this.isOriginal = isOriginal;
      this.deltas = deltas;
      this.fs = fs;
      this.dir = dir;
      this.allowSyntheticFileIds = allowSyntheticFileIds;
    }

    @Override
    public List<OrcSplit> getSplits() throws IOException {
      List<OrcSplit> splits = Lists.newArrayList();
      for (HdfsFileStatusWithId file : fileStatuses) {
        FileStatus fileStatus = file.getFileStatus();
        if (fileStatus.getLen() != 0) {
          Object fileKey = file.getFileId();
          if (fileKey == null && allowSyntheticFileIds) {
            fileKey = new SyntheticFileId(fileStatus);
          }
          TreeMap<Long, BlockLocation> blockOffsets = SHIMS.getLocationsWithOffset(fs, fileStatus);
          for (Map.Entry<Long, BlockLocation> entry : blockOffsets.entrySet()) {
            OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), fileKey, entry.getKey(),
                entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true,
                deltas, -1, fileStatus.getLen());
            splits.add(orcSplit);
          }
        }
      }

      // add uncovered ACID delta splits
      splits.addAll(super.getSplits());
      return splits;
    }

    @Override
    public String toString() {
      return BISplitStrategy.class.getSimpleName() + " strategy for " + dir;
    }
  }

  /**
   * ACID split strategy is used when there is no base directory (when transactions are enabled).
   */
  static class ACIDSplitStrategy implements SplitStrategy<OrcSplit> {
    Path dir;
    List<DeltaMetaData> deltas;
    boolean[] covered;
    int numBuckets;
    AcidOperationalProperties acidOperationalProperties;

    public ACIDSplitStrategy(Path dir, int numBuckets, List<DeltaMetaData> deltas,
        boolean[] covered, AcidOperationalProperties acidOperationalProperties) {
      this.dir = dir;
      this.numBuckets = numBuckets;
      this.deltas = deltas;
      this.covered = covered;
      this.acidOperationalProperties = acidOperationalProperties;
    }

    @Override
    public List<OrcSplit> getSplits() throws IOException {
      List<OrcSplit> splits = Lists.newArrayList();

      // When split-update is enabled, we do not need to account for buckets that aren't covered.
      // This is a huge performance benefit of split-update. And the reason why we are able to
      // do so is because the 'deltas' here are actually only the delete_deltas. All the
      // insert_deltas with valid user payload data have already been considered as base for the
      // covered buckets. Hence, the uncovered buckets do not have any relevant data and we can
      // just ignore them.
      if (acidOperationalProperties != null && acidOperationalProperties.isSplitUpdate()) {
        return splits; // return an empty list.
      }

      // Generate a split for any buckets that weren't covered.
      // This happens in the case where a bucket just has deltas and no
      // base.
      if (!deltas.isEmpty()) {
        for (int b = 0; b < numBuckets; ++b) {
          if (!covered[b]) {
            splits.add(new OrcSplit(dir, null, b, 0, new String[0], null, false, false,
                deltas, -1, -1));
          }
        }
      }
      return splits;
    }

    @Override
    public String toString() {
      return ACIDSplitStrategy.class.getSimpleName() + " strategy for " + dir;
    }
  }

  /**
   * Given a directory, get the list of files and blocks in those files.
* To parallelize file generator use "mapreduce.input.fileinputformat.list-status.num-threads" */ static final class FileGenerator implements Callable<AcidDirInfo> { private final Context context; private final FileSystem fs; private final Path dir; private final Ref<Boolean> useFileIds; private final UserGroupInformation ugi; FileGenerator(Context context, FileSystem fs, Path dir, boolean useFileIds, UserGroupInformation ugi) { this(context, fs, dir, Ref.from(useFileIds), ugi); } FileGenerator(Context context, FileSystem fs, Path dir, Ref<Boolean> useFileIds, UserGroupInformation ugi) { this.context = context; this.fs = fs; this.dir = dir; this.useFileIds = useFileIds; this.ugi = ugi; } @Override public AcidDirInfo call() throws IOException { if (ugi == null) { return callInternal(); } try { return ugi.doAs(new PrivilegedExceptionAction<AcidDirInfo>() { @Override public AcidDirInfo run() throws Exception { return callInternal(); } }); } catch (InterruptedException e) { throw new IOException(e); } } private AcidDirInfo callInternal() throws IOException { AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir, context.conf, context.transactionList, useFileIds, true); Path base = dirInfo.getBaseDirectory(); // find the base files (original or new style) List<AcidBaseFileInfo> baseFiles = new ArrayList<AcidBaseFileInfo>(); if (base == null) { for (HdfsFileStatusWithId fileId : dirInfo.getOriginalFiles()) { baseFiles.add(new AcidBaseFileInfo(fileId, AcidUtils.AcidBaseFileType.ORIGINAL_BASE)); } } else { List<HdfsFileStatusWithId> compactedBaseFiles = findBaseFiles(base, useFileIds); for (HdfsFileStatusWithId fileId : compactedBaseFiles) { baseFiles.add(new AcidBaseFileInfo(fileId, AcidUtils.AcidBaseFileType.COMPACTED_BASE)); } } // Find the parsed deltas- some of them containing only the insert delta events // may get treated as base if split-update is enabled for ACID. (See HIVE-14035 for details) List<ParsedDelta> parsedDeltas = new ArrayList<ParsedDelta>(); if (context.acidOperationalProperties != null && context.acidOperationalProperties.isSplitUpdate()) { // If we have split-update turned on for this table, then the delta events have already been // split into two directories- delta_x_y/ and delete_delta_x_y/. // When you have split-update turned on, the insert events go to delta_x_y/ directory and all // the delete events go to delete_x_y/. An update event will generate two events- // a delete event for the old record that is put into delete_delta_x_y/, // followed by an insert event for the updated record put into the usual delta_x_y/. // Therefore, everything inside delta_x_y/ is an insert event and all the files in delta_x_y/ // can be treated like base files. Hence, each of these are added to baseOrOriginalFiles list. for (ParsedDelta parsedDelta : dirInfo.getCurrentDirectories()) { if (parsedDelta.isDeleteDelta()) { parsedDeltas.add(parsedDelta); } else { // This is a normal insert delta, which only has insert events and hence all the files // in this delta directory can be considered as a base. Boolean val = useFileIds.value; if (val == null || val) { try { List<HdfsFileStatusWithId> insertDeltaFiles = SHIMS.listLocatedHdfsStatus(fs, parsedDelta.getPath(), AcidUtils.hiddenFileFilter); for (HdfsFileStatusWithId fileId : insertDeltaFiles) { baseFiles.add(new AcidBaseFileInfo(fileId, AcidUtils.AcidBaseFileType.INSERT_DELTA)); } if (val == null) { useFileIds.value = true; // The call succeeded, so presumably the API is there. } continue; // move on to process to the next parsedDelta. 
} catch (Throwable t) { LOG.error("Failed to get files with ID; using regular API: " + t.getMessage()); if (val == null && t instanceof UnsupportedOperationException) { useFileIds.value = false; } } } // Fall back to regular API and create statuses without ID. List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, parsedDelta.getPath(), AcidUtils.hiddenFileFilter); for (FileStatus child : children) { HdfsFileStatusWithId fileId = AcidUtils.createOriginalObj(null, child); baseFiles.add(new AcidBaseFileInfo(fileId, AcidUtils.AcidBaseFileType.INSERT_DELTA)); } } } } else { // When split-update is not enabled, then all the deltas in the current directories // should be considered as usual. parsedDeltas.addAll(dirInfo.getCurrentDirectories()); } return new AcidDirInfo(fs, dir, dirInfo, baseFiles, parsedDeltas); } private List<HdfsFileStatusWithId> findBaseFiles( Path base, Ref<Boolean> useFileIds) throws IOException { Boolean val = useFileIds.value; if (val == null || val) { try { List<HdfsFileStatusWithId> result = SHIMS.listLocatedHdfsStatus( fs, base, AcidUtils.hiddenFileFilter); if (val == null) { useFileIds.value = true; // The call succeeded, so presumably the API is there. } return result; } catch (Throwable t) { LOG.error("Failed to get files with ID; using regular API: " + t.getMessage()); if (val == null && t instanceof UnsupportedOperationException) { useFileIds.value = false; } } } // Fall back to regular API and create states without ID. List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, base, AcidUtils.hiddenFileFilter); List<HdfsFileStatusWithId> result = new ArrayList<>(children.size()); for (FileStatus child : children) { result.add(AcidUtils.createOriginalObj(null, child)); } return result; } } /** * Split the stripes of a given file into input splits. * A thread is used for each file. */ static final class SplitGenerator implements Callable<List<OrcSplit>> { private final Context context; private final FileSystem fs; private final FileStatus file; private final Long fsFileId; private final long blockSize; private final TreeMap<Long, BlockLocation> locations; private OrcTail orcTail; private List<OrcProto.Type> readerTypes; private List<StripeInformation> stripes; private List<StripeStatistics> stripeStats; private List<OrcProto.Type> fileTypes; private boolean[] readerIncluded; // The included columns of the reader / file schema that // include ACID columns if present. 
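    // isOriginal marks a file written in the pre-ACID (original) layout, deltas carries the
    // delta directory metadata that applies to this file, and hasBase records whether the
    // split has a base file to read.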
    private final boolean isOriginal;
    private final List<DeltaMetaData> deltas;
    private final boolean hasBase;
    private OrcFile.WriterVersion writerVersion;
    private long projColsUncompressedSize;
    private final List<OrcSplit> deltaSplits;
    private final ByteBuffer ppdResult;
    private final UserGroupInformation ugi;
    private final boolean allowSyntheticFileIds;
    private SchemaEvolution evolution;

    public SplitGenerator(SplitInfo splitInfo, UserGroupInformation ugi,
        boolean allowSyntheticFileIds) throws IOException {
      this.ugi = ugi;
      this.context = splitInfo.context;
      this.fs = splitInfo.fs;
      this.file = splitInfo.fileWithId.getFileStatus();
      this.fsFileId = splitInfo.fileWithId.getFileId();
      this.blockSize = this.file.getBlockSize();
      this.orcTail = splitInfo.orcTail;
      this.readerTypes = splitInfo.readerTypes;
      // TODO: potential DFS call
      this.locations = SHIMS.getLocationsWithOffset(fs, file);
      this.isOriginal = splitInfo.isOriginal;
      this.deltas = splitInfo.deltas;
      this.hasBase = splitInfo.hasBase;
      this.projColsUncompressedSize = -1;
      this.deltaSplits = splitInfo.getSplits();
      this.allowSyntheticFileIds = allowSyntheticFileIds;
      this.ppdResult = splitInfo.ppdResult;
    }

    public boolean isBlocking() {
      return true;
    }

    Path getPath() {
      return file.getPath();
    }

    @Override
    public String toString() {
      return "splitter(" + file.getPath() + ")";
    }

    /**
     * Compute the number of bytes that overlap between the two ranges.
     * @param offset1 start of range1
     * @param length1 length of range1
     * @param offset2 start of range2
     * @param length2 length of range2
     * @return the number of bytes in the overlap range
     */
    static long getOverlap(long offset1, long length1,
                           long offset2, long length2) {
      long end1 = offset1 + length1;
      long end2 = offset2 + length2;
      if (end2 <= offset1 || end1 <= offset2) {
        return 0;
      } else {
        return Math.min(end1, end2) - Math.max(offset1, offset2);
      }
    }

    /**
     * Create an input split over the given range of bytes. The location of the
     * split is based on where the majority of the bytes are coming from. ORC
     * files are unlikely to have splits that cross between blocks because they
     * are written with large block sizes.
     * @param offset the start of the split
     * @param length the length of the split
     * @param orcTail the ORC file tail (footer metadata) to embed in the split, if any
     * @throws IOException
     */
    OrcSplit createSplit(long offset, long length,
                         OrcTail orcTail) throws IOException {
      String[] hosts;
      Map.Entry<Long, BlockLocation> startEntry = locations.floorEntry(offset);
      BlockLocation start = startEntry.getValue();
      if (offset + length <= start.getOffset() + start.getLength()) {
        // handle the single block case
        hosts = start.getHosts();
      } else {
        Map.Entry<Long, BlockLocation> endEntry = locations.floorEntry(offset + length);
        // get the submap
        NavigableMap<Long, BlockLocation> navigableMap = locations.subMap(startEntry.getKey(),
            true, endEntry.getKey(), true);
        // Calculate the number of bytes in the split that are local to each
        // host.
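        // Worked example (mirrors the MIN_INCLUDED_LOCATION javadoc above): if host2 holds
        // 20MB of this split (the most), host3 holds 18MB and host1 holds 10MB, the 0.80
        // threshold is 16MB, so host2 and host3 are kept and host1 is dropped.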
Map<String, LongWritable> sizes = new HashMap<String, LongWritable>(); long maxSize = 0; for (BlockLocation block : navigableMap.values()) { long overlap = getOverlap(offset, length, block.getOffset(), block.getLength()); if (overlap > 0) { for(String host: block.getHosts()) { LongWritable val = sizes.get(host); if (val == null) { val = new LongWritable(); sizes.put(host, val); } val.set(val.get() + overlap); maxSize = Math.max(maxSize, val.get()); } } else { throw new IOException("File " + file.getPath().toString() + " should have had overlap on block starting at " + block.getOffset()); } } // filter the list of locations to those that have at least 80% of the // max long threshold = (long) (maxSize * MIN_INCLUDED_LOCATION); List<String> hostList = new ArrayList<String>(); // build the locations in a predictable order to simplify testing for(BlockLocation block: navigableMap.values()) { for(String host: block.getHosts()) { if (sizes.containsKey(host)) { if (sizes.get(host).get() >= threshold) { hostList.add(host); } sizes.remove(host); } } } hosts = new String[hostList.size()]; hostList.toArray(hosts); } // scale the raw data size to split level based on ratio of split wrt to file length final long fileLen = file.getLen(); final double splitRatio = (double) length / (double) fileLen; final long scaledProjSize = projColsUncompressedSize > 0 ? (long) (splitRatio * projColsUncompressedSize) : fileLen; Object fileKey = fsFileId; if (fileKey == null && allowSyntheticFileIds) { fileKey = new SyntheticFileId(file); } return new OrcSplit(file.getPath(), fileKey, offset, length, hosts, orcTail, isOriginal, hasBase, deltas, scaledProjSize, fileLen); } private static final class OffsetAndLength { // Java cruft; pair of long. public OffsetAndLength() { this.offset = -1; this.length = 0; } long offset, length; @Override public String toString() { return "[offset=" + offset + ", length=" + length + "]"; } } /** * Divide the adjacent stripes in the file into input splits based on the * block size and the configured minimum and maximum sizes. */ @Override public List<OrcSplit> call() throws IOException { if (ugi == null) { return callInternal(); } try { return ugi.doAs(new PrivilegedExceptionAction<List<OrcSplit>>() { @Override public List<OrcSplit> run() throws Exception { return callInternal(); } }); } catch (InterruptedException e) { throw new IOException(e); } } private List<OrcSplit> callInternal() throws IOException { // Figure out which stripes we need to read. if (ppdResult != null) { assert deltaSplits.isEmpty(); assert ppdResult.hasArray(); // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here. CodedInputStream cis = CodedInputStream.newInstance( ppdResult.array(), ppdResult.arrayOffset(), ppdResult.remaining()); cis.setSizeLimit(InStream.PROTOBUF_MESSAGE_MAX_LIMIT); return generateSplitsFromPpd(SplitInfos.parseFrom(cis)); } else { populateAndCacheStripeDetails(); boolean[] includeStripe = null; // We can't eliminate stripes if there are deltas because the // deltas may change the rows making them match the predicate. if ((deltas == null || deltas.isEmpty()) && context.sarg != null) { String[] colNames = extractNeededColNames((readerTypes == null ? 
fileTypes : readerTypes), context.conf, readerIncluded, isOriginal); if (colNames == null) { LOG.warn("Skipping split elimination for {} as column names is null", file.getPath()); } else { includeStripe = pickStripes(context.sarg, writerVersion, stripeStats, stripes.size(), file.getPath(), evolution); } } return generateSplitsFromStripes(includeStripe); } } private List<OrcSplit> generateSplitsFromPpd(SplitInfos ppdResult) throws IOException { OffsetAndLength current = new OffsetAndLength(); List<OrcSplit> splits = new ArrayList<>(ppdResult.getInfosCount()); int lastIdx = -1; for (Metastore.SplitInfo si : ppdResult.getInfosList()) { int index = si.getIndex(); if (lastIdx >= 0 && lastIdx + 1 != index && current.offset != -1) { // Create split for the previous unfinished stripe. splits.add(createSplit(current.offset, current.length, orcTail)); current.offset = -1; } lastIdx = index; String debugStr = null; if (LOG.isDebugEnabled()) { debugStr = current.toString(); } current = generateOrUpdateSplit(splits, current, si.getOffset(), si.getLength(), null); if (LOG.isDebugEnabled()) { LOG.debug("Updated split from {" + index + ": " + si.getOffset() + ", " + si.getLength() + "} and "+ debugStr + " to " + current); } } generateLastSplit(splits, current, null); return splits; } private List<OrcSplit> generateSplitsFromStripes(boolean[] includeStripe) throws IOException { List<OrcSplit> splits = new ArrayList<>(stripes.size()); // if we didn't have predicate pushdown, read everything if (includeStripe == null) { includeStripe = new boolean[stripes.size()]; Arrays.fill(includeStripe, true); } OffsetAndLength current = new OffsetAndLength(); int idx = -1; for (StripeInformation stripe : stripes) { idx++; if (!includeStripe[idx]) { // create split for the previous unfinished stripe if (current.offset != -1) { splits.add(createSplit(current.offset, current.length, orcTail)); current.offset = -1; } continue; } current = generateOrUpdateSplit( splits, current, stripe.getOffset(), stripe.getLength(), orcTail); } generateLastSplit(splits, current, orcTail); // Add uncovered ACID delta splits. splits.addAll(deltaSplits); return splits; } private OffsetAndLength generateOrUpdateSplit( List<OrcSplit> splits, OffsetAndLength current, long offset, long length, OrcTail orcTail) throws IOException { // if we are working on a stripe, over the min stripe size, and // crossed a block boundary, cut the input split here. if (current.offset != -1 && current.length > context.minSize && (current.offset / blockSize != offset / blockSize)) { splits.add(createSplit(current.offset, current.length, orcTail)); current.offset = -1; } // if we aren't building a split, start a new one. if (current.offset == -1) { current.offset = offset; current.length = length; } else { current.length = (offset + length) - current.offset; } if (current.length >= context.maxSize) { splits.add(createSplit(current.offset, current.length, orcTail)); current.offset = -1; } return current; } private void generateLastSplit(List<OrcSplit> splits, OffsetAndLength current, OrcTail orcTail) throws IOException { if (current.offset == -1) return; splits.add(createSplit(current.offset, current.length, orcTail)); } private void populateAndCacheStripeDetails() throws IOException { // When reading the file for first time we get the orc tail from the orc reader and cache it // in the footer cache. Subsequent requests will get the orc tail from the cache (if file // length and modification time is not changed) and populate the split info. 
If the split info // object contains the orc tail from the cache then we can skip creating orc reader avoiding // filesystem calls. if (orcTail == null) { Reader orcReader = OrcFile.createReader(file.getPath(), OrcFile.readerOptions(context.conf) .filesystem(fs) .maxLength(file.getLen())); orcTail = new OrcTail(orcReader.getFileTail(), orcReader.getSerializedFileFooter(), file.getModificationTime()); if (context.cacheStripeDetails) { context.footerCache.put(new FooterCacheKey(fsFileId, file.getPath()), orcTail); } } stripes = orcTail.getStripes(); stripeStats = orcTail.getStripeStatistics(); fileTypes = orcTail.getTypes(); TypeDescription fileSchema = OrcUtils.convertTypeFromProtobuf(fileTypes, 0); Reader.Options readerOptions = new Reader.Options(context.conf); if (readerTypes == null) { readerIncluded = genIncludedColumns(fileSchema, context.conf); evolution = new SchemaEvolution(fileSchema, null, readerOptions.include(readerIncluded)); } else { // The reader schema always comes in without ACID columns. TypeDescription readerSchema = OrcUtils.convertTypeFromProtobuf(readerTypes, 0); readerIncluded = genIncludedColumns(readerSchema, context.conf); evolution = new SchemaEvolution(fileSchema, readerSchema, readerOptions.include(readerIncluded)); if (!isOriginal) { // The SchemaEvolution class has added the ACID metadata columns. Let's update our // readerTypes so PPD code will work correctly. readerTypes = OrcUtils.getOrcTypes(evolution.getReaderSchema()); } } writerVersion = orcTail.getWriterVersion(); List<OrcProto.ColumnStatistics> fileColStats = orcTail.getFooter().getStatisticsList(); boolean[] fileIncluded; if (readerTypes == null) { fileIncluded = readerIncluded; } else { fileIncluded = new boolean[fileTypes.size()]; final int readerSchemaSize = readerTypes.size(); for (int i = 0; i < readerSchemaSize; i++) { TypeDescription fileType = evolution.getFileType(i); if (fileType != null) { fileIncluded[fileType.getId()] = true; } } } projColsUncompressedSize = computeProjectionSize(fileTypes, fileColStats, fileIncluded); if (!context.footerInSplits) { orcTail = null; } } private long computeProjectionSize(List<OrcProto.Type> fileTypes, List<OrcProto.ColumnStatistics> stats, boolean[] fileIncluded) { List<Integer> internalColIds = Lists.newArrayList(); if (fileIncluded == null) { // Add all. for (int i = 0; i < fileTypes.size(); i++) { internalColIds.add(i); } } else { for (int i = 0; i < fileIncluded.length; i++) { if (fileIncluded[i]) { internalColIds.add(i); } } } return ReaderImpl.getRawDataSizeFromColIndices(internalColIds, fileTypes, stats); } private boolean[] shiftReaderIncludedForAcid(boolean[] included) { // We always need the base row included[0] = true; boolean[] newIncluded = new boolean[included.length + OrcRecordUpdater.FIELDS]; Arrays.fill(newIncluded, 0, OrcRecordUpdater.FIELDS, true); for(int i= 0; i < included.length; ++i) { newIncluded[i + OrcRecordUpdater.FIELDS] = included[i]; } return newIncluded; } } /** Class intended to update two values from methods... Java-related cruft. */ @VisibleForTesting static final class CombinedCtx { ETLSplitStrategy combined; long combineStartUs; } static List<OrcSplit> generateSplitsInfo(Configuration conf, Context context) throws IOException { if (LOG.isInfoEnabled()) { LOG.info("ORC pushdown predicate: " + context.sarg); } boolean useFileIdsConfig = HiveConf.getBoolVar( conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS); // Sharing this state assumes splits will succeed or fail to get it together (same FS). 
// We also start with null and only set it to true on the first call, so we would only do // the global-disable thing on the first failure w/the API error, not any random failure. Ref<Boolean> useFileIds = Ref.from(useFileIdsConfig ? null : false); boolean allowSyntheticFileIds = useFileIdsConfig && HiveConf.getBoolVar( conf, ConfVars.HIVE_ORC_ALLOW_SYNTHETIC_FILE_ID_IN_SPLITS); List<OrcSplit> splits = Lists.newArrayList(); List<Future<AcidDirInfo>> pathFutures = Lists.newArrayList(); List<Future<Void>> strategyFutures = Lists.newArrayList(); final List<Future<List<OrcSplit>>> splitFutures = Lists.newArrayList(); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); // multi-threaded file statuses and split strategy Path[] paths = getInputPaths(conf); CompletionService<AcidDirInfo> ecs = new ExecutorCompletionService<>(Context.threadPool); for (Path dir : paths) { FileSystem fs = dir.getFileSystem(conf); FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds, ugi); pathFutures.add(ecs.submit(fileGenerator)); } boolean isTransactionalTableScan = HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); boolean isSchemaEvolution = HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION); TypeDescription readerSchema = OrcInputFormat.getDesiredRowTypeDescr(conf, isTransactionalTableScan, Integer.MAX_VALUE); List<OrcProto.Type> readerTypes = null; if (readerSchema != null) { readerTypes = OrcUtils.getOrcTypes(readerSchema); } if (LOG.isDebugEnabled()) { LOG.debug("Generate splits schema evolution property " + isSchemaEvolution + " reader schema " + (readerSchema == null ? "NULL" : readerSchema.toString()) + " transactional scan property " + isTransactionalTableScan); } // complete path futures and schedule split generation try { CombinedCtx combinedCtx = (context.splitStrategyBatchMs > 0) ? new CombinedCtx() : null; long maxWaitUs = context.splitStrategyBatchMs * 1000000; int resultsLeft = paths.length; while (resultsLeft > 0) { AcidDirInfo adi = null; if (combinedCtx != null && combinedCtx.combined != null) { long waitTimeUs = combinedCtx.combineStartUs + maxWaitUs - System.nanoTime(); if (waitTimeUs >= 0) { Future<AcidDirInfo> f = ecs.poll(waitTimeUs, TimeUnit.NANOSECONDS); adi = (f == null) ? null : f.get(); } } else { adi = ecs.take().get(); } if (adi == null) { // We were combining SS-es and the time has expired. assert combinedCtx.combined != null; scheduleSplits(combinedCtx.combined, context, splitFutures, strategyFutures, splits); combinedCtx.combined = null; continue; } // We have received a new directory information, make split strategies. --resultsLeft; // The reason why we can get a list of split strategies here is because for ACID split-update // case when we have a mix of original base files & insert deltas, we will produce two // independent split strategies for them. There is a global flag 'isOriginal' that is set // on a per split strategy basis and it has to be same for all the files in that strategy. List<SplitStrategy<?>> splitStrategies = determineSplitStrategies(combinedCtx, context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseFiles, adi.parsedDeltas, readerTypes, ugi, allowSyntheticFileIds); for (SplitStrategy<?> splitStrategy : splitStrategies) { if (isDebugEnabled) { LOG.debug("Split strategy: {}", splitStrategy); } // Hack note - different split strategies return differently typed lists, yay Java. // This works purely by magic, because we know which strategy produces which type. 
          if (splitStrategy instanceof ETLSplitStrategy) {
            scheduleSplits((ETLSplitStrategy)splitStrategy,
                context, splitFutures, strategyFutures, splits);
          } else {
            @SuppressWarnings("unchecked")
            List<OrcSplit> readySplits = (List<OrcSplit>)splitStrategy.getSplits();
            splits.addAll(readySplits);
          }
        }
      }

      // Run the last combined strategy, if any.
      if (combinedCtx != null && combinedCtx.combined != null) {
        scheduleSplits(combinedCtx.combined, context, splitFutures, strategyFutures, splits);
        combinedCtx.combined = null;
      }

      // complete split futures
      for (Future<Void> ssFuture : strategyFutures) {
        ssFuture.get(); // Make sure we get exceptions strategies might have thrown.
      }
      // All the split strategies are done, so it must be safe to access splitFutures.
      for (Future<List<OrcSplit>> splitFuture : splitFutures) {
        splits.addAll(splitFuture.get());
      }
    } catch (Exception e) {
      cancelFutures(pathFutures);
      cancelFutures(strategyFutures);
      cancelFutures(splitFutures);
      throw new RuntimeException("ORC split generation failed with exception: "
          + e.getMessage(), e);
    }

    if (context.cacheStripeDetails) {
      LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/"
          + context.numFilesCounter.get());
    }

    if (isDebugEnabled) {
      for (OrcSplit split : splits) {
        LOG.debug(split + " projected_columns_uncompressed_size: "
            + split.getColumnarProjectionSize());
      }
    }
    return splits;
  }

  @VisibleForTesting
  // We could have this as a protected method w/no class, but half of Hive is static, so there.
  public static class ContextFactory {
    public Context create(Configuration conf, int numSplits) throws IOException {
      return new Context(conf, numSplits);
    }
  }

  private static void scheduleSplits(ETLSplitStrategy splitStrategy, Context context,
      List<Future<List<OrcSplit>>> splitFutures, List<Future<Void>> strategyFutures,
      List<OrcSplit> splits) throws IOException {
    Future<Void> ssFuture = splitStrategy.generateSplitWork(context, splitFutures, splits);
    if (ssFuture == null) return;
    strategyFutures.add(ssFuture);
  }

  private static <T> void cancelFutures(List<Future<T>> futures) {
    for (Future<T> future : futures) {
      future.cancel(true);
    }
  }

  private static SplitStrategy<?> combineOrCreateETLStrategy(CombinedCtx combinedCtx,
      Context context, FileSystem fs, Path dir, List<HdfsFileStatusWithId> files,
      List<DeltaMetaData> deltas, boolean[] covered, List<OrcProto.Type> readerTypes,
      boolean isOriginal, UserGroupInformation ugi, boolean allowSyntheticFileIds) {
    if (!deltas.isEmpty() || combinedCtx == null) {
      return new ETLSplitStrategy(
          context, fs, dir, files, readerTypes, isOriginal, deltas, covered, ugi,
          allowSyntheticFileIds);
    } else if (combinedCtx.combined == null) {
      combinedCtx.combined = new ETLSplitStrategy(
          context, fs, dir, files, readerTypes, isOriginal, deltas, covered, ugi,
          allowSyntheticFileIds);
      combinedCtx.combineStartUs = System.nanoTime();
      return null;
    } else {
      ETLSplitStrategy.CombineResult r =
          combinedCtx.combined.combineWith(fs, dir, files, isOriginal);
      switch (r) {
        case YES: return null;
        case NO_AND_CONTINUE:
          return new ETLSplitStrategy(
              context, fs, dir, files, readerTypes, isOriginal, deltas, covered, ugi,
              allowSyntheticFileIds);
        case NO_AND_SWAP: {
          ETLSplitStrategy oldBase = combinedCtx.combined;
          combinedCtx.combined = new ETLSplitStrategy(
              context, fs, dir, files, readerTypes, isOriginal, deltas, covered, ugi,
              allowSyntheticFileIds);
          combinedCtx.combineStartUs = System.nanoTime();
          return oldBase;
        }
        default: throw new AssertionError("Unknown result " + r);
      }
    }
  }

  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits)
      throws IOException {
    if (isDebugEnabled) {
      LOG.debug("getSplits started");
    }
    Configuration conf = job;
    if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED)) {
      // Create HiveConf once, since this is expensive.
      conf = new HiveConf(conf, OrcInputFormat.class);
    }
    List<OrcSplit> result = generateSplitsInfo(conf,
        new Context(conf, numSplits, createExternalCaches()));
    if (isDebugEnabled) {
      LOG.debug("getSplits finished");
    }
    return result.toArray(new InputSplit[result.size()]);
  }

  @SuppressWarnings("unchecked")
  private org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct>
      createVectorizedReader(InputSplit split, JobConf conf, Reporter reporter)
          throws IOException {
    return (org.apache.hadoop.mapred.RecordReader)
        new VectorizedOrcInputFormat().getRecordReader(split, conf, reporter);
  }

  @Override
  public org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct>
      getRecordReader(InputSplit inputSplit, JobConf conf,
                      Reporter reporter) throws IOException {
    boolean vectorMode = Utilities.getUseVectorizedInputFileFormat(conf);
    boolean isAcidRead = isAcidRead(conf, inputSplit);

    if (!isAcidRead) {
      if (vectorMode) {
        return createVectorizedReader(inputSplit, conf, reporter);
      } else {
        OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf);
        if (inputSplit instanceof OrcSplit) {
          OrcSplit split = (OrcSplit) inputSplit;
          readerOptions.maxLength(split.getFileLength()).orcTail(split.getOrcTail());
        }
        return new OrcRecordReader(OrcFile.createReader(
            ((FileSplit) inputSplit).getPath(), readerOptions), conf, (FileSplit) inputSplit);
      }
    }

    reporter.setStatus(inputSplit.toString());

    boolean isFastVectorizedReaderAvailable =
        VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(
            conf, inputSplit);
    if (vectorMode && isFastVectorizedReaderAvailable) {
      // Faster vectorized ACID row batch reader is available that avoids row-by-row stitching.
      return (org.apache.hadoop.mapred.RecordReader)
          new VectorizedOrcAcidRowBatchReader(inputSplit, conf, reporter);
    }

    Options options = new Options(conf).reporter(reporter);
    final RowReader<OrcStruct> inner = getReader(inputSplit, options);

    if (vectorMode && !isFastVectorizedReaderAvailable) {
      // Vectorized regular ACID reader that does row-by-row stitching.
      return (org.apache.hadoop.mapred.RecordReader)
          new VectorizedOrcAcidRowReader(inner, conf,
              Utilities.getMapWork(conf).getVectorizedRowBatchCtx(), (FileSplit) inputSplit);
    } else {
      // Non-vectorized regular ACID reader.
      return new NullKeyRecordReader(inner, conf);
    }
  }

  /**
   * Return a RecordReader that is compatible with the Hive 0.12 reader
   * with NullWritable for the key instead of RecordIdentifier.
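   * <p>
   * The underlying RecordIdentifier is still tracked internally and remains available
   * through {@link #getRecordIdentifier()}.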
   */
  public static final class NullKeyRecordReader
      implements AcidRecordReader<NullWritable, OrcStruct> {
    private final RecordIdentifier id;
    private final RowReader<OrcStruct> inner;

    @Override
    public RecordIdentifier getRecordIdentifier() {
      return id;
    }

    private NullKeyRecordReader(RowReader<OrcStruct> inner, Configuration conf) {
      this.inner = inner;
      id = inner.createKey();
    }

    @Override
    public boolean next(NullWritable nullWritable,
                        OrcStruct orcStruct) throws IOException {
      return inner.next(id, orcStruct);
    }

    @Override
    public NullWritable createKey() {
      return NullWritable.get();
    }

    @Override
    public OrcStruct createValue() {
      return inner.createValue();
    }

    @Override
    public long getPos() throws IOException {
      return inner.getPos();
    }

    @Override
    public void close() throws IOException {
      inner.close();
    }

    @Override
    public float getProgress() throws IOException {
      return inner.getProgress();
    }
  }

  @Override
  public RowReader<OrcStruct> getReader(InputSplit inputSplit,
                                        Options options) throws IOException {
    final OrcSplit split = (OrcSplit) inputSplit;
    final Path path = split.getPath();
    Path root;
    if (split.hasBase()) {
      if (split.isOriginal()) {
        root = path.getParent();
      } else {
        root = path.getParent().getParent();
      }
    } else {
      root = path;
    }

    // Retrieve the acidOperationalProperties for the table, initialized in HiveInputFormat.
    AcidUtils.AcidOperationalProperties acidOperationalProperties =
        AcidUtils.getAcidOperationalProperties(options.getConfiguration());

    // The deltas are decided based on whether split-update has been turned on for the table.
    // When split-update is turned off, everything in the delta_x_y/ directory should be treated
    // as a delta. However, if split-update is turned on, only the files in the delete_delta_x_y/
    // directory need to be considered as deltas, because files in delta_x_y/ will be processed
    // as base files since they only contain insert events.
    final Path[] deltas = acidOperationalProperties.isSplitUpdate()
        ? AcidUtils.deserializeDeleteDeltas(root, split.getDeltas())
        : AcidUtils.deserializeDeltas(root, split.getDeltas());

    final Configuration conf = options.getConfiguration();
    final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, split);
    final int bucket = OrcInputFormat.getBucketForSplit(conf, split);

    final Reader.Options readOptions = OrcInputFormat.createOptionsForReader(conf);
    readOptions.range(split.getStart(), split.getLength());

    String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
    ValidTxnList validTxnList = txnString == null ?
        new ValidReadTxnList() : new ValidReadTxnList(txnString);

    final OrcRawRecordMerger records =
        new OrcRawRecordMerger(conf, true, reader, split.isOriginal(), bucket,
            validTxnList, readOptions, deltas);
    return new RowReader<OrcStruct>() {
      OrcStruct innerRecord = records.createValue();

      @Override
      public ObjectInspector getObjectInspector() {
        return OrcStruct.createObjectInspector(0,
            OrcUtils.getOrcTypes(readOptions.getSchema()));
      }

      @Override
      public boolean next(RecordIdentifier recordIdentifier,
                          OrcStruct orcStruct) throws IOException {
        boolean result;
        // filter out the deleted records
        do {
          result = records.next(recordIdentifier, innerRecord);
        } while (result &&
            OrcRecordUpdater.getOperation(innerRecord) == OrcRecordUpdater.DELETE_OPERATION);
        if (result) {
          // swap the fields with the passed in orcStruct
          orcStruct.linkFields(OrcRecordUpdater.getRow(innerRecord));
        }
        return result;
      }

      @Override
      public RecordIdentifier createKey() {
        return records.createKey();
      }

      @Override
      public OrcStruct createValue() {
        return new OrcStruct(records.getColumns());
      }

      @Override
      public long getPos() throws IOException {
        return records.getPos();
      }

      @Override
      public void close() throws IOException {
        records.close();
      }

      @Override
      public float getProgress() throws IOException {
        return records.getProgress();
      }
    };
  }

  static Path findOriginalBucket(FileSystem fs,
                                 Path directory,
                                 int bucket) throws IOException {
    for (FileStatus stat : fs.listStatus(directory)) {
      String name = stat.getPath().getName();
      String numberPart = name.substring(0, name.indexOf('_'));
      if (org.apache.commons.lang3.StringUtils.isNumeric(numberPart) &&
          Integer.parseInt(numberPart) == bucket) {
        return stat.getPath();
      }
    }
    throw new IllegalArgumentException("Can't find bucket " + bucket + " in " + directory);
  }

  static Reader.Options createOptionsForReader(Configuration conf) {
    /**
     * Do we have schema on read in the configuration variables?
     */
    TypeDescription schema =
        OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE);
    Reader.Options readerOptions = new Reader.Options().schema(schema);
    // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription.
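    // Until that conversion happens, derive the flat ORC type list from the schema, since
    // setSearchArgument below still expects the protobuf type representation.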
    final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema);
    readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf));
    OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true);
    return readerOptions;
  }

  static Reader createOrcReaderForSplit(Configuration conf, OrcSplit orcSplit)
      throws IOException {
    Path path = orcSplit.getPath();
    Reader reader;
    if (orcSplit.hasBase()) {
      OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf);
      readerOptions.maxLength(orcSplit.getFileLength());
      if (orcSplit.hasFooter()) {
        readerOptions.orcTail(orcSplit.getOrcTail());
      }
      reader = OrcFile.createReader(path, readerOptions);
    } else {
      reader = null;
    }
    return reader;
  }

  static int getBucketForSplit(Configuration conf, OrcSplit orcSplit) {
    if (orcSplit.hasBase()) {
      return AcidUtils.parseBaseOrDeltaBucketFilename(orcSplit.getPath(), conf).getBucket();
    } else {
      return (int) orcSplit.getStart();
    }
  }

  public static boolean[] pickStripesViaTranslatedSarg(SearchArgument sarg,
      OrcFile.WriterVersion writerVersion, List<OrcProto.Type> types,
      List<StripeStatistics> stripeStats, int stripeCount) {
    LOG.info("Translated ORC pushdown predicate: " + sarg);
    assert sarg != null;
    if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
      return null; // only do split pruning if HIVE-8732 has been fixed in the writer
    }
    // eliminate stripes that don't satisfy the predicate condition
    List<PredicateLeaf> sargLeaves = sarg.getLeaves();
    int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves);
    TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0);
    SchemaEvolution evolution = new SchemaEvolution(schema, null);
    return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null, evolution);
  }

  private static boolean[] pickStripes(SearchArgument sarg, OrcFile.WriterVersion writerVersion,
      List<StripeStatistics> stripeStats, int stripeCount, Path filePath,
      final SchemaEvolution evolution) {
    if (sarg == null || stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
      return null; // only do split pruning if HIVE-8732 has been fixed in the writer
    }
    // eliminate stripes that don't satisfy the predicate condition
    List<PredicateLeaf> sargLeaves = sarg.getLeaves();
    int[] filterColumns =
        RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution);
    return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution);
  }

  private static boolean[] pickStripesInternal(SearchArgument sarg, int[] filterColumns,
      List<StripeStatistics> stripeStats, int stripeCount, Path filePath,
      final SchemaEvolution evolution) {
    boolean[] includeStripe = new boolean[stripeCount];
    for (int i = 0; i < includeStripe.length; ++i) {
      includeStripe[i] = (i >= stripeStats.size()) ||
          isStripeSatisfyPredicate(stripeStats.get(i), sarg, filterColumns, evolution);
      if (isDebugEnabled && !includeStripe[i]) {
        LOG.debug("Eliminating ORC stripe-" + i + " of file '" + filePath
            + "' as it did not satisfy predicate condition.");
      }
    }
    return includeStripe;
  }

  private static boolean isStripeSatisfyPredicate(
      StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns,
      final SchemaEvolution evolution) {
    List<PredicateLeaf> predLeaves = sarg.getLeaves();
    TruthValue[] truthValues = new TruthValue[predLeaves.size()];
    for (int pred = 0; pred < truthValues.length; pred++) {
      if (filterColumns[pred] != -1) {
        if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) {
          truthValues[pred] =
              TruthValue.YES_NO_NULL;
        } else {
          // column statistics at index 0 contain only the number of rows
          ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
          truthValues[pred] =
              RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
        }
      } else {
        // partition column case.
        // partition filter will be evaluated by partition pruner so
        // we will not evaluate partition filter here.
        truthValues[pred] = TruthValue.YES_NO_NULL;
      }
    }
    return sarg.evaluate(truthValues).isNeeded();
  }

  @VisibleForTesting
  static List<SplitStrategy<?>> determineSplitStrategies(CombinedCtx combinedCtx, Context context,
      FileSystem fs, Path dir, AcidUtils.Directory dirInfo,
      List<AcidBaseFileInfo> baseFiles,
      List<ParsedDelta> parsedDeltas,
      List<OrcProto.Type> readerTypes,
      UserGroupInformation ugi, boolean allowSyntheticFileIds) {
    List<SplitStrategy<?>> splitStrategies = new ArrayList<SplitStrategy<?>>();
    SplitStrategy<?> splitStrategy;

    // When there are no baseFiles, we will just generate a single split strategy and return.
    List<HdfsFileStatusWithId> acidSchemaFiles = new ArrayList<HdfsFileStatusWithId>();
    if (baseFiles.isEmpty()) {
      splitStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, dirInfo,
          acidSchemaFiles, false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds);
      if (splitStrategy != null) {
        splitStrategies.add(splitStrategy);
      }
      return splitStrategies; // return here
    }

    List<HdfsFileStatusWithId> originalSchemaFiles = new ArrayList<HdfsFileStatusWithId>();
    // Separate the base files into acid schema and non-acid (original) schema files.
    for (AcidBaseFileInfo acidBaseFileInfo : baseFiles) {
      if (acidBaseFileInfo.isOriginal()) {
        originalSchemaFiles.add(acidBaseFileInfo.getHdfsFileStatusWithId());
      } else {
        acidSchemaFiles.add(acidBaseFileInfo.getHdfsFileStatusWithId());
      }
    }

    // Generate split strategy for non-acid schema original files, if any.
    if (!originalSchemaFiles.isEmpty()) {
      splitStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, dirInfo,
          originalSchemaFiles, true, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds);
      if (splitStrategy != null) {
        splitStrategies.add(splitStrategy);
      }
    }

    // Generate split strategy for acid schema files, if any.
    if (!acidSchemaFiles.isEmpty()) {
      splitStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, dirInfo,
          acidSchemaFiles, false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds);
      if (splitStrategy != null) {
        splitStrategies.add(splitStrategy);
      }
    }

    return splitStrategies;
  }

  @VisibleForTesting
  static SplitStrategy<?> determineSplitStrategy(CombinedCtx combinedCtx, Context context,
      FileSystem fs, Path dir, AcidUtils.Directory dirInfo,
      List<HdfsFileStatusWithId> baseFiles,
      boolean isOriginal,
      List<ParsedDelta> parsedDeltas,
      List<OrcProto.Type> readerTypes,
      UserGroupInformation ugi, boolean allowSyntheticFileIds) {
    List<DeltaMetaData> deltas = AcidUtils.serializeDeltas(parsedDeltas);
    boolean[] covered = new boolean[context.numBuckets];

    // if we have a base to work from
    if (!baseFiles.isEmpty()) {
      long totalFileSize = 0;
      for (HdfsFileStatusWithId child : baseFiles) {
        totalFileSize += child.getFileStatus().getLen();
        AcidOutputFormat.Options opts = AcidUtils.parseBaseOrDeltaBucketFilename(
            child.getFileStatus().getPath(), context.conf);
        opts.writingBase(true);
        int b = opts.getBucket();
        // If the bucket is in the valid range, mark it as covered.
        // I wish Hive actually enforced bucketing all of the time.
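        // Out-of-range bucket ids are simply skipped here rather than failing split generation.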
        if (b >= 0 && b < covered.length) {
          covered[b] = true;
        }
      }

      int numFiles = baseFiles.size();
      long avgFileSize = totalFileSize / numFiles;
      int totalFiles = context.numFilesCounter.addAndGet(numFiles);
      switch (context.splitStrategyKind) {
        case BI:
          // BI strategy requested through config
          return new BISplitStrategy(context, fs, dir, baseFiles, isOriginal, deltas, covered,
              allowSyntheticFileIds);
        case ETL:
          // ETL strategy requested through config
          return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseFiles, deltas,
              covered, readerTypes, isOriginal, ugi, allowSyntheticFileIds);
        default:
          // HYBRID strategy
          if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) {
            return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseFiles, deltas,
                covered, readerTypes, isOriginal, ugi, allowSyntheticFileIds);
          } else {
            return new BISplitStrategy(context, fs, dir, baseFiles, isOriginal, deltas, covered,
                allowSyntheticFileIds);
          }
      }
    } else {
      // no base, only deltas
      return new ACIDSplitStrategy(dir, context.numBuckets, deltas, covered,
          context.acidOperationalProperties);
    }
  }

  @Override
  public RawReader<OrcStruct> getRawReader(Configuration conf,
      boolean collapseEvents,
      int bucket,
      ValidTxnList validTxnList,
      Path baseDirectory,
      Path[] deltaDirectory) throws IOException {
    Reader reader = null;
    boolean isOriginal = false;
    if (baseDirectory != null) {
      Path bucketFile;
      if (baseDirectory.getName().startsWith(AcidUtils.BASE_PREFIX)) {
        bucketFile = AcidUtils.createBucketFile(baseDirectory, bucket);
      } else {
        isOriginal = true;
        bucketFile = findOriginalBucket(baseDirectory.getFileSystem(conf),
            baseDirectory, bucket);
      }
      reader = OrcFile.createReader(bucketFile, OrcFile.readerOptions(conf));
    }
    return new OrcRawRecordMerger(conf, collapseEvents, reader, isOriginal, bucket,
        validTxnList, new Reader.Options(), deltaDirectory);
  }

  /**
   * Represents a footer cache.
   */
  public interface FooterCache {
    ByteBuffer NO_SPLIT_AFTER_PPD = ByteBuffer.wrap(new byte[0]);

    void getAndValidate(List<HdfsFileStatusWithId> files, boolean isOriginal,
        OrcTail[] result, ByteBuffer[] ppdResult) throws IOException, HiveException;
    boolean hasPpd();
    boolean isBlocking();
    void put(FooterCacheKey cacheKey, OrcTail orcTail) throws IOException;
  }

  public static class FooterCacheKey {
    Long fileId; // used by external cache
    Path path; // used by local cache

    FooterCacheKey(Long fileId, Path path) {
      this.fileId = fileId;
      this.path = path;
    }

    public Long getFileId() {
      return fileId;
    }

    public Path getPath() {
      return path;
    }
  }

  /**
   * Convert a Hive type property string that contains separated type names into a list of
   * TypeDescription objects.
   * @param hiveTypeProperty the desired types from hive
   * @param maxColumns the maximum number of desired columns
   * @return the list of TypeDescription objects.
   */
  public static ArrayList<TypeDescription> typeDescriptionsFromHiveTypeProperty(
      String hiveTypeProperty, int maxColumns) {
    // CONSIDER: We need a type name parser for TypeDescription.
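    // For now, parse the property with Hive's TypeInfo utilities and convert each TypeInfo below.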
    ArrayList<TypeInfo> typeInfoList = TypeInfoUtils.getTypeInfosFromTypeString(hiveTypeProperty);
    ArrayList<TypeDescription> typeDescrList =
        new ArrayList<TypeDescription>(typeInfoList.size());
    for (TypeInfo typeInfo : typeInfoList) {
      typeDescrList.add(convertTypeInfo(typeInfo));
      if (typeDescrList.size() >= maxColumns) {
        break;
      }
    }
    return typeDescrList;
  }

  public static TypeDescription convertTypeInfo(TypeInfo info) {
    switch (info.getCategory()) {
      case PRIMITIVE: {
        PrimitiveTypeInfo pinfo = (PrimitiveTypeInfo) info;
        switch (pinfo.getPrimitiveCategory()) {
          case BOOLEAN:
            return TypeDescription.createBoolean();
          case BYTE:
            return TypeDescription.createByte();
          case SHORT:
            return TypeDescription.createShort();
          case INT:
            return TypeDescription.createInt();
          case LONG:
            return TypeDescription.createLong();
          case FLOAT:
            return TypeDescription.createFloat();
          case DOUBLE:
            return TypeDescription.createDouble();
          case STRING:
            return TypeDescription.createString();
          case DATE:
            return TypeDescription.createDate();
          case TIMESTAMP:
            return TypeDescription.createTimestamp();
          case BINARY:
            return TypeDescription.createBinary();
          case DECIMAL: {
            DecimalTypeInfo dinfo = (DecimalTypeInfo) pinfo;
            return TypeDescription.createDecimal()
                .withScale(dinfo.getScale())
                .withPrecision(dinfo.getPrecision());
          }
          case VARCHAR: {
            BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
            return TypeDescription.createVarchar()
                .withMaxLength(cinfo.getLength());
          }
          case CHAR: {
            BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
            return TypeDescription.createChar()
                .withMaxLength(cinfo.getLength());
          }
          default:
            throw new IllegalArgumentException("ORC doesn't handle primitive"
                + " category " + pinfo.getPrimitiveCategory());
        }
      }
      case LIST: {
        ListTypeInfo linfo = (ListTypeInfo) info;
        return TypeDescription.createList(
            convertTypeInfo(linfo.getListElementTypeInfo()));
      }
      case MAP: {
        MapTypeInfo minfo = (MapTypeInfo) info;
        return TypeDescription.createMap(
            convertTypeInfo(minfo.getMapKeyTypeInfo()),
            convertTypeInfo(minfo.getMapValueTypeInfo()));
      }
      case UNION: {
        UnionTypeInfo minfo = (UnionTypeInfo) info;
        TypeDescription result = TypeDescription.createUnion();
        for (TypeInfo child : minfo.getAllUnionObjectTypeInfos()) {
          result.addUnionChild(convertTypeInfo(child));
        }
        return result;
      }
      case STRUCT: {
        StructTypeInfo sinfo = (StructTypeInfo) info;
        TypeDescription result = TypeDescription.createStruct();
        for (String fieldName : sinfo.getAllStructFieldNames()) {
          result.addField(fieldName, convertTypeInfo(sinfo.getStructFieldTypeInfo(fieldName)));
        }
        return result;
      }
      default:
        throw new IllegalArgumentException("ORC doesn't handle " + info.getCategory());
    }
  }

  /**
   * Generate the desired schema for reading the file.
   * @param conf the configuration
   * @param isAcidRead is this an acid format?
   * @param dataColumns the desired number of data columns for vectorized read
   * @return the desired schema or null if schema evolution isn't enabled
   * @throws IllegalArgumentException
   */
  public static TypeDescription getDesiredRowTypeDescr(Configuration conf,
      boolean isAcidRead, int dataColumns) {

    String columnNameProperty = null;
    String columnTypeProperty = null;

    ArrayList<String> schemaEvolutionColumnNames = null;
    ArrayList<TypeDescription> schemaEvolutionTypeDescrs = null;

    boolean haveSchemaEvolutionProperties = false;
    if (isAcidRead || HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION)) {

      columnNameProperty = conf.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS);
      columnTypeProperty = conf.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES);

      haveSchemaEvolutionProperties =
          (columnNameProperty != null && columnTypeProperty != null);

      if (haveSchemaEvolutionProperties) {
        schemaEvolutionColumnNames = Lists.newArrayList(columnNameProperty.split(","));
        if (schemaEvolutionColumnNames.size() == 0) {
          haveSchemaEvolutionProperties = false;
        } else {
          schemaEvolutionTypeDescrs =
              typeDescriptionsFromHiveTypeProperty(columnTypeProperty, dataColumns);
          if (schemaEvolutionTypeDescrs.size() !=
              Math.min(dataColumns, schemaEvolutionColumnNames.size())) {
            haveSchemaEvolutionProperties = false;
          }
        }
      } else if (isAcidRead) {
        throw new IllegalArgumentException(
            ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
      }
    }

    if (haveSchemaEvolutionProperties) {
      if (LOG.isInfoEnabled()) {
        LOG.info("Using schema evolution configuration variables schema.evolution.columns "
            + schemaEvolutionColumnNames.toString()
            + " / schema.evolution.columns.types "
            + schemaEvolutionTypeDescrs.toString()
            + " (isAcidRead " + isAcidRead + ")");
      }
    } else {
      // Try regular properties.
      columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
      columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
      if (columnTypeProperty == null || columnNameProperty == null) {
        return null;
      }

      schemaEvolutionColumnNames = Lists.newArrayList(columnNameProperty.split(","));
      if (schemaEvolutionColumnNames.size() == 0) {
        return null;
      }
      schemaEvolutionTypeDescrs =
          typeDescriptionsFromHiveTypeProperty(columnTypeProperty, dataColumns);
      if (schemaEvolutionTypeDescrs.size() !=
          Math.min(dataColumns, schemaEvolutionColumnNames.size())) {
        return null;
      }

      // Find the first virtual column and clip off everything from it onward.
      int virtualColumnClipNum = -1;
      int columnNum = 0;
      for (String columnName : schemaEvolutionColumnNames) {
        if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName)) {
          virtualColumnClipNum = columnNum;
          break;
        }
        columnNum++;
      }
      if (virtualColumnClipNum != -1 && virtualColumnClipNum < dataColumns) {
        schemaEvolutionColumnNames =
            Lists.newArrayList(schemaEvolutionColumnNames.subList(0, virtualColumnClipNum));
        schemaEvolutionTypeDescrs =
            Lists.newArrayList(schemaEvolutionTypeDescrs.subList(0, virtualColumnClipNum));
      }

      if (LOG.isInfoEnabled()) {
        LOG.info("Using column configuration variables columns "
            + schemaEvolutionColumnNames.toString()
            + " / columns.types "
            + schemaEvolutionTypeDescrs.toString()
            + " (isAcidRead " + isAcidRead + ")");
      }
    }

    // Desired schema does not include virtual columns or partition columns.
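    // Build the result struct from the selected column names and types, in declared order.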
    TypeDescription result = TypeDescription.createStruct();
    for (int i = 0; i < schemaEvolutionTypeDescrs.size(); i++) {
      result.addField(schemaEvolutionColumnNames.get(i), schemaEvolutionTypeDescrs.get(i));
    }

    return result;
  }

  @VisibleForTesting
  protected ExternalFooterCachesByConf createExternalCaches() {
    return null; // The default ones are created in case of null; tests override this.
  }

  @Override
  public BatchToRowReader<?, ?> getWrapper(
      org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> vrr,
      VectorizedRowBatchCtx vrbCtx, List<Integer> includedCols) {
    return new OrcOiBatchToRowReader(vrr, vrbCtx, includedCols);
  }
}