/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.orc;
import static org.junit.Assert.*;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.PrivilegedExceptionAction;
import java.sql.Date;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TimeZone;
import java.util.TreeSet;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
import org.apache.hadoop.hive.ql.exec.tez.ColumnarSplitSizeEstimator;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.shims.CombineHiveKey;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Progressable;
import org.apache.orc.OrcConf;
import org.apache.orc.OrcProto;
import org.apache.orc.TypeDescription;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Output;
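/**
 * Tests for the ORC input and output formats: split strategy selection, ACID
 * directory handling, and split generation, driven by an in-memory mock
 * file system.
 */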
@SuppressWarnings({ "deprecation", "unchecked", "rawtypes" })
public class TestInputOutputFormat {
private static final Logger LOG = LoggerFactory.getLogger(TestInputOutputFormat.class);
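/**
 * Kryo-serialize a SearchArgument and Base64-encode the bytes so the
 * predicate can be passed around as a plain configuration string.
 */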
public static String toKryo(SearchArgument sarg) {
Output out = new Output(4 * 1024, 10 * 1024 * 1024);
new Kryo().writeObject(out, sarg);
out.close();
return Base64.encodeBase64String(out.toBytes());
}
Path workDir = new Path(System.getProperty("test.tmp.dir","target/tmp"));
static final int MILLIS_IN_DAY = 1000 * 60 * 60 * 24;
private static final SimpleDateFormat DATE_FORMAT =
new SimpleDateFormat("yyyy/MM/dd");
private static final SimpleDateFormat TIME_FORMAT =
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS");
private static final TimeZone LOCAL_TIMEZONE = TimeZone.getDefault();
static {
TimeZone gmt = TimeZone.getTimeZone("GMT+0");
DATE_FORMAT.setTimeZone(gmt);
TIME_FORMAT.setTimeZone(gmt);
}
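/**
 * A test row with one column for each of the Hive types used by these tests.
 * Every field is derived from the single long passed to the constructor; the
 * Writable read/write methods are intentionally unimplemented.
 */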
public static class BigRow implements Writable {
boolean booleanValue;
byte byteValue;
short shortValue;
int intValue;
long longValue;
float floatValue;
double doubleValue;
String stringValue;
HiveDecimal decimalValue;
Date dateValue;
Timestamp timestampValue;
BigRow(long x) {
booleanValue = x % 2 == 0;
byteValue = (byte) x;
shortValue = (short) x;
intValue = (int) x;
longValue = x;
floatValue = x;
doubleValue = x;
stringValue = Long.toHexString(x);
decimalValue = HiveDecimal.create(x);
long millisUtc = x * MILLIS_IN_DAY;
millisUtc -= LOCAL_TIMEZONE.getOffset(millisUtc);
dateValue = new Date(millisUtc);
timestampValue = new Timestamp(millisUtc);
}
@Override
public void write(DataOutput dataOutput) throws IOException {
throw new UnsupportedOperationException("no write");
}
@Override
public void readFields(DataInput dataInput) throws IOException {
throw new UnsupportedOperationException("no read");
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("bigrow{booleanValue: ");
builder.append(booleanValue);
builder.append(", byteValue: ");
builder.append(byteValue);
builder.append(", shortValue: ");
builder.append(shortValue);
builder.append(", intValue: ");
builder.append(intValue);
builder.append(", longValue: ");
builder.append(longValue);
builder.append(", floatValue: ");
builder.append(floatValue);
builder.append(", doubleValue: ");
builder.append(doubleValue);
builder.append(", stringValue: ");
builder.append(stringValue);
builder.append(", decimalValue: ");
builder.append(decimalValue);
builder.append(", dateValue: ");
builder.append(DATE_FORMAT.format(dateValue));
builder.append(", timestampValue: ");
builder.append(TIME_FORMAT.format(timestampValue));
builder.append("}");
return builder.toString();
}
static String getColumnNamesProperty() {
return "booleanValue,byteValue,shortValue,intValue,longValue,floatValue,doubleValue,stringValue,decimalValue,dateValue,timestampValue";
}
static String getColumnTypesProperty() {
return "boolean:tinyint:smallint:int:bigint:float:double:string:decimal:date:timestamp";
}
}
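/**
 * A minimal StructField that pairs a field id and name with its
 * ObjectInspector, used to describe the columns of BigRow.
 */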
public static class BigRowField implements StructField {
private final int id;
private final String fieldName;
private final ObjectInspector inspector;
BigRowField(int id, String fieldName, ObjectInspector inspector) {
this.id = id;
this.fieldName = fieldName;
this.inspector = inspector;
}
@Override
public String getFieldName() {
return fieldName;
}
@Override
public ObjectInspector getFieldObjectInspector() {
return inspector;
}
@Override
public String getFieldComment() {
return null;
}
@Override
public int getFieldID() {
return id;
}
@Override
public String toString() {
return "field " + id + " " + fieldName;
}
}
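/**
 * A StructObjectInspector over BigRow that exposes its eleven fields by
 * position and by name.
 */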
public static class BigRowInspector extends StructObjectInspector {
static final List<BigRowField> FIELDS = new ArrayList<BigRowField>();
static {
FIELDS.add(new BigRowField(0, "booleanValue",
PrimitiveObjectInspectorFactory.javaBooleanObjectInspector));
FIELDS.add(new BigRowField(1, "byteValue",
PrimitiveObjectInspectorFactory.javaByteObjectInspector));
FIELDS.add(new BigRowField(2, "shortValue",
PrimitiveObjectInspectorFactory.javaShortObjectInspector));
FIELDS.add(new BigRowField(3, "intValue",
PrimitiveObjectInspectorFactory.javaIntObjectInspector));
FIELDS.add(new BigRowField(4, "longValue",
PrimitiveObjectInspectorFactory.javaLongObjectInspector));
FIELDS.add(new BigRowField(5, "floatValue",
PrimitiveObjectInspectorFactory.javaFloatObjectInspector));
FIELDS.add(new BigRowField(6, "doubleValue",
PrimitiveObjectInspectorFactory.javaDoubleObjectInspector));
FIELDS.add(new BigRowField(7, "stringValue",
PrimitiveObjectInspectorFactory.javaStringObjectInspector));
FIELDS.add(new BigRowField(8, "decimalValue",
PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector));
FIELDS.add(new BigRowField(9, "dateValue",
PrimitiveObjectInspectorFactory.javaDateObjectInspector));
FIELDS.add(new BigRowField(10, "timestampValue",
PrimitiveObjectInspectorFactory.javaTimestampObjectInspector));
}
@Override
public List<? extends StructField> getAllStructFieldRefs() {
return FIELDS;
}
@Override
public StructField getStructFieldRef(String fieldName) {
for(StructField field: FIELDS) {
if (field.getFieldName().equals(fieldName)) {
return field;
}
}
throw new IllegalArgumentException("Can't find field " + fieldName);
}
@Override
public Object getStructFieldData(Object data, StructField fieldRef) {
BigRow obj = (BigRow) data;
switch (((BigRowField) fieldRef).id) {
case 0:
return obj.booleanValue;
case 1:
return obj.byteValue;
case 2:
return obj.shortValue;
case 3:
return obj.intValue;
case 4:
return obj.longValue;
case 5:
return obj.floatValue;
case 6:
return obj.doubleValue;
case 7:
return obj.stringValue;
case 8:
return obj.decimalValue;
case 9:
return obj.dateValue;
case 10:
return obj.timestampValue;
}
throw new IllegalArgumentException("No such field " + fieldRef);
}
@Override
public List<Object> getStructFieldsDataAsList(Object data) {
BigRow obj = (BigRow) data;
List<Object> result = new ArrayList<Object>(11);
result.add(obj.booleanValue);
result.add(obj.byteValue);
result.add(obj.shortValue);
result.add(obj.intValue);
result.add(obj.longValue);
result.add(obj.floatValue);
result.add(obj.doubleValue);
result.add(obj.stringValue);
result.add(obj.decimalValue);
result.add(obj.dateValue);
result.add(obj.timestampValue);
return result;
}
@Override
public String getTypeName() {
return "struct<booleanValue:boolean,byteValue:tinyint," +
"shortValue:smallint,intValue:int,longValue:bigint," +
"floatValue:float,doubleValue:double,stringValue:string," +
"decimalValue:decimal>";
}
@Override
public Category getCategory() {
return Category.STRUCT;
}
}
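/**
 * A simple two-column (x, y) test row; the Writable read/write methods are
 * intentionally unimplemented.
 */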
public static class MyRow implements Writable {
int x;
int y;
MyRow(int x, int y) {
this.x = x;
this.y = y;
}
@Override
public void write(DataOutput dataOutput) throws IOException {
throw new UnsupportedOperationException("no write");
}
@Override
public void readFields(DataInput dataInput) throws IOException {
throw new UnsupportedOperationException("no read");
}
static String getColumnNamesProperty() {
return "x,y";
}
static String getColumnTypesProperty() {
return "int:int";
}
}
@Rule
public TestName testCaseName = new TestName();
JobConf conf;
FileSystem fs;
Path testFilePath;
@Before
public void openFileSystem () throws Exception {
conf = new JobConf();
fs = FileSystem.getLocal(conf);
testFilePath = new Path(workDir, "TestInputOutputFormat." +
testCaseName.getMethodName() + ".orc");
fs.delete(testFilePath, false);
}
@Test
public void testOverlap() throws Exception {
assertEquals(0, OrcInputFormat.SplitGenerator.getOverlap(100, 100,
200, 100));
assertEquals(0, OrcInputFormat.SplitGenerator.getOverlap(0, 1000,
2000, 100));
assertEquals(100, OrcInputFormat.SplitGenerator.getOverlap(1000, 1000,
1500, 100));
assertEquals(250, OrcInputFormat.SplitGenerator.getOverlap(1000, 250,
500, 2000));
assertEquals(100, OrcInputFormat.SplitGenerator.getOverlap(1000, 1000,
1900, 1000));
assertEquals(500, OrcInputFormat.SplitGenerator.getOverlap(2000, 1000,
2500, 2000));
}
@Test
public void testGetInputPaths() throws Exception {
conf.set("mapred.input.dir", "a,b,c");
assertArrayEquals(new Path[]{new Path("a"), new Path("b"), new Path("c")},
OrcInputFormat.getInputPaths(conf));
conf.set("mapred.input.dir", "/a/b/c/d/e");
assertArrayEquals(new Path[]{new Path("/a/b/c/d/e")},
OrcInputFormat.getInputPaths(conf));
conf.set("mapred.input.dir", "/a/b/c\\,d,/e/f\\,g/h");
assertArrayEquals(new Path[]{new Path("/a/b/c,d"), new Path("/e/f,g/h")},
OrcInputFormat.getInputPaths(conf));
}
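/**
 * Build a MockFileSystem containing {@code count} files of {@code size}
 * bytes each under mock:/a/b.
 */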
private FileSystem generateMockFiles(final int count, final int size) {
final byte[] data = new byte[size];
MockFile[] files = new MockFile[count];
for (int i = 0; i < count; i++) {
files[i] = new MockFile(String.format("mock:/a/b/part-%d", i), size, data);
}
return new MockFileSystem(conf, files);
}
@Test
public void testSplitStrategySelection() throws Exception {
conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
conf.set(HiveConf.ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "10Mb");
final int[] counts = { 1, 10, 100, 256 };
final int[] sizes = { 100, 1000 };
final int[] numSplits = { 1, 9, 10, 11, 99, 111 };
final String[] strategyResults = new String[] {
"ETLSplitStrategy", /* 1 files x 100 size for 1 splits */
"ETLSplitStrategy", /* 1 files x 100 size for 9 splits */
"ETLSplitStrategy", /* 1 files x 100 size for 10 splits */
"ETLSplitStrategy", /* 1 files x 100 size for 11 splits */
"ETLSplitStrategy", /* 1 files x 100 size for 99 splits */
"ETLSplitStrategy", /* 1 files x 100 size for 111 splits */
"ETLSplitStrategy", /* 1 files x 1000 size for 1 splits */
"ETLSplitStrategy", /* 1 files x 1000 size for 9 splits */
"ETLSplitStrategy", /* 1 files x 1000 size for 10 splits */
"ETLSplitStrategy", /* 1 files x 1000 size for 11 splits */
"ETLSplitStrategy", /* 1 files x 1000 size for 99 splits */
"ETLSplitStrategy", /* 1 files x 1000 size for 111 splits */
"BISplitStrategy", /* 10 files x 100 size for 1 splits */
"BISplitStrategy", /* 10 files x 100 size for 9 splits */
"ETLSplitStrategy", /* 10 files x 100 size for 10 splits */
"ETLSplitStrategy", /* 10 files x 100 size for 11 splits */
"ETLSplitStrategy", /* 10 files x 100 size for 99 splits */
"ETLSplitStrategy", /* 10 files x 100 size for 111 splits */
"ETLSplitStrategy", /* 10 files x 1000 size for 1 splits */
"ETLSplitStrategy", /* 10 files x 1000 size for 9 splits */
"ETLSplitStrategy", /* 10 files x 1000 size for 10 splits */
"ETLSplitStrategy", /* 10 files x 1000 size for 11 splits */
"ETLSplitStrategy", /* 10 files x 1000 size for 99 splits */
"ETLSplitStrategy", /* 10 files x 1000 size for 111 splits */
"BISplitStrategy", /* 100 files x 100 size for 1 splits */
"BISplitStrategy", /* 100 files x 100 size for 9 splits */
"BISplitStrategy", /* 100 files x 100 size for 10 splits */
"BISplitStrategy", /* 100 files x 100 size for 11 splits */
"BISplitStrategy", /* 100 files x 100 size for 99 splits */
"ETLSplitStrategy", /* 100 files x 100 size for 111 splits */
"ETLSplitStrategy", /* 100 files x 1000 size for 1 splits */
"ETLSplitStrategy", /* 100 files x 1000 size for 9 splits */
"ETLSplitStrategy", /* 100 files x 1000 size for 10 splits */
"ETLSplitStrategy", /* 100 files x 1000 size for 11 splits */
"ETLSplitStrategy", /* 100 files x 1000 size for 99 splits */
"ETLSplitStrategy", /* 100 files x 1000 size for 111 splits */
"BISplitStrategy", /* 256 files x 100 size for 1 splits */
"BISplitStrategy", /* 256 files x 100 size for 9 splits */
"BISplitStrategy", /* 256 files x 100 size for 10 splits */
"BISplitStrategy", /* 256 files x 100 size for 11 splits */
"BISplitStrategy", /* 256 files x 100 size for 99 splits */
"BISplitStrategy", /* 256 files x 100 size for 111 splits */
"ETLSplitStrategy", /* 256 files x 1000 size for 1 splits */
"ETLSplitStrategy", /* 256 files x 1000 size for 9 splits */
"ETLSplitStrategy", /* 256 files x 1000 size for 10 splits */
"ETLSplitStrategy", /* 256 files x 1000 size for 11 splits */
"ETLSplitStrategy", /* 256 files x 1000 size for 99 splits */
"ETLSplitStrategy", /* 256 files x 1000 size for 111 splits */
};
int k = 0;
for (int c : counts) {
for (int s : sizes) {
final FileSystem fs = generateMockFiles(c, s);
for (int n : numSplits) {
final OrcInputFormat.Context context = new OrcInputFormat.Context(
conf, n);
OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
context, fs, new MockPath(fs, "mock:/a/b"), false, null);
List<SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
final SplitStrategy splitStrategy = splitStrategies.get(0);
assertEquals(
String.format(
"Split strategy for %d files x %d size for %d splits", c, s, n),
strategyResults[k++],
splitStrategy.getClass().getSimpleName());
}
}
}
k = 0;
conf.set(ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "0");
for (int c : counts) {
for (int s : sizes) {
final FileSystem fs = generateMockFiles(c, s);
for (int n : numSplits) {
final OrcInputFormat.Context context = new OrcInputFormat.Context(
conf, n);
OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
context, fs, new MockPath(fs, "mock:/a/b"), false, null);
List<SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
final SplitStrategy splitStrategy = splitStrategies.get(0);
assertEquals(
String.format(
"Split strategy for %d files x %d size for %d splits", c, s, n),
strategyResults[k++],
splitStrategy.getClass().getSimpleName());
}
}
}
}
@Test
public void testFileGenerator() throws Exception {
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
MockFileSystem fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/part-00", 1000, new byte[1]),
new MockFile("mock:/a/b/part-01", 1000, new byte[1]),
new MockFile("mock:/a/b/_part-02", 1000, new byte[1]),
new MockFile("mock:/a/b/.part-03", 1000, new byte[1]),
new MockFile("mock:/a/b/part-04", 1000, new byte[1]));
OrcInputFormat.FileGenerator gen =
new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"), false, null);
List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.BISplitStrategy);
conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
context = new OrcInputFormat.Context(conf);
fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/part-00", 1000, new byte[1000]),
new MockFile("mock:/a/b/part-01", 1000, new byte[1000]),
new MockFile("mock:/a/b/_part-02", 1000, new byte[1000]),
new MockFile("mock:/a/b/.part-03", 1000, new byte[1000]),
new MockFile("mock:/a/b/part-04", 1000, new byte[1000]));
gen = new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"), false, null);
splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ETLSplitStrategy);
}
@Test
public void testACIDSplitStrategy() throws Exception {
conf.set("bucket_count", "2");
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
MockFileSystem fs = new MockFileSystem(conf,
new MockFile("mock:/a/delta_000_001/part-00", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delta_000_001/part-01", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delta_001_002/part-02", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delta_001_002/part-03", 1000, new byte[1], new MockBlock("host1")));
OrcInputFormat.FileGenerator gen =
new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a"), false, null);
List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
List<OrcSplit> splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(0)).getSplits();
ColumnarSplitSizeEstimator splitSizeEstimator = new ColumnarSplitSizeEstimator();
for (OrcSplit split: splits) {
assertEquals(Integer.MAX_VALUE, splitSizeEstimator.getEstimatedSize(split));
}
assertEquals(2, splits.size());
}
@Test
public void testACIDSplitStrategyForSplitUpdate() throws Exception {
conf.set("bucket_count", "2");
conf.set(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true");
conf.set(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, "default");
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
// Case 1: Test with just originals => Single split strategy with two splits.
MockFileSystem fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/b/000000_1", 1000, new byte[1], new MockBlock("host1")));
OrcInputFormat.FileGenerator gen =
new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a"), false, null);
List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
List<OrcSplit> splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(0)).getSplits();
assertEquals(2, splits.size());
assertEquals("mock:/a/b/000000_0", splits.get(0).getPath().toUri().toString());
assertEquals("mock:/a/b/000000_1", splits.get(1).getPath().toUri().toString());
assertTrue(splits.get(0).isOriginal());
assertTrue(splits.get(1).isOriginal());
// Case 2: Test with originals and base => Single split strategy with two splits on compacted
// base since the presence of a base will make the originals obsolete.
fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/b/000000_1", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/base_0000001/bucket_00000", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/base_0000001/bucket_00001", 1000, new byte[1], new MockBlock("host1")));
gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(0)).getSplits();
assertEquals(2, splits.size());
assertEquals("mock:/a/base_0000001/bucket_00000", splits.get(0).getPath().toUri().toString());
assertEquals("mock:/a/base_0000001/bucket_00001", splits.get(1).getPath().toUri().toString());
assertFalse(splits.get(0).isOriginal());
assertFalse(splits.get(1).isOriginal());
// Case 3: Test with originals and deltas => Two split strategies with two splits for each.
fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/b/000000_1", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delta_0000001_0000001_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")));
gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
splitStrategies = createSplitStrategies(context, gen);
assertEquals(2, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(0)).getSplits();
assertEquals(2, splits.size());
assertEquals("mock:/a/b/000000_0", splits.get(0).getPath().toUri().toString());
assertEquals("mock:/a/b/000000_1", splits.get(1).getPath().toUri().toString());
assertTrue(splits.get(0).isOriginal());
assertTrue(splits.get(1).isOriginal());
assertTrue(splitStrategies.get(1) instanceof OrcInputFormat.ACIDSplitStrategy);
splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(1)).getSplits();
assertEquals(2, splits.size());
assertEquals("mock:/a/delta_0000001_0000001_0000/bucket_00000", splits.get(0).getPath().toUri().toString());
assertEquals("mock:/a/delta_0000001_0000001_0000/bucket_00001", splits.get(1).getPath().toUri().toString());
assertFalse(splits.get(0).isOriginal());
assertFalse(splits.get(1).isOriginal());
// Case 4: Test with originals and deltas but now with only one bucket covered, i.e. we will
// have originals & insert_deltas for only one bucket, but the delete_deltas will be for two
// buckets => Two strategies with one split for each.
// When split-update is enabled, we do not need to account for buckets that aren't covered.
// The reason why we are able to do so is because the valid user data has already been considered
// as base for the covered buckets. Hence, the uncovered buckets do not have any relevant
// data and we can just ignore them.
fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")));
gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
splitStrategies = createSplitStrategies(context, gen);
assertEquals(2, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(0)).getSplits();
assertEquals(1, splits.size());
assertEquals("mock:/a/b/000000_0", splits.get(0).getPath().toUri().toString());
assertTrue(splits.get(0).isOriginal());
assertTrue(splitStrategies.get(1) instanceof OrcInputFormat.ACIDSplitStrategy);
splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(1)).getSplits();
assertEquals(1, splits.size());
assertEquals("mock:/a/delta_0000001_0000001_0000/bucket_00000", splits.get(0).getPath().toUri().toString());
assertFalse(splits.get(0).isOriginal());
// Case 5: Test with originals, compacted_base, insert_deltas, delete_deltas (exhaustive test)
// This should just generate one strategy with splits for base and insert_deltas.
fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/000000_0", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/b/000000_1", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/base_0000001/bucket_00000", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/base_0000001/bucket_00001", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delta_0000002_0000002_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delta_0000002_0000002_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delete_delta_0000002_0000002_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")),
new MockFile("mock:/a/delete_delta_0000002_0000002_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1")));
gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null);
splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(0)).getSplits();
assertEquals(4, splits.size());
assertEquals("mock:/a/base_0000001/bucket_00000", splits.get(0).getPath().toUri().toString());
assertEquals("mock:/a/base_0000001/bucket_00001", splits.get(1).getPath().toUri().toString());
assertEquals("mock:/a/delta_0000002_0000002_0000/bucket_00000", splits.get(2).getPath().toUri().toString());
assertEquals("mock:/a/delta_0000002_0000002_0000/bucket_00001", splits.get(3).getPath().toUri().toString());
assertFalse(splits.get(0).isOriginal());
assertFalse(splits.get(1).isOriginal());
assertFalse(splits.get(2).isOriginal());
assertFalse(splits.get(3).isOriginal());
}
@Test
public void testBIStrategySplitBlockBoundary() throws Exception {
conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
MockFileSystem fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/part-00", 1000, new byte[1], new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-01", 1000, new byte[1], new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-02", 1000, new byte[1], new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-03", 1000, new byte[1], new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-04", 1000, new byte[1], new MockBlock("host1", "host2")));
OrcInputFormat.FileGenerator gen =
new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"), false, null);
List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.BISplitStrategy);
List<OrcSplit> splits = ((OrcInputFormat.BISplitStrategy)splitStrategies.get(0)).getSplits();
int numSplits = splits.size();
assertEquals(5, numSplits);
context = new OrcInputFormat.Context(conf);
fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/part-00", 1000, new byte[1000], new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-01", 1000, new byte[1000], new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-02", 1000, new byte[1000], new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-03", 1000, new byte[1000], new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-04", 1000, new byte[1000], new MockBlock("host1", "host2")));
gen = new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"), false, null);
splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.BISplitStrategy);
splits = ((OrcInputFormat.BISplitStrategy)splitStrategies.get(0)).getSplits();
numSplits = splits.size();
assertEquals(5, numSplits);
context = new OrcInputFormat.Context(conf);
fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/part-00", 1000, new byte[1100], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-01", 1000, new byte[1100], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-02", 1000, new byte[1100], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-03", 1000, new byte[1100], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-04", 1000, new byte[1100], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")));
gen = new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"), false, null);
splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.BISplitStrategy);
splits = ((OrcInputFormat.BISplitStrategy)splitStrategies.get(0)).getSplits();
numSplits = splits.size();
assertEquals(10, numSplits);
context = new OrcInputFormat.Context(conf);
fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/part-00", 1000, new byte[2000], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-01", 1000, new byte[2000], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-02", 1000, new byte[2000], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-03", 1000, new byte[2000], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-04", 1000, new byte[2000], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2")));
gen = new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"), false, null);
splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.BISplitStrategy);
splits = ((OrcInputFormat.BISplitStrategy)splitStrategies.get(0)).getSplits();
numSplits = splits.size();
assertEquals(10, numSplits);
context = new OrcInputFormat.Context(conf);
fs = new MockFileSystem(conf,
new MockFile("mock:/a/b/part-00", 1000, new byte[2200], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2"), new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-01", 1000, new byte[2200], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2"), new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-02", 1000, new byte[2200], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2"), new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-03", 1000, new byte[2200], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2"), new MockBlock("host1", "host2")),
new MockFile("mock:/a/b/part-04", 1000, new byte[2200], new MockBlock("host1", "host2"),
new MockBlock("host1", "host2"), new MockBlock("host1", "host2")));
gen = new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"), false, null);
splitStrategies = createSplitStrategies(context, gen);
assertEquals(1, splitStrategies.size());
assertTrue(splitStrategies.get(0) instanceof OrcInputFormat.BISplitStrategy);
splits = ((OrcInputFormat.BISplitStrategy)splitStrategies.get(0)).getSplits();
numSplits = splits.size();
assertEquals(15, numSplits);
}
@Test
public void testEtlCombinedStrategy() throws Exception {
conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "ETL");
conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_DIRECTORY_BATCH_MS.varname, "1000000");
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
MockFileSystem fs = new MockFileSystem(conf,
new MockFile("mock:/a/1/part-00", 1000, new byte[1]),
new MockFile("mock:/a/1/part-01", 1000, new byte[1]),
new MockFile("mock:/a/2/part-00", 1000, new byte[1]),
new MockFile("mock:/a/2/part-01", 1000, new byte[1]),
new MockFile("mock:/a/3/base_0/1", 1000, new byte[1]),
new MockFile("mock:/a/4/base_0/1", 1000, new byte[1]),
new MockFile("mock:/a/5/base_0/1", 1000, new byte[1]),
new MockFile("mock:/a/5/delta_0_25/1", 1000, new byte[1])
);
OrcInputFormat.CombinedCtx combineCtx = new OrcInputFormat.CombinedCtx();
// The first directory becomes the base for combining.
List<SplitStrategy<?>> ss = createOrCombineStrategies(context, fs, "mock:/a/1", combineCtx);
assertTrue(ss.isEmpty());
assertTrue(combineCtx.combined instanceof OrcInputFormat.ETLSplitStrategy);
OrcInputFormat.ETLSplitStrategy etlSs = combineCtx.combined;
assertEquals(2, etlSs.files.size());
assertTrue(etlSs.isOriginal);
assertEquals(1, etlSs.dirs.size());
// The second one should be combined into the first.
ss = createOrCombineStrategies(context, fs, "mock:/a/2", combineCtx);
assertTrue(ss.isEmpty());
assertTrue(combineCtx.combined instanceof OrcInputFormat.ETLSplitStrategy);
assertEquals(4, etlSs.files.size());
assertEquals(2, etlSs.dirs.size());
// The third one has the base file, so it shouldn't be combined but could be a base.
ss = createOrCombineStrategies(context, fs, "mock:/a/3", combineCtx);
assertEquals(1, ss.size());
assertSame(etlSs, ss.get(0));
assertEquals(4, etlSs.files.size());
assertEquals(2, etlSs.dirs.size());
assertTrue(combineCtx.combined instanceof OrcInputFormat.ETLSplitStrategy);
etlSs = combineCtx.combined;
assertEquals(1, etlSs.files.size());
assertFalse(etlSs.isOriginal);
assertEquals(1, etlSs.dirs.size());
// Try the first again, it would not be combined and we'd retain the old base (less files).
ss = createOrCombineStrategies(context, fs, "mock:/a/1", combineCtx);
assertEquals(1, ss.size());
assertTrue(ss.get(0) instanceof OrcInputFormat.ETLSplitStrategy);
assertNotSame(etlSs, ss.get(0));
OrcInputFormat.ETLSplitStrategy rejectedEtlSs = (OrcInputFormat.ETLSplitStrategy)ss.get(0);
assertEquals(2, rejectedEtlSs.files.size());
assertEquals(1, rejectedEtlSs.dirs.size());
assertTrue(rejectedEtlSs.isOriginal);
assertEquals(1, etlSs.files.size());
assertEquals(1, etlSs.dirs.size());
// The fourth could be combined again.
ss = createOrCombineStrategies(context, fs, "mock:/a/4", combineCtx);
assertTrue(ss.isEmpty());
assertTrue(combineCtx.combined instanceof OrcInputFormat.ETLSplitStrategy);
assertEquals(2, etlSs.files.size());
assertEquals(2, etlSs.dirs.size());
// The fifth will not be combined because of delta files.
ss = createOrCombineStrategies(context, fs, "mock:/a/5", combineCtx);
assertEquals(1, ss.size());
assertTrue(ss.get(0) instanceof OrcInputFormat.ETLSplitStrategy);
assertNotSame(etlSs, ss.get(0));
assertEquals(2, etlSs.files.size());
assertEquals(2, etlSs.dirs.size());
}
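/**
 * List the given directory with FileGenerator and feed the result to
 * OrcInputFormat.determineSplitStrategies using the shared combine context,
 * so consecutive directories can be merged into one ETL strategy.
 */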
public List<SplitStrategy<?>> createOrCombineStrategies(OrcInputFormat.Context context,
MockFileSystem fs, String path, OrcInputFormat.CombinedCtx combineCtx) throws IOException {
OrcInputFormat.AcidDirInfo adi = createAdi(context, fs, path);
return OrcInputFormat.determineSplitStrategies(combineCtx, context,
adi.fs, adi.splitPath, adi.acidInfo, adi.baseFiles, adi.parsedDeltas,
null, null, true);
}
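/** Run the FileGenerator for a single directory and return its AcidDirInfo. */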
public OrcInputFormat.AcidDirInfo createAdi(
OrcInputFormat.Context context, MockFileSystem fs, String path) throws IOException {
return new OrcInputFormat.FileGenerator(
context, fs, new MockPath(fs, path), false, null).call();
}
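/**
 * Run the FileGenerator and turn the resulting AcidDirInfo into split
 * strategies without any combining context.
 */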
private List<OrcInputFormat.SplitStrategy<?>> createSplitStrategies(
OrcInputFormat.Context context, OrcInputFormat.FileGenerator gen) throws IOException {
OrcInputFormat.AcidDirInfo adi = gen.call();
return OrcInputFormat.determineSplitStrategies(
null, context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseFiles, adi.parsedDeltas,
null, null, true);
}
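/**
 * An in-memory stand-in for an HDFS block: an offset, a length, and the
 * hosts that hold a replica.
 */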
public static class MockBlock {
int offset;
int length;
final String[] hosts;
public MockBlock(String... hosts) {
this.hosts = hosts;
}
public void setOffset(int offset) {
this.offset = offset;
}
public void setLength(int length) {
this.length = length;
}
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append("block{offset: ");
buffer.append(offset);
buffer.append(", length: ");
buffer.append(length);
buffer.append(", hosts: [");
for(int i=0; i < hosts.length; i++) {
if (i != 0) {
buffer.append(", ");
}
buffer.append(hosts[i]);
}
buffer.append("]}");
return buffer.toString();
}
}
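/**
 * An in-memory file with a fixed block size, a byte array as content, and
 * the MockBlocks that cover it.
 */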
public static class MockFile {
final Path path;
int blockSize;
int length;
MockBlock[] blocks;
byte[] content;
public MockFile(String path, int blockSize, byte[] content,
MockBlock... blocks) {
this.path = new Path(path);
this.blockSize = blockSize;
this.blocks = blocks;
this.content = content;
this.length = content.length;
int offset = 0;
for(MockBlock block: blocks) {
block.offset = offset;
block.length = Math.min(length - offset, blockSize);
offset += block.length;
}
}
@Override
public int hashCode() {
return path.hashCode() + 31 * length;
}
@Override
public boolean equals(final Object obj) {
if (!(obj instanceof MockFile)) { return false; }
return ((MockFile) obj).path.equals(this.path) && ((MockFile) obj).length == this.length;
}
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append("mockFile{path: ");
buffer.append(path.toString());
buffer.append(", blkSize: ");
buffer.append(blockSize);
buffer.append(", len: ");
buffer.append(length);
buffer.append(", blocks: [");
for(int i=0; i < blocks.length; i++) {
if (i != 0) {
buffer.append(", ");
}
buffer.append(blocks[i]);
}
buffer.append("]}");
return buffer.toString();
}
}
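/** An FSInputStream that reads directly from a MockFile's content array. */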
static class MockInputStream extends FSInputStream {
final MockFile file;
int offset = 0;
public MockInputStream(MockFile file) throws IOException {
this.file = file;
}
@Override
public void seek(long offset) throws IOException {
this.offset = (int) offset;
}
@Override
public long getPos() throws IOException {
return offset;
}
@Override
public boolean seekToNewSource(long l) throws IOException {
return false;
}
@Override
public int read() throws IOException {
if (offset < file.length) {
return file.content[offset++] & 0xff;
}
return -1;
}
}
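/**
 * A Path that always resolves to the supplied FileSystem, so tests can use
 * mock: paths without going through FileSystem.get().
 */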
public static class MockPath extends Path {
private final FileSystem fs;
public MockPath(FileSystem fs, String path) {
super(path);
this.fs = fs;
}
@Override
public FileSystem getFileSystem(Configuration conf) {
return fs;
}
}
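/**
 * An FSDataOutputStream that copies everything written into the backing
 * MockFile when the stream is closed.
 */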
public static class MockOutputStream extends FSDataOutputStream {
private final MockFile file;
public MockOutputStream(MockFile file) throws IOException {
super(new DataOutputBuffer(), null);
this.file = file;
}
/**
* Set the blocks and their location for the file.
* Must be called after the stream is closed or the block length will be
* wrong.
* @param blocks the list of blocks
*/
public void setBlocks(MockBlock... blocks) {
file.blocks = blocks;
int offset = 0;
int i = 0;
while (offset < file.length && i < blocks.length) {
blocks[i].offset = offset;
blocks[i].length = Math.min(file.length - offset, file.blockSize);
offset += blocks[i].length;
i += 1;
}
}
@Override
public void close() throws IOException {
super.close();
DataOutputBuffer buf = (DataOutputBuffer) getWrappedStream();
file.length = buf.getLength();
file.content = new byte[file.length];
MockBlock block = new MockBlock("host1");
block.setLength(file.length);
setBlocks(block);
System.arraycopy(buf.getData(), 0, file.content, 0, file.length);
}
@Override
public String toString() {
return "Out stream to " + file.toString();
}
}
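/**
 * An in-memory FileSystem for the mock: scheme, backed by a list of
 * MockFiles. It implements just enough of the listing, status, and
 * block-location calls for the split generation code under test.
 */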
public static class MockFileSystem extends FileSystem {
final List<MockFile> files = new ArrayList<MockFile>();
final Map<MockFile, FileStatus> fileStatusMap = new HashMap<>();
Path workingDir = new Path("/");
// statics for when the mock fs is created via FileSystem.get
private static String blockedUgi = null;
private final static List<MockFile> globalFiles = new ArrayList<MockFile>();
protected Statistics statistics;
public MockFileSystem() {
// empty
}
@Override
public void initialize(URI uri, Configuration conf) {
setConf(conf);
statistics = getStatistics("mock", getClass());
}
public MockFileSystem(Configuration conf, MockFile... files) {
setConf(conf);
this.files.addAll(Arrays.asList(files));
statistics = getStatistics("mock", getClass());
}
public static void setBlockedUgi(String s) {
blockedUgi = s;
}
void clear() {
files.clear();
}
@Override
public URI getUri() {
try {
return new URI("mock:///");
} catch (URISyntaxException err) {
throw new IllegalArgumentException("huh?", err);
}
}
// increments file modification time
public void touch(MockFile file) {
if (fileStatusMap.containsKey(file)) {
FileStatus fileStatus = fileStatusMap.get(file);
FileStatus fileStatusNew = new FileStatus(fileStatus.getLen(), fileStatus.isDirectory(),
fileStatus.getReplication(), fileStatus.getBlockSize(),
fileStatus.getModificationTime() + 1, fileStatus.getAccessTime(),
fileStatus.getPermission(), fileStatus.getOwner(), fileStatus.getGroup(),
fileStatus.getPath());
fileStatusMap.put(file, fileStatusNew);
}
}
@SuppressWarnings("serial")
public static class MockAccessDenied extends IOException {
}
@Override
public FSDataInputStream open(Path path, int i) throws IOException {
statistics.incrementReadOps(1);
checkAccess();
MockFile file = findFile(path);
if (file != null) return new FSDataInputStream(new MockInputStream(file));
throw new IOException("File not found: " + path);
}
private MockFile findFile(Path path) {
for (MockFile file: files) {
if (file.path.equals(path)) {
return file;
}
}
for (MockFile file: globalFiles) {
if (file.path.equals(path)) {
return file;
}
}
return null;
}
private void checkAccess() throws IOException {
if (blockedUgi == null) return;
if (!blockedUgi.equals(UserGroupInformation.getCurrentUser().getShortUserName())) return;
throw new MockAccessDenied();
}
@Override
public FSDataOutputStream create(Path path, FsPermission fsPermission,
boolean overwrite, int bufferSize,
short replication, long blockSize,
Progressable progressable
) throws IOException {
statistics.incrementWriteOps(1);
checkAccess();
MockFile file = findFile(path);
if (file == null) {
file = new MockFile(path.toString(), (int) blockSize, new byte[0]);
files.add(file);
}
return new MockOutputStream(file);
}
@Override
public FSDataOutputStream append(Path path, int bufferSize,
Progressable progressable
) throws IOException {
statistics.incrementWriteOps(1);
checkAccess();
return create(path, FsPermission.getDefault(), true, bufferSize,
(short) 3, 256 * 1024, progressable);
}
@Override
public boolean rename(Path path, Path path2) throws IOException {
statistics.incrementWriteOps(1);
checkAccess();
return false;
}
@Override
public boolean delete(Path path) throws IOException {
statistics.incrementWriteOps(1);
checkAccess();
return false;
}
@Override
public boolean delete(Path path, boolean b) throws IOException {
statistics.incrementWriteOps(1);
checkAccess();
return false;
}
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f)
throws IOException {
return new RemoteIterator<LocatedFileStatus>() {
private Iterator<LocatedFileStatus> iterator = listLocatedFileStatuses(f).iterator();
@Override
public boolean hasNext() throws IOException {
return iterator.hasNext();
}
@Override
public LocatedFileStatus next() throws IOException {
return iterator.next();
}
};
}
private List<LocatedFileStatus> listLocatedFileStatuses(Path path) throws IOException {
statistics.incrementReadOps(1);
checkAccess();
path = path.makeQualified(this);
List<LocatedFileStatus> result = new ArrayList<>();
String pathname = path.toString();
String pathnameAsDir = pathname + "/";
Set<String> dirs = new TreeSet<String>();
MockFile file = findFile(path);
if (file != null) {
result.add(createLocatedStatus(file));
return result;
}
findMatchingLocatedFiles(files, pathnameAsDir, dirs, result);
findMatchingLocatedFiles(globalFiles, pathnameAsDir, dirs, result);
// for each directory add it once
for(String dir: dirs) {
result.add(createLocatedDirectory(new MockPath(this, pathnameAsDir + dir)));
}
return result;
}
@Override
public FileStatus[] listStatus(Path path) throws IOException {
statistics.incrementReadOps(1);
checkAccess();
path = path.makeQualified(this);
List<FileStatus> result = new ArrayList<FileStatus>();
String pathname = path.toString();
String pathnameAsDir = pathname + "/";
Set<String> dirs = new TreeSet<String>();
MockFile file = findFile(path);
if (file != null) {
return new FileStatus[]{createStatus(file)};
}
findMatchingFiles(files, pathnameAsDir, dirs, result);
findMatchingFiles(globalFiles, pathnameAsDir, dirs, result);
// for each directory add it once
for(String dir: dirs) {
result.add(createDirectory(new MockPath(this, pathnameAsDir + dir)));
}
return result.toArray(new FileStatus[result.size()]);
}
private void findMatchingFiles(
List<MockFile> files, String pathnameAsDir, Set<String> dirs, List<FileStatus> result) {
for (MockFile file: files) {
String filename = file.path.toString();
if (filename.startsWith(pathnameAsDir)) {
String tail = filename.substring(pathnameAsDir.length());
int nextSlash = tail.indexOf('/');
if (nextSlash > 0) {
dirs.add(tail.substring(0, nextSlash));
} else {
result.add(createStatus(file));
}
}
}
}
private void findMatchingLocatedFiles(
List<MockFile> files, String pathnameAsDir, Set<String> dirs, List<LocatedFileStatus> result)
throws IOException {
for (MockFile file: files) {
String filename = file.path.toString();
if (filename.startsWith(pathnameAsDir)) {
String tail = filename.substring(pathnameAsDir.length());
int nextSlash = tail.indexOf('/');
if (nextSlash > 0) {
dirs.add(tail.substring(0, nextSlash));
} else {
result.add(createLocatedStatus(file));
}
}
}
}
@Override
public void setWorkingDirectory(Path path) {
workingDir = path;
}
@Override
public Path getWorkingDirectory() {
return workingDir;
}
@Override
public boolean mkdirs(Path path, FsPermission fsPermission) {
statistics.incrementWriteOps(1);
return false;
}
private FileStatus createStatus(MockFile file) {
if (fileStatusMap.containsKey(file)) {
return fileStatusMap.get(file);
}
FileStatus fileStatus = new FileStatus(file.length, false, 1, file.blockSize, 0, 0,
FsPermission.createImmutable((short) 0644), "owen", "group",
file.path);
fileStatusMap.put(file, fileStatus);
return fileStatus;
}
private FileStatus createDirectory(Path dir) {
return new FileStatus(0, true, 0, 0, 0, 0,
FsPermission.createImmutable((short) 0755), "owen", "group", dir);
}
private LocatedFileStatus createLocatedStatus(MockFile file) throws IOException {
FileStatus fileStatus = createStatus(file);
return new LocatedFileStatus(fileStatus,
getFileBlockLocationsImpl(fileStatus, 0, fileStatus.getLen(), false));
}
private LocatedFileStatus createLocatedDirectory(Path dir) throws IOException {
FileStatus fileStatus = createDirectory(dir);
return new LocatedFileStatus(fileStatus,
getFileBlockLocationsImpl(fileStatus, 0, fileStatus.getLen(), false));
}
@Override
public FileStatus getFileStatus(Path path) throws IOException {
statistics.incrementReadOps(1);
checkAccess();
path = path.makeQualified(this);
String pathnameAsDir = path.toString() + "/";
MockFile file = findFile(path);
if (file != null) return createStatus(file);
for (MockFile dir : files) {
if (dir.path.toString().startsWith(pathnameAsDir)) {
return createDirectory(path);
}
}
for (MockFile dir : globalFiles) {
if (dir.path.toString().startsWith(pathnameAsDir)) {
return createDirectory(path);
}
}
throw new FileNotFoundException("File " + path + " does not exist");
}
@Override
public BlockLocation[] getFileBlockLocations(FileStatus stat,
long start, long len) throws IOException {
return getFileBlockLocationsImpl(stat, start, len, true);
}
private BlockLocation[] getFileBlockLocationsImpl(final FileStatus stat, final long start,
final long len,
final boolean updateStats) throws IOException {
if (updateStats) {
statistics.incrementReadOps(1);
}
checkAccess();
List<BlockLocation> result = new ArrayList<BlockLocation>();
MockFile file = findFile(stat.getPath());
if (file != null) {
for(MockBlock block: file.blocks) {
if (OrcInputFormat.SplitGenerator.getOverlap(block.offset,
block.length, start, len) > 0) {
String[] topology = new String[block.hosts.length];
for(int i=0; i < topology.length; ++i) {
topology[i] = "/rack/ " + block.hosts[i];
}
result.add(new BlockLocation(block.hosts, block.hosts,
topology, block.offset, block.length));
}
}
return result.toArray(new BlockLocation[result.size()]);
}
return new BlockLocation[0];
}
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append("mockFs{files:[");
for(int i=0; i < files.size(); ++i) {
if (i != 0) {
buffer.append(", ");
}
buffer.append(files.get(i));
}
buffer.append("]}");
return buffer.toString();
}
public static void addGlobalFile(MockFile mockFile) {
globalFiles.add(mockFile);
}
public static void clearGlobalFiles() {
globalFiles.clear();
}
}
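/** Write {@code length} zero bytes to the buffer. */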
static void fill(DataOutputBuffer out, long length) throws IOException {
for(int i=0; i < length; ++i) {
out.write(0);
}
}
/**
* Create the binary contents of an ORC file that has just enough information
* to exercise split generation.
* @param stripeLengths the length of each stripe
* @return the bytes of the file
* @throws IOException
*/
static byte[] createMockOrcFile(long... stripeLengths) throws IOException {
OrcProto.Footer.Builder footer = OrcProto.Footer.newBuilder();
final long headerLen = 3;
long offset = headerLen;
DataOutputBuffer buffer = new DataOutputBuffer();
for(long stripeLength: stripeLengths) {
footer.addStripes(OrcProto.StripeInformation.newBuilder()
.setOffset(offset)
.setIndexLength(0)
.setDataLength(stripeLength-10)
.setFooterLength(10)
.setNumberOfRows(1000));
offset += stripeLength;
}
fill(buffer, offset);
footer.addTypes(OrcProto.Type.newBuilder()
.setKind(OrcProto.Type.Kind.STRUCT)
.addFieldNames("col1")
.addSubtypes(1));
footer.addTypes(OrcProto.Type.newBuilder()
.setKind(OrcProto.Type.Kind.STRING));
footer.setNumberOfRows(1000 * stripeLengths.length)
.setHeaderLength(headerLen)
.setContentLength(offset - headerLen);
footer.addStatistics(OrcProto.ColumnStatistics.newBuilder()
.setNumberOfValues(1000 * stripeLengths.length).build());
footer.addStatistics(OrcProto.ColumnStatistics.newBuilder()
.setNumberOfValues(1000 * stripeLengths.length)
.setStringStatistics(
OrcProto.StringStatistics.newBuilder()
.setMaximum("zzz")
.setMinimum("aaa")
.setSum(1000 * 3 * stripeLengths.length)
.build()
).build());
footer.build().writeTo(buffer);
int footerEnd = buffer.getLength();
OrcProto.PostScript ps =
OrcProto.PostScript.newBuilder()
.setCompression(OrcProto.CompressionKind.NONE)
.setFooterLength(footerEnd - offset)
.setMagic("ORC")
.build();
ps.writeTo(buffer);
buffer.write(buffer.getLength() - footerEnd);
byte[] result = new byte[buffer.getLength()];
System.arraycopy(buffer.getData(), 0, result, 0, buffer.getLength());
return result;
}
@Test
public void testAddSplit() throws Exception {
// create a file with 5 blocks spread around the cluster
MockFileSystem fs = new MockFileSystem(conf,
new MockFile("mock:/a/file", 500,
createMockOrcFile(197, 300, 600, 200, 200, 100, 100, 100, 100, 100),
new MockBlock("host1-1", "host1-2", "host1-3"),
new MockBlock("host2-1", "host0", "host2-3"),
new MockBlock("host0", "host3-2", "host3-3"),
new MockBlock("host4-1", "host4-2", "host4-3"),
new MockBlock("host5-1", "host5-2", "host5-3")));
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
OrcInputFormat.SplitGenerator splitter =
new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
fs.getFileStatus(new Path("/a/file")), null, null, true,
new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
OrcSplit result = splitter.createSplit(0, 200, null);
assertEquals(0, result.getStart());
assertEquals(200, result.getLength());
assertEquals("mock:/a/file", result.getPath().toString());
String[] locs = result.getLocations();
assertEquals(3, locs.length);
assertEquals("host1-1", locs[0]);
assertEquals("host1-2", locs[1]);
assertEquals("host1-3", locs[2]);
result = splitter.createSplit(500, 600, null);
locs = result.getLocations();
assertEquals(3, locs.length);
assertEquals("host2-1", locs[0]);
assertEquals("host0", locs[1]);
assertEquals("host2-3", locs[2]);
result = splitter.createSplit(0, 2500, null);
locs = result.getLocations();
assertEquals(1, locs.length);
assertEquals("host0", locs[0]);
}
@Test
public void testSplitGenerator() throws Exception {
// create a file with 5 blocks spread around the cluster
long[] stripeSizes =
new long[]{197, 300, 600, 200, 200, 100, 100, 100, 100, 100};
MockFileSystem fs = new MockFileSystem(conf,
new MockFile("mock:/a/file", 500,
createMockOrcFile(stripeSizes),
new MockBlock("host1-1", "host1-2", "host1-3"),
new MockBlock("host2-1", "host0", "host2-3"),
new MockBlock("host0", "host3-2", "host3-3"),
new MockBlock("host4-1", "host4-2", "host4-3"),
new MockBlock("host5-1", "host5-2", "host5-3")));
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 300);
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 200);
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
OrcInputFormat.SplitGenerator splitter =
new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
fs.getFileStatus(new Path("/a/file")), null, null, true,
new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
List<OrcSplit> results = splitter.call();
OrcSplit result = results.get(0);
assertEquals(3, result.getStart());
assertEquals(497, result.getLength());
result = results.get(1);
assertEquals(500, result.getStart());
assertEquals(600, result.getLength());
result = results.get(2);
assertEquals(1100, result.getStart());
assertEquals(400, result.getLength());
result = results.get(3);
assertEquals(1500, result.getStart());
assertEquals(300, result.getLength());
result = results.get(4);
assertEquals(1800, result.getStart());
assertEquals(200, result.getLength());
// test that min = 0, max = 0 generates one split per stripe
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 0);
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 0);
context = new OrcInputFormat.Context(conf);
splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
fs.getFileStatus(new Path("/a/file")), null, null, true,
new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
results = splitter.call();
for(int i=0; i < stripeSizes.length; ++i) {
assertEquals("checking stripe " + i + " size",
stripeSizes[i], results.get(i).getLength());
}
}
@Test
public void testProjectedColumnSize() throws Exception {
long[] stripeSizes =
new long[]{200, 200, 200, 200, 100};
MockFileSystem fs = new MockFileSystem(conf,
new MockFile("mock:/a/file", 500,
createMockOrcFile(stripeSizes),
new MockBlock("host1-1", "host1-2", "host1-3"),
new MockBlock("host2-1", "host0", "host2-3"),
new MockBlock("host0", "host3-2", "host3-3"),
new MockBlock("host4-1", "host4-2", "host4-3"),
new MockBlock("host5-1", "host5-2", "host5-3")));
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 300);
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 200);
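// read only column 0 so each split's projected uncompressed size covers just that column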
conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
OrcInputFormat.SplitGenerator splitter =
new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
fs.getFileStatus(new Path("/a/file")), null, null, true,
new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
List<OrcSplit> results = splitter.call();
OrcSplit result = results.get(0);
assertEquals(3, results.size());
assertEquals(3, result.getStart());
assertEquals(400, result.getLength());
assertEquals(167468, result.getProjectedColumnsUncompressedSize());
result = results.get(1);
assertEquals(403, result.getStart());
assertEquals(400, result.getLength());
assertEquals(167468, result.getProjectedColumnsUncompressedSize());
result = results.get(2);
assertEquals(803, result.getStart());
assertEquals(100, result.getLength());
assertEquals(41867, result.getProjectedColumnsUncompressedSize());
// test that min = 0, max = 0 generates one split per stripe
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 0);
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 0);
context = new OrcInputFormat.Context(conf);
splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
fs.getFileStatus(new Path("/a/file")), null, null, true,
new ArrayList<AcidInputFormat.DeltaMetaData>(),
true, null, null), null, true);
results = splitter.call();
assertEquals(5, results.size());
for (int i = 0; i < stripeSizes.length; ++i) {
assertEquals("checking stripe " + i + " size",
stripeSizes[i], results.get(i).getLength());
if (i == stripeSizes.length - 1) {
assertEquals(41867, results.get(i).getProjectedColumnsUncompressedSize());
} else {
assertEquals(83734, results.get(i).getProjectedColumnsUncompressedSize());
}
}
// single split
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 1000);
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 100000);
context = new OrcInputFormat.Context(conf);
splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
fs.getFileStatus(new Path("/a/file")), null, null, true,
new ArrayList<AcidInputFormat.DeltaMetaData>(),
true, null, null), null, true);
results = splitter.call();
assertEquals(1, results.size());
result = results.get(0);
assertEquals(3, result.getStart());
assertEquals(900, result.getLength());
assertEquals(376804, result.getProjectedColumnsUncompressedSize());
}
@Test
public void testInOutFormat() throws Exception {
Properties properties = new Properties();
properties.setProperty("columns", "x,y");
properties.setProperty("columns.types", "int:int");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
AbstractSerDe serde = new OrcSerde();
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
properties, Reporter.NULL);
writer.write(serde.serialize(new MyRow(1,2), inspector));
writer.write(serde.serialize(new MyRow(2,2), inspector));
writer.write(serde.serialize(new MyRow(3,2), inspector));
writer.close(true);
serde = new OrcSerde();
SerDeUtils.initializeSerDe(serde, conf, properties, null);
assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
inspector = (StructObjectInspector) serde.getObjectInspector();
assertEquals("struct<x:int,y:int>", inspector.getTypeName());
InputFormat<?,?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
InputSplit[] splits = in.getSplits(conf, 1);
assertEquals(1, splits.length);
// test the validateInput method
ArrayList<FileStatus> fileList = new ArrayList<FileStatus>();
assertEquals(false,
((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
fileList.add(fs.getFileStatus(testFilePath));
assertEquals(true,
((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
fileList.add(fs.getFileStatus(workDir));
assertEquals(false,
((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
// read the whole file
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
org.apache.hadoop.mapred.RecordReader reader =
in.getRecordReader(splits[0], conf, Reporter.NULL);
Object key = reader.createKey();
Writable value = (Writable) reader.createValue();
int rowNum = 0;
List<? extends StructField> fields = inspector.getAllStructFieldRefs();
IntObjectInspector intInspector =
(IntObjectInspector) fields.get(0).getFieldObjectInspector();
// UNDONE: unclear why HIVE-12894 causes getProgress() to return 0 here
// assertEquals(0.33, reader.getProgress(), 0.01);
while (reader.next(key, value)) {
assertEquals(++rowNum, intInspector.get(inspector.
getStructFieldData(serde.deserialize(value), fields.get(0))));
assertEquals(2, intInspector.get(inspector.
getStructFieldData(serde.deserialize(value), fields.get(1))));
}
assertEquals(3, rowNum);
assertEquals(1.0, reader.getProgress(), 0.00001);
reader.close();
// read just the first column
ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(0));
reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
key = reader.createKey();
value = (Writable) reader.createValue();
rowNum = 0;
fields = inspector.getAllStructFieldRefs();
while (reader.next(key, value)) {
assertEquals(++rowNum, intInspector.get(inspector.
getStructFieldData(value, fields.get(0))));
assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
}
assertEquals(3, rowNum);
reader.close();
// test the mapping of empty string to all columns
ColumnProjectionUtils.setReadAllColumns(conf);
reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
key = reader.createKey();
value = (Writable) reader.createValue();
rowNum = 0;
fields = inspector.getAllStructFieldRefs();
while (reader.next(key, value)) {
assertEquals(++rowNum, intInspector.get(inspector.
getStructFieldData(value, fields.get(0))));
assertEquals(2, intInspector.get(inspector.
getStructFieldData(serde.deserialize(value), fields.get(1))));
}
assertEquals(3, rowNum);
reader.close();
}
static class SimpleRow implements Writable {
Text z;
public SimpleRow(Text t) {
this.z = t;
}
@Override
public void write(DataOutput dataOutput) throws IOException {
throw new UnsupportedOperationException("unsupported");
}
@Override
public void readFields(DataInput dataInput) throws IOException {
throw new UnsupportedOperationException("unsupported");
}
}
static class NestedRow implements Writable {
int z;
MyRow r;
NestedRow(int x, int y, int z) {
this.z = z;
this.r = new MyRow(x,y);
}
@Override
public void write(DataOutput dataOutput) throws IOException {
throw new UnsupportedOperationException("unsupported");
}
@Override
public void readFields(DataInput dataInput) throws IOException {
throw new UnsupportedOperationException("unsupported");
}
}
@Test
public void testMROutput() throws Exception {
Properties properties = new Properties();
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
AbstractSerDe serde = new OrcSerde();
OutputFormat<?, ?> outFormat = new OrcOutputFormat();
RecordWriter writer =
outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
Reporter.NULL);
writer.write(NullWritable.get(),
serde.serialize(new NestedRow(1,2,3), inspector));
writer.write(NullWritable.get(),
serde.serialize(new NestedRow(4,5,6), inspector));
writer.write(NullWritable.get(),
serde.serialize(new NestedRow(7,8,9), inspector));
writer.close(Reporter.NULL);
serde = new OrcSerde();
properties.setProperty("columns", "z,r");
properties.setProperty("columns.types", "int:struct<x:int,y:int>");
SerDeUtils.initializeSerDe(serde, conf, properties, null);
inspector = (StructObjectInspector) serde.getObjectInspector();
InputFormat<?,?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
InputSplit[] splits = in.getSplits(conf, 1);
assertEquals(1, splits.length);
ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
conf.set("columns", "z,r");
conf.set("columns.types", "int:struct<x:int,y:int>");
org.apache.hadoop.mapred.RecordReader reader =
in.getRecordReader(splits[0], conf, Reporter.NULL);
Object key = reader.createKey();
Object value = reader.createValue();
int rowNum = 0;
List<? extends StructField> fields = inspector.getAllStructFieldRefs();
StructObjectInspector inner = (StructObjectInspector)
fields.get(1).getFieldObjectInspector();
List<? extends StructField> inFields = inner.getAllStructFieldRefs();
IntObjectInspector intInspector =
(IntObjectInspector) fields.get(0).getFieldObjectInspector();
while (reader.next(key, value)) {
assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
Object sub = inspector.getStructFieldData(value, fields.get(1));
assertEquals(3*rowNum+1, intInspector.get(inner.getStructFieldData(sub,
inFields.get(0))));
assertEquals(3*rowNum+2, intInspector.get(inner.getStructFieldData(sub,
inFields.get(1))));
rowNum += 1;
}
assertEquals(3, rowNum);
reader.close();
}
@Test
public void testEmptyFile() throws Exception {
Properties properties = new Properties();
properties.setProperty("columns", "x,y");
properties.setProperty("columns.types", "int:int");
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
properties, Reporter.NULL);
writer.close(true);
AbstractSerDe serde = new OrcSerde();
SerDeUtils.initializeSerDe(serde, conf, properties, null);
InputFormat<?,?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
InputSplit[] splits = in.getSplits(conf, 1);
assertEquals(0, splits.length);
assertEquals(null, serde.getSerDeStats());
}
@Test(expected = RuntimeException.class)
public void testSplitGenFailure() throws IOException {
Properties properties = new Properties();
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
properties, Reporter.NULL);
writer.write(new OrcSerde().serialize(null,null));
writer.close(true);
InputFormat<?,?> in = new OrcInputFormat();
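// remove read permission (0333) so split generation fails with "Permission denied"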
fs.setPermission(testFilePath, FsPermission.createImmutable((short) 0333));
FileInputFormat.setInputPaths(conf, testFilePath.toString());
try {
in.getSplits(conf, 1);
} catch (RuntimeException e) {
assertEquals(true, e.getMessage().contains("Permission denied"));
throw e;
}
}
static class StringRow implements Writable {
String str;
String str2;
StringRow(String s) {
str = s;
str2 = s;
}
@Override
public void write(DataOutput dataOutput) throws IOException {
throw new UnsupportedOperationException("no write");
}
@Override
public void readFields(DataInput dataInput) throws IOException {
throw new UnsupportedOperationException("no read");
}
static String getColumnNamesProperty() {
return "str,str2";
}
static String getColumnTypesProperty() {
return "string:string";
}
}
@Test
public void testDefaultTypes() throws Exception {
Properties properties = new Properties();
properties.setProperty("columns", "str,str2");
properties.setProperty("columns.types", "string:string");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
AbstractSerDe serde = new OrcSerde();
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
true, properties, Reporter.NULL);
writer.write(serde.serialize(new StringRow("owen"), inspector));
writer.write(serde.serialize(new StringRow("beth"), inspector));
writer.write(serde.serialize(new StringRow("laurel"), inspector));
writer.write(serde.serialize(new StringRow("hazen"), inspector));
writer.write(serde.serialize(new StringRow("colin"), inspector));
writer.write(serde.serialize(new StringRow("miles"), inspector));
writer.close(true);
serde = new OrcSerde();
SerDeUtils.initializeSerDe(serde, conf, properties, null);
inspector = (StructObjectInspector) serde.getObjectInspector();
assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
InputFormat<?,?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
InputSplit[] splits = in.getSplits(conf, 1);
assertEquals(1, splits.length);
// read the whole file
conf.set("columns", StringRow.getColumnNamesProperty());
conf.set("columns.types", StringRow.getColumnTypesProperty());
org.apache.hadoop.mapred.RecordReader reader =
in.getRecordReader(splits[0], conf, Reporter.NULL);
Object key = reader.createKey();
Writable value = (Writable) reader.createValue();
List<? extends StructField> fields = inspector.getAllStructFieldRefs();
StringObjectInspector strInspector = (StringObjectInspector)
fields.get(0).getFieldObjectInspector();
assertEquals(true, reader.next(key, value));
assertEquals("owen", strInspector.getPrimitiveJavaObject(inspector.
getStructFieldData(value, fields.get(0))));
assertEquals(true, reader.next(key, value));
assertEquals("beth", strInspector.getPrimitiveJavaObject(inspector.
getStructFieldData(value, fields.get(0))));
assertEquals(true, reader.next(key, value));
assertEquals("laurel", strInspector.getPrimitiveJavaObject(inspector.
getStructFieldData(value, fields.get(0))));
assertEquals(true, reader.next(key, value));
assertEquals("hazen", strInspector.getPrimitiveJavaObject(inspector.
getStructFieldData(value, fields.get(0))));
assertEquals(true, reader.next(key, value));
assertEquals("colin", strInspector.getPrimitiveJavaObject(inspector.
getStructFieldData(value, fields.get(0))));
assertEquals(true, reader.next(key, value));
assertEquals("miles", strInspector.getPrimitiveJavaObject(inspector.
getStructFieldData(value, fields.get(0))));
assertEquals(false, reader.next(key, value));
reader.close();
}
/**
* Create a mock execution environment that has enough detail that
* ORC, vectorization, HiveInputFormat, and CombineHiveInputFormat don't
* explode.
* @param workDir a local filesystem work directory
* @param warehouseDir a mock filesystem warehouse directory
* @param tableName the table name
* @param objectInspector object inspector for the row
* @param isVectorized should run vectorized
* @return a JobConf that contains the necessary information
* @throws IOException
* @throws HiveException
*/
JobConf createMockExecutionEnvironment(Path workDir,
Path warehouseDir,
String tableName,
ObjectInspector objectInspector,
boolean isVectorized,
int partitions
) throws IOException, HiveException {
JobConf conf = new JobConf();
Utilities.clearWorkMap(conf);
conf.set("hive.exec.plan", workDir.toString());
conf.set("mapred.job.tracker", "local");
String isVectorizedString = Boolean.toString(isVectorized);
conf.set("hive.vectorized.execution.enabled", isVectorizedString);
conf.set(Utilities.VECTOR_MODE, isVectorizedString);
conf.set(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, isVectorizedString);
conf.set("fs.mock.impl", MockFileSystem.class.getName());
conf.set("mapred.mapper.class", ExecMapper.class.getName());
Path root = new Path(warehouseDir, tableName);
// clean out previous contents
((MockFileSystem) root.getFileSystem(conf)).clear();
// build partition strings
String[] partPath = new String[partitions];
StringBuilder buffer = new StringBuilder();
for(int p=0; p < partitions; ++p) {
partPath[p] = new Path(root, "p=" + p).toString();
if (p != 0) {
buffer.append(',');
}
buffer.append(partPath[p]);
}
conf.set("mapred.input.dir", buffer.toString());
StringBuilder columnIds = new StringBuilder();
StringBuilder columnNames = new StringBuilder();
StringBuilder columnTypes = new StringBuilder();
StructObjectInspector structOI = (StructObjectInspector) objectInspector;
List<? extends StructField> fields = structOI.getAllStructFieldRefs();
int numCols = fields.size();
for(int i=0; i < numCols; ++i) {
if (i != 0) {
columnIds.append(',');
columnNames.append(',');
columnTypes.append(',');
}
columnIds.append(i);
columnNames.append(fields.get(i).getFieldName());
columnTypes.append(fields.get(i).getFieldObjectInspector().getTypeName());
}
conf.set("hive.io.file.readcolumn.ids", columnIds.toString());
conf.set("partition_columns", "p");
conf.set(serdeConstants.LIST_COLUMNS, columnNames.toString());
conf.set(serdeConstants.LIST_COLUMN_TYPES, columnTypes.toString());
MockFileSystem fs = (MockFileSystem) warehouseDir.getFileSystem(conf);
fs.clear();
Properties tblProps = new Properties();
tblProps.put("name", tableName);
tblProps.put("serialization.lib", OrcSerde.class.getName());
tblProps.put("columns", columnNames.toString());
tblProps.put("columns.types", columnTypes.toString());
TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class,
tblProps);
MapWork mapWork = new MapWork();
mapWork.setVectorMode(isVectorized);
if (isVectorized) {
VectorizedRowBatchCtx vectorizedRowBatchCtx = new VectorizedRowBatchCtx();
vectorizedRowBatchCtx.init(structOI, new String[0]);
mapWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
}
mapWork.setUseBucketizedHiveInputFormat(false);
LinkedHashMap<Path, ArrayList<String>> aliasMap = new LinkedHashMap<>();
ArrayList<String> aliases = new ArrayList<String>();
aliases.add(tableName);
LinkedHashMap<Path, PartitionDesc> partMap = new LinkedHashMap<>();
for(int p=0; p < partitions; ++p) {
Path path = new Path(partPath[p]);
aliasMap.put(path, aliases);
LinkedHashMap<String, String> partSpec =
new LinkedHashMap<String, String>();
PartitionDesc part = new PartitionDesc(tbl, partSpec);
if (isVectorized) {
part.setVectorPartitionDesc(
VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false));
}
partMap.put(path, part);
}
mapWork.setPathToAliases(aliasMap);
mapWork.setPathToPartitionInfo(partMap);
// write the plan out
FileSystem localFs = FileSystem.getLocal(conf).getRaw();
Path mapXml = new Path(workDir, "map.xml");
localFs.delete(mapXml, true);
FSDataOutputStream planStream = localFs.create(mapXml);
SerializationUtilities.serializePlan(mapWork, planStream);
conf.setBoolean(Utilities.HAS_MAP_WORK, true);
planStream.close();
return conf;
}
/**
* Set the mock blocks for a file after it has been written.
* @param path the path to modify
* @param conf the configuration
* @param blocks the blocks to use
* @throws IOException
*/
static void setBlocks(Path path, Configuration conf,
MockBlock... blocks) throws IOException {
FileSystem mockFs = path.getFileSystem(conf);
MockOutputStream stream = (MockOutputStream) mockFs.create(path);
stream.setBlocks(blocks);
}
static int getLength(Path path, Configuration conf) throws IOException {
FileSystem mockFs = path.getFileSystem(conf);
FileStatus stat = mockFs.getFileStatus(path);
return (int) stat.getLen();
}
/**
* Test vectorization, non-acid, non-combine.
* @throws Exception
*/
@Test
public void testVectorization() throws Exception {
// get the object inspector for MyRow
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
"vectorization", inspector, true, 1);
// write the orc file to the mock file system
Path path = new Path(conf.get("mapred.input.dir") + "/0_0");
Writer writer =
OrcFile.createWriter(path,
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for(int i=0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2*i));
}
writer.close();
setBlocks(path, conf, new MockBlock("host0", "host1"));
// call getsplits
HiveInputFormat<?,?> inputFormat =
new HiveInputFormat<WritableComparable, Writable>();
InputSplit[] splits = inputFormat.getSplits(conf, 10);
assertEquals(1, splits.length);
org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
NullWritable key = reader.createKey();
VectorizedRowBatch value = reader.createValue();
assertEquals(true, reader.next(key, value));
assertEquals(10, value.count());
LongColumnVector col0 = (LongColumnVector) value.cols[0];
for(int i=0; i < 10; i++) {
assertEquals("checking " + i, i, col0.vector[i]);
}
assertEquals(false, reader.next(key, value));
}
/**
* Test vectorization with buckets, non-acid, non-combine.
* @throws Exception
*/
@Test
public void testVectorizationWithBuckets() throws Exception {
// get the object inspector for MyRow
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
"vectorBuckets", inspector, true, 1);
// write the orc file to the mock file system
Path path = new Path(conf.get("mapred.input.dir") + "/0_0");
Writer writer =
OrcFile.createWriter(path,
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for(int i=0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2*i));
}
writer.close();
setBlocks(path, conf, new MockBlock("host0", "host1"));
// call getsplits
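// declare 3 buckets on the table; a single data file should still produce a single split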
conf.setInt(hive_metastoreConstants.BUCKET_COUNT, 3);
HiveInputFormat<?,?> inputFormat =
new HiveInputFormat<WritableComparable, Writable>();
InputSplit[] splits = inputFormat.getSplits(conf, 10);
assertEquals(1, splits.length);
org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
NullWritable key = reader.createKey();
VectorizedRowBatch value = reader.createValue();
assertEquals(true, reader.next(key, value));
assertEquals(10, value.count());
LongColumnVector col0 = (LongColumnVector) value.cols[0];
for(int i=0; i < 10; i++) {
assertEquals("checking " + i, i, col0.vector[i]);
}
assertEquals(false, reader.next(key, value));
}
// test acid with vectorization, no combine
@Test
public void testVectorizationWithAcid() throws Exception {
StructObjectInspector inspector = new BigRowInspector();
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
"vectorizationAcid", inspector, true, 1);
// write the orc file to the mock file system
Path partDir = new Path(conf.get("mapred.input.dir"));
OrcRecordUpdater writer = new OrcRecordUpdater(partDir,
new AcidOutputFormat.Options(conf).maximumTransactionId(10)
.writingBase(true).bucket(0).inspector(inspector).finalDestination(partDir));
for (int i = 0; i < 100; ++i) {
BigRow row = new BigRow(i);
writer.insert(10, row);
}
writer.close(false);
Path path = new Path("mock:/vectorizationAcid/p=0/base_0000010/bucket_00000");
setBlocks(path, conf, new MockBlock("host0", "host1"));
// call getsplits
HiveInputFormat<?, ?> inputFormat =
new HiveInputFormat<WritableComparable, Writable>();
InputSplit[] splits = inputFormat.getSplits(conf, 10);
assertEquals(1, splits.length);
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, BigRow.getColumnNamesProperty());
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, BigRow.getColumnTypesProperty());
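// mark the scan as an ACID table scan so the vectorized reader exposes the row's user columns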
HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
NullWritable key = reader.createKey();
VectorizedRowBatch value = reader.createValue();
assertEquals(true, reader.next(key, value));
assertEquals(100, value.count());
LongColumnVector booleanColumn = (LongColumnVector) value.cols[0];
LongColumnVector byteColumn = (LongColumnVector) value.cols[1];
LongColumnVector shortColumn = (LongColumnVector) value.cols[2];
LongColumnVector intColumn = (LongColumnVector) value.cols[3];
LongColumnVector longColumn = (LongColumnVector) value.cols[4];
DoubleColumnVector floatColumn = (DoubleColumnVector) value.cols[5];
DoubleColumnVector doubleColumn = (DoubleColumnVector) value.cols[6];
BytesColumnVector stringColumn = (BytesColumnVector) value.cols[7];
DecimalColumnVector decimalColumn = (DecimalColumnVector) value.cols[8];
LongColumnVector dateColumn = (LongColumnVector) value.cols[9];
TimestampColumnVector timestampColumn = (TimestampColumnVector) value.cols[10];
for(int i=0; i < 100; i++) {
assertEquals("checking boolean " + i, i % 2 == 0 ? 1 : 0,
booleanColumn.vector[i]);
assertEquals("checking byte " + i, (byte) i,
byteColumn.vector[i]);
assertEquals("checking short " + i, (short) i, shortColumn.vector[i]);
assertEquals("checking int " + i, i, intColumn.vector[i]);
assertEquals("checking long " + i, i, longColumn.vector[i]);
assertEquals("checking float " + i, i, floatColumn.vector[i], 0.0001);
assertEquals("checking double " + i, i, doubleCoulmn.vector[i], 0.0001);
Text strValue = new Text();
strValue.set(stringColumn.vector[i], stringColumn.start[i],
stringColumn.length[i]);
assertEquals("checking string " + i, new Text(Long.toHexString(i)),
strValue);
assertEquals("checking decimal " + i, HiveDecimal.create(i),
decimalColumn.vector[i].getHiveDecimal());
assertEquals("checking date " + i, i, dateColumn.vector[i]);
long millis = (long) i * MILLIS_IN_DAY;
millis -= LOCAL_TIMEZONE.getOffset(millis);
assertEquals("checking timestamp " + i, millis,
timestampColumn.getTime(i));
}
assertEquals(false, reader.next(key, value));
}
// test non-vectorized, non-acid, combine
@Test
public void testCombinationInputFormat() throws Exception {
// get the object inspector for MyRow
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
"combination", inspector, false, 1);
// write the orc file to the mock file system
Path partDir = new Path(conf.get("mapred.input.dir"));
Writer writer =
OrcFile.createWriter(new Path(partDir, "0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for(int i=0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2*i));
}
writer.close();
Path path = new Path("mock:/combination/p=0/0_0");
setBlocks(path, conf, new MockBlock("host0", "host1"));
MockFileSystem mockFs = (MockFileSystem) partDir.getFileSystem(conf);
int length0 = getLength(path, conf);
writer =
OrcFile.createWriter(new Path(partDir, "1_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for(int i=10; i < 20; ++i) {
writer.addRow(new MyRow(i, 2*i));
}
writer.close();
Path path1 = new Path("mock:/combination/p=0/1_0");
setBlocks(path1, conf, new MockBlock("host1", "host2"));
// call getsplits
HiveInputFormat<?,?> inputFormat =
new CombineHiveInputFormat<WritableComparable, Writable>();
InputSplit[] splits = inputFormat.getSplits(conf, 1);
assertEquals(1, splits.length);
CombineHiveInputFormat.CombineHiveInputSplit split =
(CombineHiveInputFormat.CombineHiveInputSplit) splits[0];
// check split
assertEquals(2, split.getNumPaths());
assertEquals(partDir.toString() + "/0_0", split.getPath(0).toString());
assertEquals(partDir.toString() + "/1_0", split.getPath(1).toString());
assertEquals(length0, split.getLength(0));
assertEquals(getLength(path1, conf), split.getLength(1));
assertEquals(0, split.getOffset(0));
assertEquals(0, split.getOffset(1));
// hadoop-1 reports 3 locations and hadoop-2 reports 0; the best answer would be 1.
assertTrue(3 >= split.getLocations().length);
// read split
org.apache.hadoop.mapred.RecordReader<CombineHiveKey, OrcStruct> reader =
inputFormat.getRecordReader(split, conf, Reporter.NULL);
CombineHiveKey key = reader.createKey();
OrcStruct value = reader.createValue();
for(int i=0; i < 20; i++) {
assertEquals(true, reader.next(key, value));
assertEquals(i, ((IntWritable) value.getFieldValue(0)).get());
}
assertEquals(false, reader.next(key, value));
}
// test non-vectorized, acid, combine
@Test
public void testCombinationInputFormatWithAcid() throws Exception {
// get the object inspector for MyRow
StructObjectInspector inspector;
final int PARTITIONS = 2;
final int BUCKETS = 3;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
"combinationAcid", inspector, false, PARTITIONS);
// write the orc file to the mock file system
Path[] partDir = new Path[PARTITIONS];
String[] paths = conf.getStrings("mapred.input.dir");
for(int p=0; p < PARTITIONS; ++p) {
partDir[p] = new Path(paths[p]);
}
// write a base file in partition 0
OrcRecordUpdater writer = new OrcRecordUpdater(partDir[0],
new AcidOutputFormat.Options(conf).maximumTransactionId(10)
.writingBase(true).bucket(0).inspector(inspector).finalDestination(partDir[0]));
for(int i=0; i < 10; ++i) {
writer.insert(10, new MyRow(i, 2 * i));
}
writer.close(false);
// base file
Path base0 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00000");
setBlocks(base0, conf, new MockBlock("host1", "host2"));
// write a second base file (bucket 1) in partition 0
writer = new OrcRecordUpdater(partDir[0],
new AcidOutputFormat.Options(conf).maximumTransactionId(10)
.writingBase(true).bucket(1).inspector(inspector).finalDestination(partDir[0]));
for(int i=10; i < 20; ++i) {
writer.insert(10, new MyRow(i, 2*i));
}
writer.close(false);
Path base1 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00001");
setBlocks(base1, conf, new MockBlock("host1", "host2"));
// write three files in partition 1
for(int bucket=0; bucket < BUCKETS; ++bucket) {
Path path = new Path(partDir[1], "00000" + bucket + "_0");
Writer orc = OrcFile.createWriter(
path,
OrcFile.writerOptions(conf)
.blockPadding(false)
.bufferSize(1024)
.inspector(inspector));
orc.addRow(new MyRow(1, 2));
orc.close();
setBlocks(path, conf, new MockBlock("host3", "host4"));
}
// call getsplits
conf.setInt(hive_metastoreConstants.BUCKET_COUNT, BUCKETS);
HiveInputFormat<?,?> inputFormat =
new CombineHiveInputFormat<WritableComparable, Writable>();
InputSplit[] splits = inputFormat.getSplits(conf, 1);
assertEquals(3, splits.length);
HiveInputFormat.HiveInputSplit split =
(HiveInputFormat.HiveInputSplit) splits[0];
assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
split.inputFormatClassName());
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000",
split.getPath().toString());
assertEquals(0, split.getStart());
assertEquals(607, split.getLength());
split = (HiveInputFormat.HiveInputSplit) splits[1];
assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
split.inputFormatClassName());
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001",
split.getPath().toString());
assertEquals(0, split.getStart());
assertEquals(629, split.getLength());
CombineHiveInputFormat.CombineHiveInputSplit combineSplit =
(CombineHiveInputFormat.CombineHiveInputSplit) splits[2];
assertEquals(BUCKETS, combineSplit.getNumPaths());
for(int bucket=0; bucket < BUCKETS; ++bucket) {
assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0",
combineSplit.getPath(bucket).toString());
assertEquals(0, combineSplit.getOffset(bucket));
assertEquals(241, combineSplit.getLength(bucket));
}
String[] hosts = combineSplit.getLocations();
assertEquals(2, hosts.length);
}
@Test
public void testSetSearchArgument() throws Exception {
Reader.Options options = new Reader.Options();
List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
builder.setKind(OrcProto.Type.Kind.STRUCT)
.addAllFieldNames(Arrays.asList("op", "otid", "bucket", "rowid", "ctid",
"row"))
.addAllSubtypes(Arrays.asList(1,2,3,4,5,6));
types.add(builder.build());
builder.clear().setKind(OrcProto.Type.Kind.INT);
types.add(builder.build());
types.add(builder.build());
types.add(builder.build());
types.add(builder.build());
types.add(builder.build());
builder.clear().setKind(OrcProto.Type.Kind.STRUCT)
.addAllFieldNames(Arrays.asList("url", "purchase", "cost", "store"))
.addAllSubtypes(Arrays.asList(7, 8, 9, 10));
types.add(builder.build());
builder.clear().setKind(OrcProto.Type.Kind.STRING);
types.add(builder.build());
builder.clear().setKind(OrcProto.Type.Kind.INT);
types.add(builder.build());
types.add(builder.build());
types.add(builder.build());
SearchArgument isNull = SearchArgumentFactory.newBuilder()
.startAnd().isNull("cost", PredicateLeaf.Type.LONG).end().build();
conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(isNull));
conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
"url,cost");
options.include(new boolean[]{true, true, false, true, false});
OrcInputFormat.setSearchArgument(options, types, conf, false);
String[] colNames = options.getColumnNames();
assertEquals(null, colNames[0]);
assertEquals("url", colNames[1]);
assertEquals(null, colNames[2]);
assertEquals("cost", colNames[3]);
assertEquals(null, colNames[4]);
SearchArgument arg = options.getSearchArgument();
List<PredicateLeaf> leaves = arg.getLeaves();
assertEquals("cost", leaves.get(0).getColumnName());
assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());
}
@Test
public void testSplitElimination() throws Exception {
Properties properties = new Properties();
properties.setProperty("columns", "z,r");
properties.setProperty("columns.types", "int:struct<x:int,y:int>");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
AbstractSerDe serde = new OrcSerde();
OutputFormat<?, ?> outFormat = new OrcOutputFormat();
conf.setInt("mapred.max.split.size", 50);
RecordWriter writer =
outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
Reporter.NULL);
writer.write(NullWritable.get(),
serde.serialize(new NestedRow(1,2,3), inspector));
writer.write(NullWritable.get(),
serde.serialize(new NestedRow(4,5,6), inspector));
writer.write(NullWritable.get(),
serde.serialize(new NestedRow(7,8,9), inspector));
writer.close(Reporter.NULL);
serde = new OrcSerde();
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
.lessThan("z", PredicateLeaf.Type.LONG, new Long(0))
.end()
.build();
conf.set("sarg.pushdown", toKryo(sarg));
conf.set("hive.io.file.readcolumn.names", "z,r");
SerDeUtils.initializeSerDe(serde, conf, properties, null);
inspector = (StructObjectInspector) serde.getObjectInspector();
InputFormat<?,?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
InputSplit[] splits = in.getSplits(conf, 1);
assertEquals(0, splits.length);
}
@Test
public void testSplitEliminationNullStats() throws Exception {
Properties properties = new Properties();
StructObjectInspector inspector = createSoi();
AbstractSerDe serde = new OrcSerde();
OutputFormat<?, ?> outFormat = new OrcOutputFormat();
conf.setInt("mapred.max.split.size", 50);
RecordWriter writer =
outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
Reporter.NULL);
writer.write(NullWritable.get(),
serde.serialize(new SimpleRow(null), inspector));
writer.write(NullWritable.get(),
serde.serialize(new SimpleRow(null), inspector));
writer.write(NullWritable.get(),
serde.serialize(new SimpleRow(null), inspector));
writer.close(Reporter.NULL);
serde = new OrcSerde();
SearchArgument sarg =
SearchArgumentFactory.newBuilder()
.startAnd()
.lessThan("z", PredicateLeaf.Type.STRING, new String("foo"))
.end()
.build();
conf.set("sarg.pushdown", toKryo(sarg));
conf.set("hive.io.file.readcolumn.names", "z");
properties.setProperty("columns", "z");
properties.setProperty("columns.types", "string");
SerDeUtils.initializeSerDe(serde, conf, properties, null);
inspector = (StructObjectInspector) serde.getObjectInspector();
InputFormat<?,?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
InputSplit[] splits = in.getSplits(conf, 1);
assertEquals(0, splits.length);
}
@Test
public void testDoAs() throws Exception {
conf.setInt(ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS.varname, 1);
conf.set(ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "ETL");
conf.setBoolean(ConfVars.HIVE_IN_TEST.varname, true);
conf.setClass("fs.mock.impl", MockFileSystem.class, FileSystem.class);
String badUser = UserGroupInformation.getCurrentUser().getShortUserName() + "-foo";
MockFileSystem.setBlockedUgi(badUser);
MockFileSystem.clearGlobalFiles();
OrcInputFormat.Context.resetThreadPool(); // We need the size above to take effect.
try {
// OrcInputFormat will get a mock fs from FileSystem.get; add global files.
MockFileSystem.addGlobalFile(new MockFile("mock:/ugi/1/file", 10000,
createMockOrcFile(197, 300, 600), new MockBlock("host1-1", "host1-2", "host1-3")));
MockFileSystem.addGlobalFile(new MockFile("mock:/ugi/2/file", 10000,
createMockOrcFile(197, 300, 600), new MockBlock("host1-1", "host1-2", "host1-3")));
FileInputFormat.setInputPaths(conf, "mock:/ugi/1");
UserGroupInformation ugi = UserGroupInformation.createUserForTesting(badUser, new String[0]);
assertEquals(0, OrcInputFormat.Context.getCurrentThreadPoolSize());
try {
ugi.doAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {
OrcInputFormat.generateSplitsInfo(conf, new Context(conf, -1, null));
return null;
}
});
fail("Didn't throw");
} catch (Exception ex) {
Throwable cause = ex;
boolean found = false;
while (cause != null) {
if (cause instanceof MockFileSystem.MockAccessDenied) {
found = true; // Expected.
break;
}
cause = cause.getCause();
}
if (!found) throw ex; // Unexpected.
}
assertEquals(1, OrcInputFormat.Context.getCurrentThreadPoolSize());
FileInputFormat.setInputPaths(conf, "mock:/ugi/2");
List<OrcSplit> splits = OrcInputFormat.generateSplitsInfo(conf, new Context(conf, -1, null));
assertEquals(1, splits.size());
} finally {
MockFileSystem.clearGlobalFiles();
}
}
private StructObjectInspector createSoi() {
synchronized (TestOrcFile.class) {
return (StructObjectInspector)ObjectInspectorFactory.getReflectionObjectInspector(
SimpleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
}
@Test
public void testSplitGenReadOps() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
conf.set("mapred.input.dir", "mock:///mocktable");
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
MockPath mockPath = new MockPath(fs, "mock:///mocktable");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for(int i=0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2*i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for(int i=0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2*i));
}
writer.close();
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktable
// call-2: open - mock:/mocktable/0_0
// call-3: open - mock:/mocktable/0_1
assertEquals(3, readOpsDelta);
assertEquals(2, splits.length);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
@Test
public void testSplitGenReadOpsLocalCache() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
// creates the static cache
MockPath mockPath = new MockPath(fs, "mock:///mocktbl");
conf.set(ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "0");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl
// call-2: open - mock:/mocktbl/0_0
// call-3: open - mock:/mocktbl/0_1
assertEquals(3, readOpsDelta);
// force BI to avoid reading footers
conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl
assertEquals(1, readOpsDelta);
// enable cache and use default strategy
conf.set(ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "10Mb");
conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "HYBRID");
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl
// call-2: open - mock:/mocktbl/0_0
// call-3: open - mock:/mocktbl/0_1
assertEquals(3, readOpsDelta);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl
assertEquals(1, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
@Test
public void testSplitGenReadOpsLocalCacheChangeFileLen() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
// creates the static cache
MockPath mockPath = new MockPath(fs, "mock:///mocktbl1");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl1
// call-2: open - mock:/mocktbl1/0_0
// call-3: open - mock:/mocktbl1/0_1
assertEquals(3, readOpsDelta);
// change file length and look for cache misses
fs.clear();
writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 100; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 100; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl1
// call-2: open - mock:/mocktbl1/0_0
// call-3: open - mock:/mocktbl1/0_1
assertEquals(3, readOpsDelta);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl1
assertEquals(1, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
@Test
public void testSplitGenReadOpsLocalCacheChangeModificationTime() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
// creates the static cache
MockPath mockPath = new MockPath(fs, "mock:///mocktbl2");
conf.set("hive.orc.cache.use.soft.references", "true");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl2
// call-2: open - mock:/mocktbl2/0_0
// call-3: open - mock:/mocktbl2/0_1
assertEquals(3, readOpsDelta);
// change file modification time and look for cache misses
FileSystem fs1 = FileSystem.get(conf);
MockFile mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_0"));
((MockFileSystem) fs1).touch(mockFile);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl2
// call-2: open - mock:/mocktbl2/0_1
assertEquals(2, readOpsDelta);
// touch the next file
fs1 = FileSystem.get(conf);
mockFile = ((MockFileSystem) fs1).findFile(new Path(mockPath + "/0_1"));
((MockFileSystem) fs1).touch(mockFile);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl2
// call-2: open - mock:/mocktbl2/0_0
assertEquals(2, readOpsDelta);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
orcInputFormat = new OrcInputFormat();
splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: listLocatedStatus - mock:/mocktbl2
assertEquals(1, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
@Test
public void testNonVectorReaderNoFooterSerialize() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable1");
conf.set("hive.orc.splits.include.file.footer", "false");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=false"));
assertTrue(split.toString().contains("hasBase=true"));
assertTrue(split.toString().contains("deltas=0"));
if (split instanceof OrcSplit) {
assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" +
" orc splits.", ((OrcSplit) split).hasFooter());
}
orcInputFormat.getRecordReader(split, conf, null);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read footer - split 1 => mock:/mocktable1/0_0
// call-2: open to read data - split 1 => mock:/mocktable1/0_0
// call-3: open to read footer - split 2 => mock:/mocktable1/0_1
// call-4: open to read data - split 2 => mock:/mocktable1/0_1
assertEquals(4, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
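/**
* With hive.orc.splits.include.file.footer=true, the footer is serialized into the OrcSplit
* (hasFooter=true), so the non-vector record reader only opens each file once to read data:
* one read op per split against the mock file system.
*/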
@Test
public void testNonVectorReaderFooterSerialize() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable2");
conf.set("hive.orc.splits.include.file.footer", "true");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=true"));
assertTrue(split.toString().contains("hasBase=true"));
assertTrue(split.toString().contains("deltas=0"));
if (split instanceof OrcSplit) {
assertTrue("Footer serialize test for non-vector reader, hasFooter is expected in" +
" orc splits.", ((OrcSplit) split).hasFooter());
}
orcInputFormat.getRecordReader(split, conf, null);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read data - split 1 => mock:/mocktable2/0_0
// call-2: open to read data - split 2 => mock:/mocktable2/0_1
assertEquals(2, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
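/**
* Vectorized-reader variant of the no-footer-serialize case: splits report hasFooter=false and
* each record reader performs two opens per split (footer + data) on the mock file system.
*/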
@Test
public void testVectorReaderNoFooterSerialize() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable3");
conf.set("hive.orc.splits.include.file.footer", "false");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
JobConf jobConf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
"mocktable3", inspector, true, 0);
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=false"));
assertTrue(split.toString().contains("hasBase=true"));
assertTrue(split.toString().contains("deltas=0"));
if (split instanceof OrcSplit) {
assertFalse("No footer serialize test for vector reader, hasFooter is not expected in" +
" orc splits.", ((OrcSplit) split).hasFooter());
}
orcInputFormat.getRecordReader(split, jobConf, Reporter.NULL);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read footer - split 1 => mock:/mocktable3/0_0
// call-2: open to read data - split 1 => mock:/mocktable3/0_0
// call-3: open to read footer - split 2 => mock:/mocktable3/0_1
// call-4: open to read data - split 2 => mock:/mocktable3/0_1
assertEquals(4, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
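/**
* Vectorized-reader variant of the footer-serialize case: splits report hasFooter=true and
* each record reader performs a single open per split, since the footer travels with the split.
*/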
@Test
public void testVectorReaderFooterSerialize() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable4");
conf.set("hive.orc.splits.include.file.footer", "true");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
JobConf jobConf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
"mocktable4", inspector, true, 0);
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=true"));
assertTrue(split.toString().contains("hasBase=true"));
assertTrue(split.toString().contains("deltas=0"));
if (split instanceof OrcSplit) {
assertTrue("Footer serialize test for vector reader, hasFooter is expected in" +
" orc splits.", ((OrcSplit) split).hasFooter());
}
orcInputFormat.getRecordReader(split, jobConf, Reporter.NULL);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read data - split 1 => mock:/mocktable4/0_0
// call-2: open to read data - split 2 => mock:/mocktable4/0_1
assertEquals(2, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
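/**
* ACID (transactional) scan with footer serialization disabled: splits report hasFooter=false
* and each record reader opens its file twice (footer + data).
*/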
@Test
public void testACIDReaderNoFooterSerialize() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable5");
conf.set("hive.transactional.table.scan", "true");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
conf.set("hive.orc.splits.include.file.footer", "false");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=false"));
assertTrue(split.toString().contains("hasBase=true"));
assertTrue(split.toString().contains("deltas=0"));
if (split instanceof OrcSplit) {
assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" +
" orc splits.", ((OrcSplit) split).hasFooter());
}
orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read footer - split 1 => mock:/mocktable5/0_0
// call-2: open to read data - split 1 => mock:/mocktable5/0_0
// call-3: open to read footer - split 2 => mock:/mocktable5/0_1
// call-4: open to read data - split 2 => mock:/mocktable5/0_1
assertEquals(4, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
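/**
* ACID (transactional) scan with footer serialization enabled: splits report hasFooter=true
* and each record reader opens its file only once, for the data.
*/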
@Test
public void testACIDReaderFooterSerialize() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable6");
conf.set("hive.transactional.table.scan", "true");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
conf.set("hive.orc.splits.include.file.footer", "true");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(mockPath + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(2, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=true"));
assertTrue(split.toString().contains("hasBase=true"));
assertTrue(split.toString().contains("deltas=0"));
if (split instanceof OrcSplit) {
assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" +
" orc splits.", ((OrcSplit) split).hasFooter());
}
orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read data - split 1 => mock:/mocktable6/0_0
// call-2: open to read data - split 2 => mock:/mocktable6/0_1
assertEquals(2, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
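/**
* Same as {@link #testACIDReaderNoFooterSerialize()} but with a delta directory present, which
* adds read ops for the delta side (flush length) file and the bucket file existence check.
*/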
@Test
public void testACIDReaderNoFooterSerializeWithDeltas() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable7");
conf.set("hive.transactional.table.scan", "true");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
conf.set("hive.orc.splits.include.file.footer", "false");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(new Path(mockPath + "/delta_001_002") + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(1, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=false"));
assertTrue(split.toString().contains("hasBase=true"));
// NOTE: the deltas count differs across releases.
// In older releases deltas=2, because the min and max transactions are added to the delta list separately;
// in newer releases both are combined into a single entry, so deltas=1.
assertTrue(split.toString().contains("deltas=1"));
if (split instanceof OrcSplit) {
assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" +
" orc splits.", ((OrcSplit) split).hasFooter());
}
orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read footer - split 1 => mock:/mocktable7/0_0
// call-2: open to read data - split 1 => mock:/mocktable7/0_0
// call-3: open side file (flush length) of delta directory
// call-4: fs.exists() check for delta_xxx_xxx/bucket_00000 file
assertEquals(4, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
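/**
* Same as {@link #testACIDReaderFooterSerialize()} but with a delta directory present, which
* adds read ops for the delta side (flush length) file and the bucket file existence check.
*/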
@Test
public void testACIDReaderFooterSerializeWithDeltas() throws Exception {
MockFileSystem fs = new MockFileSystem(conf);
MockPath mockPath = new MockPath(fs, "mock:///mocktable8");
conf.set("hive.transactional.table.scan", "true");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
conf.set("hive.orc.splits.include.file.footer", "true");
conf.set("mapred.input.dir", mockPath.toString());
conf.set("fs.defaultFS", "mock:///");
conf.set("fs.mock.impl", MockFileSystem.class.getName());
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer =
OrcFile.createWriter(new Path(mockPath + "/0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
writer = OrcFile.createWriter(new Path(new Path(mockPath + "/delta_001_002") + "/0_1"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
for (int i = 0; i < 10; ++i) {
writer.addRow(new MyRow(i, 2 * i));
}
writer.close();
OrcInputFormat orcInputFormat = new OrcInputFormat();
InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
assertEquals(1, splits.length);
int readOpsBefore = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsBefore = statistics.getReadOps();
}
}
assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
for (InputSplit split : splits) {
assertTrue("OrcSplit is expected", split instanceof OrcSplit);
// ETL strategies will have start=3 (start of first stripe)
assertTrue(split.toString().contains("start=3"));
assertTrue(split.toString().contains("hasFooter=true"));
assertTrue(split.toString().contains("hasBase=true"));
// NOTE: the deltas count differs across releases.
// In older releases deltas=2, because the min and max transactions are added to the delta list separately;
// in newer releases both are combined into a single entry, so deltas=1.
assertTrue(split.toString().contains("deltas=1"));
if (split instanceof OrcSplit) {
assertTrue("Footer serialize test for ACID reader, hasFooter is not expected in" +
" orc splits.", ((OrcSplit) split).hasFooter());
}
orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
}
int readOpsDelta = -1;
for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
if (statistics.getScheme().equalsIgnoreCase("mock")) {
readOpsDelta = statistics.getReadOps() - readOpsBefore;
}
}
// call-1: open to read data - split 1 => mock:/mocktable8/0_0
// call-2: open side file (flush length) of delta directory
// call-3: fs.exists() check for delta_xxx_xxx/bucket_00000 file
assertEquals(3, readOpsDelta);
// revert back to local fs
conf.set("fs.defaultFS", "file:///");
}
/**
* Also see {@link TestOrcFile#testPredicatePushdown()}.
* This tests that {@link RecordReader#getRowNumber()} works with multiple splits.
* @throws Exception
*/
@Test
public void testRowNumberUniquenessInDifferentSplits() throws Exception {
Properties properties = new Properties();
properties.setProperty("columns", "x,y");
properties.setProperty("columns.types", "int:int");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
// Save the conf variable values so that they can be restored later.
long oldDefaultStripeSize = conf.getLong(OrcConf.STRIPE_SIZE.getHiveConfName(), -1L);
long oldMaxSplitSize = conf.getLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, -1L);
// Set the conf variable values for this test.
long newStripeSize = 10000L; // 10000 bytes per stripe
long newMaxSplitSize = 100L; // 100 bytes per split
conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), newStripeSize);
conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, newMaxSplitSize);
AbstractSerDe serde = new OrcSerde();
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
properties, Reporter.NULL);
// The following loop should create 20 stripes in the orc file.
for (int i = 0; i < newStripeSize * 10; ++i) {
writer.write(serde.serialize(new MyRow(i,i+1), inspector));
}
writer.close(true);
serde = new OrcSerde();
SerDeUtils.initializeSerDe(serde, conf, properties, null);
assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
inspector = (StructObjectInspector) serde.getObjectInspector();
assertEquals("struct<x:int,y:int>", inspector.getTypeName());
InputFormat<?,?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
int numExpectedSplits = 20;
InputSplit[] splits = in.getSplits(conf, numExpectedSplits);
assertEquals(numExpectedSplits, splits.length);
for (int i = 0; i < numExpectedSplits; ++i) {
OrcSplit split = (OrcSplit) splits[i];
Reader.Options orcReaderOptions = new Reader.Options();
orcReaderOptions.range(split.getStart(), split.getLength());
OrcFile.ReaderOptions qlReaderOptions = OrcFile.readerOptions(conf).maxLength(split.getFileLength());
Reader reader = OrcFile.createReader(split.getPath(), qlReaderOptions);
RecordReader recordReader = reader.rowsOptions(orcReaderOptions);
for(int j = 0; recordReader.hasNext(); j++) {
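// 100,000 rows spread over 20 splits => 5000 rows per split; the global row number
// for row j of split i is therefore i * 5000 + j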
long rowNum = (i * 5000) + j;
long rowNumActual = recordReader.getRowNumber();
assertEquals("rowNum=" + rowNum, rowNum, rowNumActual);
Object row = recordReader.next(null);
}
recordReader.close();
}
// Reset the conf variable values that we changed for this test.
if (oldDefaultStripeSize != -1L) {
conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), oldDefaultStripeSize);
} else {
// this means that nothing was set for default stripe size previously, so we should unset it.
conf.unset(OrcConf.STRIPE_SIZE.getHiveConfName());
}
if (oldMaxSplitSize != -1L) {
conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, oldMaxSplitSize);
} else {
// this means that nothing was set for max split size previously, so we should unset it.
conf.unset(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname);
}
}
/**
* Test schema evolution when using the reader directly.
*/
@Test
public void testSchemaEvolution() throws Exception {
TypeDescription fileSchema =
TypeDescription.fromString("struct<a:int,b:struct<c:int>,d:string>");
Writer writer = OrcFile.createWriter(testFilePath,
OrcFile.writerOptions(conf)
.fileSystem(fs)
.setSchema(fileSchema)
.compress(org.apache.orc.CompressionKind.NONE));
VectorizedRowBatch batch = fileSchema.createRowBatch(1000);
batch.size = 1000;
LongColumnVector lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
for(int r=0; r < 1000; r++) {
((LongColumnVector) batch.cols[0]).vector[r] = r * 42;
lcv.vector[r] = r * 10001;
((BytesColumnVector) batch.cols[2]).setVal(r,
Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
}
writer.addRowBatch(batch);
writer.close();
TypeDescription readerSchema = TypeDescription.fromString(
"struct<a:int,b:struct<c:int,future1:int>,d:string,future2:int>");
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
RecordReader rows = reader.rowsOptions(new Reader.Options()
.schema(readerSchema));
batch = readerSchema.createRowBatch();
lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
LongColumnVector future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
assertEquals(true, future1.isNull[0]);
assertEquals(true, batch.cols[3].isRepeating);
assertEquals(true, batch.cols[3].isNull[0]);
for(int r=0; r < batch.size; ++r) {
assertEquals("row " + r, r * 42, ((LongColumnVector) batch.cols[0]).vector[r]);
assertEquals("row " + r, r * 10001, lcv.vector[r]);
assertEquals("row " + r, r * 10001, lcv.vector[r]);
assertEquals("row " + r, Integer.toHexString(r),
((BytesColumnVector) batch.cols[2]).toString(r));
}
assertEquals(false, rows.nextBatch(batch));
rows.close();
// try it again with an include vector
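// the include flags are indexed by the reader schema's flattened column ids:
// 0=root, 1=a, 2=b, 3=b.c, 4=b.future1, 5=d, 6=future2; b.future1 and d are excluded here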
rows = reader.rowsOptions(new Reader.Options()
.schema(readerSchema)
.include(new boolean[]{false, true, true, true, false, false, true}));
batch = readerSchema.createRowBatch();
lcv = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[0]);
future1 = ((LongColumnVector) ((StructColumnVector) batch.cols[1]).fields[1]);
assertEquals(true, rows.nextBatch(batch));
assertEquals(1000, batch.size);
assertEquals(true, future1.isRepeating);
assertEquals(true, future1.isNull[0]);
assertEquals(true, batch.cols[3].isRepeating);
assertEquals(true, batch.cols[3].isNull[0]);
assertEquals(true, batch.cols[2].isRepeating);
assertEquals(true, batch.cols[2].isNull[0]);
for(int r=0; r < batch.size; ++r) {
assertEquals("row " + r, r * 42, ((LongColumnVector) batch.cols[0]).vector[r]);
assertEquals("row " + r, r * 10001, lcv.vector[r]);
}
assertEquals(false, rows.nextBatch(batch));
rows.close();
}
/**
* Test column projection when using ACID.
*/
@Test
public void testColumnProjectionWithAcid() throws Exception {
Path baseDir = new Path(workDir, "base_00100");
testFilePath = new Path(baseDir, "bucket_00000");
fs.mkdirs(baseDir);
fs.delete(testFilePath, true);
TypeDescription fileSchema =
TypeDescription.fromString("struct<operation:int," +
"originalTransaction:bigint,bucket:int,rowId:bigint," +
"currentTransaction:bigint," +
"row:struct<a:int,b:struct<c:int>,d:string>>");
Writer writer = OrcFile.createWriter(testFilePath,
OrcFile.writerOptions(conf)
.fileSystem(fs)
.setSchema(fileSchema)
.compress(org.apache.orc.CompressionKind.NONE));
VectorizedRowBatch batch = fileSchema.createRowBatch(1000);
batch.size = 1000;
StructColumnVector scv = (StructColumnVector)batch.cols[5];
// operation
batch.cols[0].isRepeating = true;
((LongColumnVector) batch.cols[0]).vector[0] = 0;
// original transaction
batch.cols[1].isRepeating = true;
((LongColumnVector) batch.cols[1]).vector[0] = 1;
// bucket
batch.cols[2].isRepeating = true;
((LongColumnVector) batch.cols[2]).vector[0] = 0;
// current transaction
batch.cols[4].isRepeating = true;
((LongColumnVector) batch.cols[4]).vector[0] = 1;
LongColumnVector lcv = (LongColumnVector)
((StructColumnVector) scv.fields[1]).fields[0];
for(int r=0; r < 1000; r++) {
// row id
((LongColumnVector) batch.cols[3]).vector[r] = r;
// a
((LongColumnVector) scv.fields[0]).vector[r] = r * 42;
// b.c
lcv.vector[r] = r * 10001;
// d
((BytesColumnVector) scv.fields[2]).setVal(r,
Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
}
writer.addRowBatch(batch);
writer.addUserMetadata(OrcRecordUpdater.ACID_KEY_INDEX_NAME,
ByteBuffer.wrap("0,0,999".getBytes(StandardCharsets.UTF_8)));
writer.close();
long fileLength = fs.getFileStatus(testFilePath).getLen();
// test with same schema with include
conf.set(ValidTxnList.VALID_TXNS_KEY, "100:99:");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int>,string");
conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
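// READ_COLUMN_IDS refers to the logical columns a,b,d: id 0 selects a and id 2 selects d,
// so b (id 1) is left unread and is expected to come back as null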
OrcSplit split = new OrcSplit(testFilePath, null, 0, fileLength,
new String[0], null, false, true,
new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength);
OrcInputFormat inputFormat = new OrcInputFormat();
AcidInputFormat.RowReader<OrcStruct> reader = inputFormat.getReader(split,
new AcidInputFormat.Options(conf));
int record = 0;
RecordIdentifier id = reader.createKey();
OrcStruct struct = reader.createValue();
while (reader.next(id, struct)) {
assertEquals("id " + record, record, id.getRowId());
assertEquals("bucket " + record, 0, id.getBucketId());
assertEquals("trans " + record, 1, id.getTransactionId());
assertEquals("a " + record,
42 * record, ((IntWritable) struct.getFieldValue(0)).get());
assertEquals(null, struct.getFieldValue(1));
assertEquals("d " + record,
Integer.toHexString(record), struct.getFieldValue(2).toString());
record += 1;
}
assertEquals(1000, record);
reader.close();
// test with schema evolution and include
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d,f");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int,e:string>,string,int");
conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2,3");
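// with the evolved schema a,b,d,f the projected ids are 0 (a), 2 (d) and 3 (f);
// f does not exist in the file, so it is expected to read back as null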
split = new OrcSplit(testFilePath, null, 0, fileLength,
new String[0], null, false, true,
new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength);
inputFormat = new OrcInputFormat();
reader = inputFormat.getReader(split, new AcidInputFormat.Options(conf));
record = 0;
id = reader.createKey();
struct = reader.createValue();
while (reader.next(id, struct)) {
assertEquals("id " + record, record, id.getRowId());
assertEquals("bucket " + record, 0, id.getBucketId());
assertEquals("trans " + record, 1, id.getTransactionId());
assertEquals("a " + record,
42 * record, ((IntWritable) struct.getFieldValue(0)).get());
assertEquals(null, struct.getFieldValue(1));
assertEquals("d " + record,
Integer.toHexString(record), struct.getFieldValue(2).toString());
assertEquals("f " + record, null, struct.getFieldValue(3));
record += 1;
}
assertEquals(1000, record);
reader.close();
}
}