/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.io.orc;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.orc.OrcConf;
import org.apache.orc.impl.AcidStats;
import org.apache.orc.impl.OrcAcidUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;

/**
 * A RecordUpdater where the files are stored as ORC.
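 * <p>
 * Each record is wrapped in an ACID event struct of the form
 * (operation, originalTransaction, bucket, rowId, currentTransaction, row);
 * see the field-offset constants and {@link #createEventSchema} below for the
 * authoritative layout.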
 */
public class OrcRecordUpdater implements RecordUpdater {

  private static final Logger LOG = LoggerFactory.getLogger(OrcRecordUpdater.class);

  public static final String ACID_KEY_INDEX_NAME = "hive.acid.key.index";
  public static final String ACID_FORMAT = "_orc_acid_version";
  public static final int ORC_ACID_VERSION = 0;

  final static int INSERT_OPERATION = 0;
  final static int UPDATE_OPERATION = 1;
  final static int DELETE_OPERATION = 2;

  // Field offsets within the ACID event struct.
  final static int OPERATION = 0;
  final static int ORIGINAL_TRANSACTION = 1;
  final static int BUCKET = 2;
  final static int ROW_ID = 3;
  final static int CURRENT_TRANSACTION = 4;
  final static int ROW = 5;
  final static int FIELDS = 6;

  final static int DELTA_BUFFER_SIZE = 16 * 1024;
  final static long DELTA_STRIPE_SIZE = 16 * 1024 * 1024;

  private static final Charset UTF8 = Charset.forName("UTF-8");

  private final AcidOutputFormat.Options options;
  private final AcidUtils.AcidOperationalProperties acidOperationalProperties;
  private final Path path;
  private Path deleteEventPath;
  private final FileSystem fs;
  private OrcFile.WriterOptions writerOptions;
  private Writer writer = null;
  private boolean writerClosed = false;
  private Writer deleteEventWriter = null;
  private final FSDataOutputStream flushLengths;
  private final OrcStruct item;
  private final IntWritable operation = new IntWritable();
  private final LongWritable currentTransaction = new LongWritable(-1);
  private final LongWritable originalTransaction = new LongWritable(-1);
  private final IntWritable bucket = new IntWritable();
  private final LongWritable rowId = new LongWritable();
  private long insertedRows = 0;
  private long rowIdOffset = 0;
  // This records how many rows have been inserted or deleted.  It is separate from insertedRows
  // because that is monotonically increasing to give new unique row ids.
  private long rowCountDelta = 0;
  private final KeyIndexBuilder indexBuilder = new KeyIndexBuilder();
  private KeyIndexBuilder deleteEventIndexBuilder;
  private StructField recIdField = null; // field to look for the record identifier in
  private StructField rowIdField = null; // field inside recId to look for row id in
  private StructField originalTxnField = null; // field inside recId to look for original txn in
  private StructField bucketField = null; // field inside recId to look for bucket in
  private StructObjectInspector rowInspector; // OI for the original row
  private StructObjectInspector recIdInspector; // OI for the record identifier struct
  private LongObjectInspector rowIdInspector; // OI for the long row id inside the recordIdentifier
  private LongObjectInspector origTxnInspector; // OI for the original txn inside the record
                                                // identifier

  static int getOperation(OrcStruct struct) {
    return ((IntWritable) struct.getFieldValue(OPERATION)).get();
  }

  static long getCurrentTransaction(OrcStruct struct) {
    return ((LongWritable) struct.getFieldValue(CURRENT_TRANSACTION)).get();
  }

  static long getOriginalTransaction(OrcStruct struct) {
    return ((LongWritable) struct.getFieldValue(ORIGINAL_TRANSACTION)).get();
  }

  static int getBucket(OrcStruct struct) {
    return ((IntWritable) struct.getFieldValue(BUCKET)).get();
  }

  static long getRowId(OrcStruct struct) {
    return ((LongWritable) struct.getFieldValue(ROW_ID)).get();
  }

  static OrcStruct getRow(OrcStruct struct) {
    if (struct == null) {
      return null;
    } else {
      return (OrcStruct) struct.getFieldValue(ROW);
    }
  }

  /**
   * An extension to AcidOutputFormat that allows users to add additional
   * options.
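   * <p>
   * A minimal usage sketch (hypothetical, not taken from callers in the
   * source):
   * <pre>
   *   OrcFile.WriterOptions orcOpts = OrcFile.writerOptions(conf);
   *   OrcOptions acidOpts = new OrcOptions(conf).orcOptions(orcOpts);
   * </pre>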
   */
  public static class OrcOptions extends AcidOutputFormat.Options {
    OrcFile.WriterOptions orcOptions = null;

    public OrcOptions(Configuration conf) {
      super(conf);
    }

    public OrcOptions orcOptions(OrcFile.WriterOptions opts) {
      this.orcOptions = opts;
      return this;
    }

    public OrcFile.WriterOptions getOrcOptions() {
      return orcOptions;
    }
  }

  /**
   * Create an object inspector for the ACID event based on the object inspector
   * for the underlying row.
   * @param rowInspector the row's object inspector
   * @return an object inspector for the event stream
   */
  static StructObjectInspector createEventSchema(ObjectInspector rowInspector) {
    List<StructField> fields = new ArrayList<StructField>();
    fields.add(new OrcStruct.Field("operation",
        PrimitiveObjectInspectorFactory.writableIntObjectInspector, OPERATION));
    fields.add(new OrcStruct.Field("originalTransaction",
        PrimitiveObjectInspectorFactory.writableLongObjectInspector,
        ORIGINAL_TRANSACTION));
    fields.add(new OrcStruct.Field("bucket",
        PrimitiveObjectInspectorFactory.writableIntObjectInspector, BUCKET));
    fields.add(new OrcStruct.Field("rowId",
        PrimitiveObjectInspectorFactory.writableLongObjectInspector, ROW_ID));
    fields.add(new OrcStruct.Field("currentTransaction",
        PrimitiveObjectInspectorFactory.writableLongObjectInspector,
        CURRENT_TRANSACTION));
    fields.add(new OrcStruct.Field("row", rowInspector, ROW));
    return new OrcStruct.OrcStructInspector(fields);
  }
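  // Illustrative only: for a row inspector describing struct<a:int,b:string>,
  // the event schema built above corresponds to
  //   struct<operation:int, originalTransaction:bigint, bucket:int,
  //          rowId:bigint, currentTransaction:bigint, row:struct<a:int,b:string>>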
  OrcRecordUpdater(Path path,
                   AcidOutputFormat.Options options) throws IOException {
    this.options = options;
    // Initialize acidOperationalProperties based on table properties, and
    // if they are not available, see if we can find them in the job
    // configuration. We have to look in these two places instead of just the
    // conf, because Streaming Ingest uses table properties, while normal Hive
    // SQL inserts/updates/deletes place this value in the configuration
    // object.
    if (options.getTableProperties() != null) {
      this.acidOperationalProperties =
          AcidUtils.getAcidOperationalProperties(options.getTableProperties());
    } else {
      this.acidOperationalProperties =
          AcidUtils.getAcidOperationalProperties(options.getConfiguration());
    }
    this.bucket.set(options.getBucket());
    this.path = AcidUtils.createFilename(path, options);
    this.deleteEventWriter = null;
    this.deleteEventPath = null;
    FileSystem fs = options.getFilesystem();
    if (fs == null) {
      fs = path.getFileSystem(options.getConfiguration());
    }
    this.fs = fs;
    Path formatFile = new Path(path, ACID_FORMAT);
    if (!fs.exists(formatFile)) {
      try (FSDataOutputStream strm = fs.create(formatFile, false)) {
        strm.writeInt(ORC_ACID_VERSION);
      } catch (IOException ioe) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " +
              ioe);
        }
      }
    }
    if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
        && !options.isWritingBase()) {
      flushLengths = fs.create(OrcAcidUtils.getSideFile(this.path), true, 8,
          options.getReporter());
    } else {
      flushLengths = null;
    }
    this.writerOptions = null;
    // If writing to a delta directory, clone the original options so we don't
    // pollute them for the base writer.
    if (options.isWritingBase()) {
      if (options instanceof OrcOptions) {
        writerOptions = ((OrcOptions) options).getOrcOptions();
      }
      if (writerOptions == null) {
        writerOptions = OrcFile.writerOptions(options.getTableProperties(),
            options.getConfiguration());
      }
    } else {  // delta writer
      AcidOutputFormat.Options optionsCloneForDelta = options.clone();
      if (optionsCloneForDelta instanceof OrcOptions) {
        writerOptions = ((OrcOptions) optionsCloneForDelta).getOrcOptions();
      }
      if (writerOptions == null) {
        writerOptions = OrcFile.writerOptions(optionsCloneForDelta.getTableProperties(),
            optionsCloneForDelta.getConfiguration());
      }
      if (this.acidOperationalProperties.isSplitUpdate()) {
        // If this is a split-update, initialize a delete delta file path in
        // anticipation that update/delete events will be written to that
        // separate file. This writes to a file in a directory whose name
        // starts with "delete_delta_...". The actual initialization of a
        // writer only happens if any delete events are written.
        this.deleteEventPath = AcidUtils.createFilename(path,
            optionsCloneForDelta.writingDeleteDelta(true));
      }
      // Get the buffer size and stripe size configured for the base writer,
      // then scale them down for the delta writer based on BASE_DELTA_RATIO.
      int baseBufferSizeValue = writerOptions.getBufferSize();
      long baseStripeSizeValue = writerOptions.getStripeSize();
      int ratio = (int) OrcConf.BASE_DELTA_RATIO.getLong(options.getConfiguration());
      writerOptions.bufferSize(baseBufferSizeValue / ratio);
      writerOptions.stripeSize(baseStripeSizeValue / ratio);
      writerOptions.blockPadding(false);
    }
    writerOptions.fileSystem(fs).callback(indexBuilder);
    rowInspector = (StructObjectInspector) options.getInspector();
    writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
        options.getRecordIdColumn())));
    item = new OrcStruct(FIELDS);
    item.setFieldValue(OPERATION, operation);
    item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
    item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
    item.setFieldValue(BUCKET, bucket);
    item.setFieldValue(ROW_ID, rowId);
  }

  @Override
  public String toString() {
    return getClass().getName() + "[" + path + "]";
  }

  /**
   * To handle multiple INSERT... statements in a single transaction, we want
   * to make sure to generate a unique {@code rowId} for all inserted rows of
   * the transaction.
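   * For example (an illustrative walk-through, not from the source): if
   * statement 0 wrote 3 insert events and statement 1 wrote 2, then rows
   * written by statement 2 start at a rowId offset of 5.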
   * @return the largest rowId created by previous statements (may be 0)
   * @throws IOException
   */
  private long findRowIdOffsetForInsert() throws IOException {
    /*
     * 1. need to know the bucket we are writing to
     * 2. need to know which delta dir it's in
     * Then,
     * 1. find the same bucket file in the previous (insert) delta dir for
     *    this txn (Note: in case of split_update, we can ignore the
     *    delete_delta dirs)
     * 2. read the footer and get the AcidStats, which have the insert count
     * 2.1 if AcidStats.inserts > 0, add it to the running insert count;
     *     otherwise go to the previous delta file
     * For example, consider the insert/update/insert case...
     */
    if (options.getStatementId() <= 0) {
      return 0; // there is only 1 statement in this transaction (so far)
    }
    long totalInserts = 0;
    for (int pastStmt = options.getStatementId() - 1; pastStmt >= 0; pastStmt--) {
      Path matchingBucket = AcidUtils.createFilename(
          options.getFinalDestination(), options.clone().statementId(pastStmt));
      if (!fs.exists(matchingBucket)) {
        continue;
      }
      Reader reader = OrcFile.createReader(matchingBucket,
          OrcFile.readerOptions(options.getConfiguration()));
      // no close() on Reader?!
      AcidStats acidStats = OrcAcidUtils.parseAcidStats(reader);
      if (acidStats.inserts > 0) {
        totalInserts += acidStats.inserts;
      }
    }
    return totalInserts;
  }

  // Find the record identifier column (if there) and return a possibly new
  // ObjectInspector that will strain out the record id for the underlying
  // writer.
  private ObjectInspector findRecId(ObjectInspector inspector, int rowIdColNum) {
    if (!(inspector instanceof StructObjectInspector)) {
      throw new RuntimeException("Serious problem, expected a StructObjectInspector, " +
          "but got a " + inspector.getClass().getName());
    }
    if (rowIdColNum < 0) {
      return inspector;
    } else {
      RecIdStrippingObjectInspector newInspector =
          new RecIdStrippingObjectInspector(inspector, rowIdColNum);
      recIdField = newInspector.getRecId();
      List<? extends StructField> fields =
          ((StructObjectInspector) recIdField.getFieldObjectInspector()).getAllStructFieldRefs();
      // Go by position, not field name, as field names aren't guaranteed. The
      // order of fields in RecordIdentifier is transactionId, bucketId, rowId.
      originalTxnField = fields.get(0);
      origTxnInspector = (LongObjectInspector) originalTxnField.getFieldObjectInspector();
      bucketField = fields.get(1);
      rowIdField = fields.get(2);
      rowIdInspector = (LongObjectInspector) rowIdField.getFieldObjectInspector();
      recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
      return newInspector;
    }
  }

  private void addSimpleEvent(int operation, long currentTransaction, long rowId, Object row)
      throws IOException {
    this.operation.set(operation);
    this.currentTransaction.set(currentTransaction);
    // If this is an insert, originalTransaction should be set to this
    // transaction.  If not, it will be reset by the following if anyway.
    long originalTransaction = currentTransaction;
    if (operation == DELETE_OPERATION || operation == UPDATE_OPERATION) {
      Object rowIdValue = rowInspector.getStructFieldData(row, recIdField);
      originalTransaction = origTxnInspector.get(
          recIdInspector.getStructFieldData(rowIdValue, originalTxnField));
      rowId = rowIdInspector.get(recIdInspector.getStructFieldData(rowIdValue, rowIdField));
    } else if (operation == INSERT_OPERATION) {
      rowId += rowIdOffset;
    }
    this.rowId.set(rowId);
    this.originalTransaction.set(originalTransaction);
    item.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(operation));
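    // For delete events the ROW field is nulled out; the
    // (originalTransaction, bucket, rowId) key is what identifies the deleted
    // row to readers.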
    item.setFieldValue(OrcRecordUpdater.ROW,
        (operation == DELETE_OPERATION ? null : row));
    indexBuilder.addKey(operation, originalTransaction, bucket.get(), rowId);
    if (writer == null) {
      writer = OrcFile.createWriter(path, writerOptions);
    }
    writer.addRow(item);
  }

  private void addSplitUpdateEvent(int operation, long currentTransaction, long rowId, Object row)
      throws IOException {
    if (operation == INSERT_OPERATION) {
      // Just insert the record in the usual way, i.e., default to the simple behavior.
      addSimpleEvent(operation, currentTransaction, rowId, row);
      return;
    }
    this.operation.set(operation);
    this.currentTransaction.set(currentTransaction);
    Object rowValue = rowInspector.getStructFieldData(row, recIdField);
    long originalTransaction = origTxnInspector.get(
        recIdInspector.getStructFieldData(rowValue, originalTxnField));
    rowId = rowIdInspector.get(
        recIdInspector.getStructFieldData(rowValue, rowIdField));

    if (operation == DELETE_OPERATION || operation == UPDATE_OPERATION) {
      // Initialize the deleteEventWriter if not yet done (lazy initialization).
      if (deleteEventWriter == null) {
        // Initialize an indexBuilder for deleteEvents.
        deleteEventIndexBuilder = new KeyIndexBuilder();
        // Change the indexBuilder callback too for the deleteEvent file; the
        // remaining writer options stay the same.
        // TODO: When we change the callback, we are essentially mutating the
        // writerOptions. This works, but perhaps is not a good thing. The
        // proper way to do this would be to clone the writerOptions; however,
        // that requires that the parent OrcFile.writerOptions implement a
        // clone() method (which it does not for now). HIVE-14514 is currently
        // an open JIRA to fix this.
        this.deleteEventWriter = OrcFile.createWriter(deleteEventPath,
            writerOptions.callback(deleteEventIndexBuilder));
      }

      // A delete/update generates a delete event for the original row.
      this.rowId.set(rowId);
      this.originalTransaction.set(originalTransaction);
      item.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(DELETE_OPERATION));
      item.setFieldValue(OrcRecordUpdater.ROW, null); // ROW is null for delete events.
      deleteEventIndexBuilder.addKey(DELETE_OPERATION, originalTransaction, bucket.get(), rowId);
      deleteEventWriter.addRow(item);
    }

    if (operation == UPDATE_OPERATION) {
      // A new row is also inserted into the usual delta file for an update event.
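      // Net effect: under split-update, an update is represented as a delete
      // event in the delete_delta file plus the fresh insert event below in
      // the regular delta file.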
      addSimpleEvent(INSERT_OPERATION, currentTransaction, insertedRows++, row);
    }
  }

  @Override
  public void insert(long currentTransaction, Object row) throws IOException {
    if (this.currentTransaction.get() != currentTransaction) {
      insertedRows = 0;
      // This is almost a no-op in the hcatalog.streaming case, since
      // statementId == 0 is always true there.
      rowIdOffset = findRowIdOffsetForInsert();
    }
    if (acidOperationalProperties.isSplitUpdate()) {
      addSplitUpdateEvent(INSERT_OPERATION, currentTransaction, insertedRows++, row);
    } else {
      addSimpleEvent(INSERT_OPERATION, currentTransaction, insertedRows++, row);
    }
    rowCountDelta++;
  }

  @Override
  public void update(long currentTransaction, Object row) throws IOException {
    if (this.currentTransaction.get() != currentTransaction) {
      insertedRows = 0;
      rowIdOffset = findRowIdOffsetForInsert();
    }
    if (acidOperationalProperties.isSplitUpdate()) {
      addSplitUpdateEvent(UPDATE_OPERATION, currentTransaction, -1L, row);
    } else {
      addSimpleEvent(UPDATE_OPERATION, currentTransaction, -1L, row);
    }
  }

  @Override
  public void delete(long currentTransaction, Object row) throws IOException {
    if (this.currentTransaction.get() != currentTransaction) {
      insertedRows = 0;
    }
    if (acidOperationalProperties.isSplitUpdate()) {
      addSplitUpdateEvent(DELETE_OPERATION, currentTransaction, -1L, row);
    } else {
      addSimpleEvent(DELETE_OPERATION, currentTransaction, -1L, row);
    }
    rowCountDelta--;
  }

  @Override
  public void flush() throws IOException {
    // We only support flushes on files with multiple transactions, because
    // flushes create significant overhead in HDFS. Record updaters with a
    // single transaction should be closed rather than flushed.
    if (flushLengths == null) {
      throw new IllegalStateException("Attempting to flush a RecordUpdater on "
          + path + " with a single transaction.");
    }
    if (writer == null) {
      writer = OrcFile.createWriter(path, writerOptions);
    }
    long len = writer.writeIntermediateFooter();
    flushLengths.writeLong(len);
    OrcInputFormat.SHIMS.hflush(flushLengths);
  }

  @Override
  public void close(boolean abort) throws IOException {
    if (abort) {
      if (flushLengths == null) {
        fs.delete(path, false);
      }
    } else if (!writerClosed) {
      if (acidOperationalProperties.isSplitUpdate()) {
        // When split-update is enabled, we can choose not to write any delta
        // files when there are no inserts. In such cases only the
        // delete_deltas would be written, and they are closed separately
        // below.
        if (writer != null && indexBuilder.acidStats.inserts > 0) {
          writer.close(); // normal close, when there are inserts.
        }
      } else {
        if (writer == null) {
          writer = OrcFile.createWriter(path, writerOptions);
        }
        writer.close(); // normal close.
      }
      if (deleteEventWriter != null) {
        if (deleteEventIndexBuilder.acidStats.deletes > 0) {
          // Only need to write out and close the delete_delta if there have
          // been any delete events.
          deleteEventWriter.close();
        } else {
          // Just remove the delete_delta if there have been no delete events.
          fs.delete(deleteEventPath, false);
        }
      }
    }
    if (flushLengths != null) {
      flushLengths.close();
      fs.delete(OrcAcidUtils.getSideFile(path), false);
    }
    writer = null;
    deleteEventWriter = null;
    writerClosed = true;
  }

  @Override
  public SerDeStats getStats() {
    SerDeStats stats = new SerDeStats();
    stats.setRowCount(rowCountDelta);
    // Don't worry about setting the raw data size diff.  I have no idea how to
    // calculate that without finding the row we are updating or deleting,
    // which would be a mess.
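    // Note that rowCountDelta can be negative: inserts increment it and
    // deletes decrement it, so a delete-only delta reports a negative row
    // count.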
    return stats;
  }

  private static final Charset utf8 = Charset.forName("UTF-8");
  private static final CharsetDecoder utf8Decoder = utf8.newDecoder();

  static RecordIdentifier[] parseKeyIndex(Reader reader) {
    String[] stripes;
    try {
      ByteBuffer val =
          reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME)
              .duplicate();
      stripes = utf8Decoder.decode(val).toString().split(";");
    } catch (CharacterCodingException e) {
      throw new IllegalArgumentException("Bad string encoding for " +
          OrcRecordUpdater.ACID_KEY_INDEX_NAME, e);
    }
    RecordIdentifier[] result = new RecordIdentifier[stripes.length];
    for (int i = 0; i < stripes.length; ++i) {
      if (stripes[i].length() != 0) {
        String[] parts = stripes[i].split(",");
        result[i] = new RecordIdentifier();
        result[i].setValues(Long.parseLong(parts[0]),
            Integer.parseInt(parts[1]), Long.parseLong(parts[2]));
      }
    }
    return result;
  }

  static class KeyIndexBuilder implements OrcFile.WriterCallback {
    StringBuilder lastKey = new StringBuilder();
    long lastTransaction;
    int lastBucket;
    long lastRowId;
    AcidStats acidStats = new AcidStats();

    @Override
    public void preStripeWrite(OrcFile.WriterContext context) throws IOException {
      lastKey.append(lastTransaction);
      lastKey.append(',');
      lastKey.append(lastBucket);
      lastKey.append(',');
      lastKey.append(lastRowId);
      lastKey.append(';');
    }

    @Override
    public void preFooterWrite(OrcFile.WriterContext context) throws IOException {
      context.getWriter().addUserMetadata(ACID_KEY_INDEX_NAME,
          UTF8.encode(lastKey.toString()));
      context.getWriter().addUserMetadata(OrcAcidUtils.ACID_STATS,
          UTF8.encode(acidStats.serialize()));
    }

    void addKey(int op, long transaction, int bucket, long rowId) {
      switch (op) {
        case INSERT_OPERATION:
          acidStats.inserts += 1;
          break;
        case UPDATE_OPERATION:
          acidStats.updates += 1;
          break;
        case DELETE_OPERATION:
          acidStats.deletes += 1;
          break;
        default:
          throw new IllegalArgumentException("Unknown operation " + op);
      }
      lastTransaction = transaction;
      lastBucket = bucket;
      lastRowId = rowId;
    }
  }

  /**
   * An ObjectInspector that will strip out the record identifier so that the
   * underlying writer doesn't see it.
   */
  private static class RecIdStrippingObjectInspector extends StructObjectInspector {
    private StructObjectInspector wrapped;
    List<StructField> fields;
    StructField recId;

    RecIdStrippingObjectInspector(ObjectInspector oi, int rowIdColNum) {
      if (!(oi instanceof StructObjectInspector)) {
        throw new RuntimeException("Serious problem, expected a StructObjectInspector, " +
            "but got a " + oi.getClass().getName());
      }
      wrapped = (StructObjectInspector) oi;
      List<? extends StructField> wrappedFields = wrapped.getAllStructFieldRefs();
      fields = new ArrayList<StructField>(wrapped.getAllStructFieldRefs().size());
      for (int i = 0; i < wrappedFields.size(); i++) {
        if (i == rowIdColNum) {
          recId = wrappedFields.get(i);
        } else {
          fields.add(wrappedFields.get(i));
        }
      }
    }
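    // Note: the returned refs omit the recId field, so the positions of
    // fields after the stripped column are shifted down by one relative to
    // the wrapped inspector.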
    @Override
    public List<? extends StructField> getAllStructFieldRefs() {
      return fields;
    }

    @Override
    public StructField getStructFieldRef(String fieldName) {
      return wrapped.getStructFieldRef(fieldName);
    }

    @Override
    public Object getStructFieldData(Object data, StructField fieldRef) {
      // For performance, don't check that the fieldRef isn't recId every time;
      // just assume that the caller used getAllStructFieldRefs and thus
      // doesn't have that fieldRef.
      return wrapped.getStructFieldData(data, fieldRef);
    }

    @Override
    public List<Object> getStructFieldsDataAsList(Object data) {
      return wrapped.getStructFieldsDataAsList(data);
    }

    @Override
    public String getTypeName() {
      return wrapped.getTypeName();
    }

    @Override
    public Category getCategory() {
      return wrapped.getCategory();
    }

    StructField getRecId() {
      return recId;
    }
  }
}