/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hive.hcatalog.streaming.mutate;

import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.io.AcidInputFormat.AcidRecordReader;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.thrift.TException;
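/**
 * Test helper that inspects the ACID state of a table or partition and makes JUnit assertions
 * about the delta files and records found there.
 * <p>
 * Illustrative usage sketch only; {@code metaStoreClient}, {@code conf}, and {@code table} below
 * are placeholders that the calling test is assumed to provide:
 *
 * <pre>{@code
 * StreamingAssert.Factory factory = new StreamingAssert.Factory(metaStoreClient, conf);
 * StreamingAssert streamingAssert = factory.newStreamingAssert(table);
 * streamingAssert.assertExpectedFileCount(1);
 * streamingAssert.assertMinTransactionId(1L);
 * streamingAssert.assertMaxTransactionId(1L);
 * List<StreamingAssert.Record> records = streamingAssert.readRecords();
 * }</pre>
 */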
public class StreamingAssert {

  public static class Factory {
    private IMetaStoreClient metaStoreClient;
    private final HiveConf conf;

    public Factory(IMetaStoreClient metaStoreClient, HiveConf conf) {
      this.metaStoreClient = metaStoreClient;
      this.conf = conf;
    }

    public StreamingAssert newStreamingAssert(Table table) throws Exception {
      return newStreamingAssert(table, Collections.<String> emptyList());
    }

    public StreamingAssert newStreamingAssert(Table table, List<String> partition) throws Exception {
      return new StreamingAssert(metaStoreClient, conf, table, partition);
    }
  }

  private Table table;
  private List<String> partition;
  private IMetaStoreClient metaStoreClient;
  private Directory dir;
  private ValidTxnList txns;
  private List<AcidUtils.ParsedDelta> currentDeltas;
  private long min;
  private long max;
  private Path partitionLocation;

  StreamingAssert(IMetaStoreClient metaStoreClient, HiveConf conf, Table table, List<String> partition)
      throws Exception {
    this.metaStoreClient = metaStoreClient;
    this.table = table;
    this.partition = partition;

    txns = metaStoreClient.getValidTxns();
    partitionLocation = getPartitionLocation();
    dir = AcidUtils.getAcidState(partitionLocation, conf, txns);
    // A freshly written ACID table/partition should contain neither obsolete nor pre-ACID "original" files.
    assertEquals(0, dir.getObsolete().size());
    assertEquals(0, dir.getOriginalFiles().size());

    // Scan the current delta directories to establish the overall min/max transaction ids written.
    currentDeltas = dir.getCurrentDirectories();
    min = Long.MAX_VALUE;
    max = Long.MIN_VALUE;
    System.out.println("Files found: ");
    for (AcidUtils.ParsedDelta parsedDelta : currentDeltas) {
      System.out.println(parsedDelta.getPath().toString());
      max = Math.max(parsedDelta.getMaxTransaction(), max);
      min = Math.min(parsedDelta.getMinTransaction(), min);
    }
  }

  public void assertExpectedFileCount(int expectedFileCount) {
    assertEquals(expectedFileCount, currentDeltas.size());
  }

  public void assertNothingWritten() {
    assertExpectedFileCount(0);
  }

  public void assertMinTransactionId(long expectedMinTransactionId) {
    if (currentDeltas.isEmpty()) {
      throw new AssertionError("No data");
    }
    assertEquals(expectedMinTransactionId, min);
  }

  public void assertMaxTransactionId(long expectedMaxTransactionId) {
    if (currentDeltas.isEmpty()) {
      throw new AssertionError("No data");
    }
    assertEquals(expectedMaxTransactionId, max);
  }

  List<Record> readRecords() throws Exception {
    if (currentDeltas.isEmpty()) {
      throw new AssertionError("No data");
    }
    // Configure an ACID-aware ORC read over the partition location.
    InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.input.dir", partitionLocation.toString());
    job.set("bucket_count", Integer.toString(table.getSd().getNumBuckets()));
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
    job.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true");
    job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
    InputSplit[] splits = inputFormat.getSplits(job, 1);
    assertEquals(1, splits.length);

    final AcidRecordReader<NullWritable, OrcStruct> recordReader = (AcidRecordReader<NullWritable, OrcStruct>) inputFormat
        .getRecordReader(splits[0], job, Reporter.NULL);

    NullWritable key = recordReader.createKey();
    OrcStruct value = recordReader.createValue();

    // Capture each row together with its RecordIdentifier (transaction id, bucket id, row id).
    List<Record> records = new ArrayList<>();
    while (recordReader.next(key, value)) {
      RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
      Record record = new Record(new RecordIdentifier(recordIdentifier.getTransactionId(),
          recordIdentifier.getBucketId(), recordIdentifier.getRowId()), value.toString());
      System.out.println(record);
      records.add(record);
    }
    recordReader.close();
    return records;
  }

  private Path getPartitionLocation() throws NoSuchObjectException, MetaException, TException {
    Path partitionLocation;
    if (partition.isEmpty()) {
      partitionLocation = new Path(table.getSd().getLocation());
    } else {
      // TODO: calculate this instead. Just because we're writing to the location doesn't mean that
      // it'll always be wanted in the meta store right away.
      List<Partition> partitionEntries = metaStoreClient.listPartitions(table.getDbName(), table.getTableName(),
          partition, (short) 1);
      partitionLocation = new Path(partitionEntries.get(0).getSd().getLocation());
    }
    return partitionLocation;
  }

  public static class Record {
    private RecordIdentifier recordIdentifier;
    private String row;

    Record(RecordIdentifier recordIdentifier, String row) {
      this.recordIdentifier = recordIdentifier;
      this.row = row;
    }

    public RecordIdentifier getRecordIdentifier() {
      return recordIdentifier;
    }

    public String getRow() {
      return row;
    }

    @Override
    public String toString() {
      return "Record [recordIdentifier=" + recordIdentifier + ", row=" + row + "]";
    }
  }

}