/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hive.hcatalog.streaming.mutate;

import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.io.AcidInputFormat.AcidRecordReader;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.thrift.TException;
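/**
 * Test helper that inspects the ACID state of a table or partition and makes JUnit assertions
 * about the delta files and records found there.
 * <p>
 * Illustrative usage sketch only; {@code metaStoreClient}, {@code conf}, and {@code table} below
 * are placeholders that the calling test is assumed to provide:
 *
 * <pre>{@code
 * StreamingAssert.Factory factory = new StreamingAssert.Factory(metaStoreClient, conf);
 * StreamingAssert streamingAssert = factory.newStreamingAssert(table);
 * streamingAssert.assertExpectedFileCount(1);
 * streamingAssert.assertMinTransactionId(1L);
 * streamingAssert.assertMaxTransactionId(1L);
 * List<StreamingAssert.Record> records = streamingAssert.readRecords();
 * }</pre>
 */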
public class StreamingAssert {

  public static class Factory {
    private IMetaStoreClient metaStoreClient;
    private final HiveConf conf;

    public Factory(IMetaStoreClient metaStoreClient, HiveConf conf) {
      this.metaStoreClient = metaStoreClient;
      this.conf = conf;
    }

    public StreamingAssert newStreamingAssert(Table table) throws Exception {
      return newStreamingAssert(table, Collections.<String> emptyList());
    }

    public StreamingAssert newStreamingAssert(Table table, List<String> partition) throws Exception {
      return new StreamingAssert(metaStoreClient, conf, table, partition);
    }
  }

  private Table table;
  private List<String> partition;
  private IMetaStoreClient metaStoreClient;
  private Directory dir;
  private ValidTxnList txns;
  private List<AcidUtils.ParsedDelta> currentDeltas;
  private long min;
  private long max;
  private Path partitionLocation;

  StreamingAssert(IMetaStoreClient metaStoreClient, HiveConf conf, Table table, List<String> partition)
      throws Exception {
    this.metaStoreClient = metaStoreClient;
    this.table = table;
    this.partition = partition;

    txns = metaStoreClient.getValidTxns();
    partitionLocation = getPartitionLocation();
    dir = AcidUtils.getAcidState(partitionLocation, conf, txns);
    // A freshly written ACID table/partition should contain neither obsolete nor pre-ACID "original" files.
    assertEquals(0, dir.getObsolete().size());
    assertEquals(0, dir.getOriginalFiles().size());

    // Scan the current delta directories to establish the overall min/max transaction ids written.
    currentDeltas = dir.getCurrentDirectories();
    min = Long.MAX_VALUE;
    max = Long.MIN_VALUE;
    System.out.println("Files found: ");
    for (AcidUtils.ParsedDelta parsedDelta : currentDeltas) {
      System.out.println(parsedDelta.getPath().toString());
      max = Math.max(parsedDelta.getMaxTransaction(), max);
      min = Math.min(parsedDelta.getMinTransaction(), min);
    }
  }

  public void assertExpectedFileCount(int expectedFileCount) {
    assertEquals(expectedFileCount, currentDeltas.size());
  }

  public void assertNothingWritten() {
    assertExpectedFileCount(0);
  }

  public void assertMinTransactionId(long expectedMinTransactionId) {
    if (currentDeltas.isEmpty()) {
      throw new AssertionError("No data");
    }
    assertEquals(expectedMinTransactionId, min);
  }

  public void assertMaxTransactionId(long expectedMaxTransactionId) {
    if (currentDeltas.isEmpty()) {
      throw new AssertionError("No data");
    }
    assertEquals(expectedMaxTransactionId, max);
  }

  List<Record> readRecords() throws Exception {
    if (currentDeltas.isEmpty()) {
      throw new AssertionError("No data");
    }
    // Configure an ACID-aware ORC read over the partition location.
    InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.input.dir", partitionLocation.toString());
    job.set("bucket_count", Integer.toString(table.getSd().getNumBuckets()));
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
    job.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true");
    job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
    InputSplit[] splits = inputFormat.getSplits(job, 1);
    assertEquals(1, splits.length);

    final AcidRecordReader<NullWritable, OrcStruct> recordReader = (AcidRecordReader<NullWritable, OrcStruct>) inputFormat
        .getRecordReader(splits[0], job, Reporter.NULL);

    NullWritable key = recordReader.createKey();
    OrcStruct value = recordReader.createValue();

    // Capture each row together with its RecordIdentifier (transaction id, bucket id, row id).
    List<Record> records = new ArrayList<>();
    while (recordReader.next(key, value)) {
      RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
      Record record = new Record(new RecordIdentifier(recordIdentifier.getTransactionId(),
          recordIdentifier.getBucketId(), recordIdentifier.getRowId()), value.toString());
      System.out.println(record);
      records.add(record);
    }
    recordReader.close();
    return records;
  }

  private Path getPartitionLocation() throws NoSuchObjectException, MetaException, TException {
    Path partitionLocation;
    if (partition.isEmpty()) {
      partitionLocation = new Path(table.getSd().getLocation());
    } else {
      // TODO: calculate this instead. Just because we're writing to the location doesn't mean that
      // it'll always be wanted in the meta store right away.
      List<Partition> partitionEntries = metaStoreClient.listPartitions(table.getDbName(), table.getTableName(),
          partition, (short) 1);
      partitionLocation = new Path(partitionEntries.get(0).getSd().getLocation());
    }
    return partitionLocation;
  }

  public static class Record {
    private RecordIdentifier recordIdentifier;
    private String row;

    Record(RecordIdentifier recordIdentifier, String row) {
      this.recordIdentifier = recordIdentifier;
      this.row = row;
    }

    public RecordIdentifier getRecordIdentifier() {
      return recordIdentifier;
    }

    public String getRow() {
      return row;
    }

    @Override
    public String toString() {
      return "Record [recordIdentifier=" + recordIdentifier + ", row=" + row + "]";
    }
  }

}