/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.orc;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.PrintStream;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Reporter;
import org.apache.orc.impl.OrcAcidUtils;
import org.apache.orc.tools.FileDump;
import org.junit.Test;
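
/**
 * Tests for {@link OrcRecordUpdater}: accessors on the ACID event struct,
 * insert/flush behavior and the flush-length side file, table-property
 * pass-through to the ORC writer, and update/delete events.
 */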
public class TestOrcRecordUpdater {
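
  // The static accessors should round-trip each field of an ACID event
  // struct: operation, current and original transaction, bucket, and row id.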
@Test
public void testAccessors() throws Exception {
OrcStruct event = new OrcStruct(OrcRecordUpdater.FIELDS);
event.setFieldValue(OrcRecordUpdater.OPERATION,
new IntWritable(OrcRecordUpdater.INSERT_OPERATION));
event.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION,
new LongWritable(100));
event.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION,
new LongWritable(50));
event.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(200));
event.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(300));
assertEquals(OrcRecordUpdater.INSERT_OPERATION,
OrcRecordUpdater.getOperation(event));
assertEquals(50, OrcRecordUpdater.getOriginalTransaction(event));
assertEquals(100, OrcRecordUpdater.getCurrentTransaction(event));
assertEquals(200, OrcRecordUpdater.getBucket(event));
assertEquals(300, OrcRecordUpdater.getRowId(event));
}
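
  // Output goes under test.tmp.dir when the build sets it, otherwise
  // target/test/tmp.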
Path workDir = new Path(System.getProperty("test.tmp.dir",
"target" + File.separator + "test" + File.separator + "tmp"));
static class MyRow {
Text field;
RecordIdentifier ROW__ID;
MyRow(String val) {
field = new Text(val);
ROW__ID = null;
}
MyRow(String val, long rowId, long origTxn, int bucket) {
field = new Text(val);
ROW__ID = new RecordIdentifier(origTxn, bucket, rowId);
}
}
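
  // Exercises insert + flush: each flush records the current file length in
  // the side file, so a reader capped at that length sees a consistent
  // snapshot of the rows written so far.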
@Test
public void testWriter() throws Exception {
Path root = new Path(workDir, "testWriter");
Configuration conf = new Configuration();
// Must use raw local because the checksummer doesn't honor flushes.
FileSystem fs = FileSystem.getLocal(conf).getRaw();
    ObjectInspector inspector;
    // ObjectInspectorFactory caches reflection inspectors in a map that is
    // not thread safe, so creation is synchronized on a shared lock.
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
.filesystem(fs)
.bucket(10)
.writingBase(false)
.minimumTransactionId(10)
.maximumTransactionId(19)
.inspector(inspector)
.reporter(Reporter.NULL)
.finalDestination(root);
RecordUpdater updater = new OrcRecordUpdater(root, options);
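    // Each insert is tagged with the transaction writing it; within a
    // transaction, row ids are assigned sequentially starting at 0.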
updater.insert(11, new MyRow("first"));
updater.insert(11, new MyRow("second"));
updater.insert(11, new MyRow("third"));
updater.flush();
updater.insert(12, new MyRow("fourth"));
updater.insert(12, new MyRow("fifth"));
updater.flush();
// Check the stats
assertEquals(5L, updater.getStats().getRowCount());
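    // The updater writes a delta bucket file (for these options, a path like
    // delta_0000010_0000019/bucket_00010) plus a "_flush_length" side file
    // that records the usable length of the data after each flush.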
Path bucketPath = AcidUtils.createFilename(root, options);
Path sidePath = OrcAcidUtils.getSideFile(bucketPath);
DataInputStream side = fs.open(sidePath);
// read the stopping point for the first flush and make sure we only see
// 3 rows
long len = side.readLong();
Reader reader = OrcFile.createReader(bucketPath,
new OrcFile.ReaderOptions(conf).filesystem(fs).maxLength(len));
assertEquals(3, reader.getNumberOfRows());
// read the second flush and make sure we see all 5 rows
len = side.readLong();
side.close();
reader = OrcFile.createReader(bucketPath,
new OrcFile.ReaderOptions(conf).filesystem(fs).maxLength(len));
assertEquals(5, reader.getNumberOfRows());
RecordReader rows = reader.rows();
// check the contents of the file
    assertTrue(rows.hasNext());
OrcStruct row = (OrcStruct) rows.next(null);
assertEquals(OrcRecordUpdater.INSERT_OPERATION,
OrcRecordUpdater.getOperation(row));
assertEquals(11, OrcRecordUpdater.getCurrentTransaction(row));
assertEquals(11, OrcRecordUpdater.getOriginalTransaction(row));
assertEquals(10, OrcRecordUpdater.getBucket(row));
assertEquals(0, OrcRecordUpdater.getRowId(row));
assertEquals("first",
OrcRecordUpdater.getRow(row).getFieldValue(0).toString());
    assertTrue(rows.hasNext());
row = (OrcStruct) rows.next(null);
assertEquals(1, OrcRecordUpdater.getRowId(row));
assertEquals(10, OrcRecordUpdater.getBucket(row));
assertEquals("second",
OrcRecordUpdater.getRow(row).getFieldValue(0).toString());
    assertTrue(rows.hasNext());
row = (OrcStruct) rows.next(null);
assertEquals(2, OrcRecordUpdater.getRowId(row));
assertEquals(10, OrcRecordUpdater.getBucket(row));
assertEquals("third",
OrcRecordUpdater.getRow(row).getFieldValue(0).toString());
    assertTrue(rows.hasNext());
row = (OrcStruct) rows.next(null);
assertEquals(12, OrcRecordUpdater.getCurrentTransaction(row));
assertEquals(12, OrcRecordUpdater.getOriginalTransaction(row));
assertEquals(10, OrcRecordUpdater.getBucket(row));
assertEquals(0, OrcRecordUpdater.getRowId(row));
assertEquals("fourth",
OrcRecordUpdater.getRow(row).getFieldValue(0).toString());
    assertTrue(rows.hasNext());
row = (OrcStruct) rows.next(null);
assertEquals(1, OrcRecordUpdater.getRowId(row));
assertEquals("fifth",
OrcRecordUpdater.getRow(row).getFieldValue(0).toString());
    assertFalse(rows.hasNext());
// add one more record and close
updater.insert(20, new MyRow("sixth"));
updater.close(false);
reader = OrcFile.createReader(bucketPath,
new OrcFile.ReaderOptions(conf).filesystem(fs));
assertEquals(6, reader.getNumberOfRows());
assertEquals(6L, updater.getStats().getRowCount());
    // close() should remove the side file once the delta is finalized.
    assertFalse(fs.exists(sidePath));
}
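
  // Table properties passed through the writer options should reach the ORC
  // writer; FileDump's output is scanned for the expected compression settings.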
@Test
public void testWriterTblProperties() throws Exception {
Path root = new Path(workDir, "testWriterTblProperties");
Configuration conf = new Configuration();
// Must use raw local because the checksummer doesn't honor flushes.
FileSystem fs = FileSystem.getLocal(conf).getRaw();
ObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = ObjectInspectorFactory.getReflectionObjectInspector
(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Properties tblProps = new Properties();
tblProps.setProperty("orc.compress", "SNAPPY");
tblProps.setProperty("orc.compress.size", "8192");
HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_ORC_BASE_DELTA_RATIO, 4);
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
.filesystem(fs)
.bucket(10)
.writingBase(false)
.minimumTransactionId(10)
.maximumTransactionId(19)
.inspector(inspector)
.reporter(Reporter.NULL)
.finalDestination(root)
.tableProperties(tblProps);
RecordUpdater updater = new OrcRecordUpdater(root, options);
updater.insert(11, new MyRow("first"));
updater.insert(11, new MyRow("second"));
updater.insert(11, new MyRow("third"));
updater.flush();
updater.insert(12, new MyRow("fourth"));
updater.insert(12, new MyRow("fifth"));
updater.flush();
PrintStream origOut = System.out;
ByteArrayOutputStream myOut = new ByteArrayOutputStream();
System.setOut(new PrintStream(myOut));
FileDump.main(new String[]{root.toUri().toString()});
System.out.flush();
String outDump = new String(myOut.toByteArray());
    assertTrue(outDump.contains("Compression: SNAPPY"));
    assertTrue(outDump.contains("Compression size: 2048"));
System.setOut(origOut);
updater.close(false);
}
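
  // Update and delete events carry the original record identifier from the
  // ROW__ID column; a delete writes a null row payload.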
@Test
public void testUpdates() throws Exception {
Path root = new Path(workDir, "testUpdates");
Configuration conf = new Configuration();
FileSystem fs = root.getFileSystem(conf);
ObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = ObjectInspectorFactory.getReflectionObjectInspector
(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
int bucket = 20;
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
.filesystem(fs)
.bucket(bucket)
.writingBase(false)
.minimumTransactionId(100)
.maximumTransactionId(100)
.inspector(inspector)
.reporter(Reporter.NULL)
.recordIdColumn(1)
.finalDestination(root);
RecordUpdater updater = new OrcRecordUpdater(root, options);
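    // Update row 30 (originally written by txn 10) and delete row 60
    // (originally written by txn 40), both as part of txn 100.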
updater.update(100, new MyRow("update", 30, 10, bucket));
updater.delete(100, new MyRow("", 60, 40, bucket));
    // The updater cannot report a meaningful net row count once rows are
    // updated or deleted, so the stats return -1.
    assertEquals(-1L, updater.getStats().getRowCount());
updater.close(false);
Path bucketPath = AcidUtils.createFilename(root, options);
Reader reader = OrcFile.createReader(bucketPath,
new OrcFile.ReaderOptions(conf).filesystem(fs));
assertEquals(2, reader.getNumberOfRows());
RecordReader rows = reader.rows();
// check the contents of the file
    assertTrue(rows.hasNext());
OrcStruct row = (OrcStruct) rows.next(null);
assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
OrcRecordUpdater.getOperation(row));
assertEquals(100, OrcRecordUpdater.getCurrentTransaction(row));
assertEquals(10, OrcRecordUpdater.getOriginalTransaction(row));
assertEquals(20, OrcRecordUpdater.getBucket(row));
assertEquals(30, OrcRecordUpdater.getRowId(row));
assertEquals("update",
OrcRecordUpdater.getRow(row).getFieldValue(0).toString());
    assertTrue(rows.hasNext());
row = (OrcStruct) rows.next(null);
assertEquals(100, OrcRecordUpdater.getCurrentTransaction(row));
assertEquals(40, OrcRecordUpdater.getOriginalTransaction(row));
assertEquals(20, OrcRecordUpdater.getBucket(row));
assertEquals(60, OrcRecordUpdater.getRowId(row));
assertNull(OrcRecordUpdater.getRow(row));
    assertFalse(rows.hasNext());
}
}