/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.lazybinary.fast;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.LIST;
import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.MAP;
import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.STRUCT;
import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.UNION;
/*
* Directly serialize, field-by-field, the LazyBinary format.
*
* This is an alternative to the serialization provided by LazyBinarySerDe.
*/
public class LazyBinarySerializeWrite implements SerializeWrite {
// Logger for this class; within this file it is used only by beginMap's null-map-key warning.
public static final Logger LOG = LoggerFactory.getLogger(LazyBinarySerializeWrite.class.getName());
// Buffer that receives the serialized bytes; supplied through set() or setAppend().
private Output output;
// Field count of the top-level struct, given to the constructor.
private int rootFieldCount;
// When false, each LIST/MAP/STRUCT/UNION is preceded by a reserved 4-byte size that is
// filled in by the matching finish method. No code in this class ever sets it to true.
private boolean skipLengthPrefix = false;
// For thread safety, we allocate private writable objects for our use only.
// Each one is created lazily by the first write method that needs it.
private TimestampWritable timestampWritable;
private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable;
private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable;
// NOTE(review): not referenced by any method in this class -- candidate for removal.
private HiveIntervalDayTime hiveIntervalDayTime;
// Scratch array used by writeVInt/writeVLong to encode a value before copying it to output.
private byte[] vLongBytes;
// Scratch arrays passed to LazyBinarySerDe.writeToByteStream for decimals; allocated lazily.
private long[] scratchLongs;
private byte[] scratchBuffer;
// Bookkeeping for the top-level struct; recreated by resetWithoutOutput().
private Field root;
// Values currently being written, innermost on top; root is pushed first.
private Deque<Field> stack = new ArrayDeque<>();
// Controls the once-only null-map-key warning in beginMap.
// NOTE(review): this class only ever assigns null to it, so that warning cannot fire --
// confirm whether that is intended.
private LazyBinarySerDe.BooleanRef warnedOnceNullMapKey;
/*
 * Bookkeeping for one value that is currently being written: the top-level struct or a
 * nested LIST, MAP, STRUCT or UNION.
 */
private static class Field {
  // Category of the value; only STRUCT entries take part in NULL-byte tracking.
  final Category type;
  // Number of children the value holds.
  int fieldCount;
  // Number of children written so far (used for STRUCT entries).
  int fieldIndex;
  // Offset of the reserved size prefix of a nested value.
  int byteSizeStart;
  // Offset just past the size prefix; the stored size is measured from here.
  int start;
  // Offset of the NULL byte currently being accumulated for a STRUCT.
  long nullOffset;
  // Pending NULL byte: bit i is set when child (i % 8) of the current group is NOT NULL.
  byte nullByte;

  Field(Category category) {
    type = category;
  }
}
/*
 * Create a serializer for rows whose top-level struct has fieldCount fields.
 * An output buffer must still be supplied with set() or setAppend() before writing.
 */
public LazyBinarySerializeWrite(int fieldCount) {
  this();
  this.rootFieldCount = fieldCount;
  this.vLongBytes = new byte[LazyBinaryUtils.VLONG_BYTES_LEN];
  // Builds the root entry from rootFieldCount, so it has to run after the assignment above.
  resetWithoutOutput();
}

// Not public since we must have the field count and other information.
private LazyBinarySerializeWrite() {
  // Nothing to do here; all setup happens in the public constructor.
}
/*
 * Set the buffer that will receive the serialized data. The output buffer will be reset,
 * and the serializer starts over at the first field of the top-level struct.
 */
@Override
public void set(Output output) {
  this.output = output;
  this.output.reset();
  resetWithoutOutput();
}
/*
 * Set the buffer that will receive the serialized data. The output buffer will NOT be
 * reset; serialized bytes are added after whatever it already contains.
 */
@Override
public void setAppend(Output output) {
  this.output = output;
  resetWithoutOutput();
  // The root struct's first NULL byte will be reserved at the current end of the buffer.
  root.nullOffset = this.output.getLength();
}
/*
* Reset the previously supplied buffer that will receive the serialized data, and start
* over at the first field of the top-level struct.
*
* Requires that set() or setAppend() has been called: output is dereferenced here.
*/
@Override
public void reset() {
output.reset();
resetWithoutOutput();
}
/*
 * Restore the serializer's own state without touching the output buffer: a fresh root
 * struct entry becomes the only element on the stack and the null-map-key warning
 * reference is cleared.
 */
private void resetWithoutOutput() {
  final Field freshRoot = new Field(STRUCT);
  freshRoot.fieldCount = rootFieldCount;
  root = freshRoot;
  stack.clear();
  stack.push(freshRoot);
  warnedOnceNullMapKey = null;
}
/*
 * Write a NULL field.
 *
 * Only a STRUCT parent records the NULL here; for any other parent nothing is written
 * (beginList and beginMap emit their null bits up front).
 */
@Override
public void writeNull() throws IOException {
  final Field parent = stack.peek();
  if (parent.type != STRUCT) {
    return;
  }
  // A new NULL byte starts every 8 fields.
  final boolean startsNullByte = (parent.fieldIndex % 8) == 0;
  if (startsNullByte) {
    if (parent.fieldIndex > 0) {
      // Flush the byte covering the previous 8 fields, then track the next one.
      output.writeByte(parent.nullOffset, parent.nullByte);
      parent.nullByte = 0;
      parent.nullOffset = output.getLength();
    }
    // Make room for the new NULL byte.
    output.reserve(1);
  }
  // A NULL field leaves its bit cleared, so only the field counter advances.
  parent.fieldIndex++;
  if (parent.fieldIndex == parent.fieldCount) {
    // Last field of the struct: flush the final NULL byte.
    output.writeByte(parent.nullOffset, parent.nullByte);
  }
}
/*
 * BOOLEAN.
 *
 * Stored as a single byte: 1 for true, 0 for false.
 */
@Override
public void writeBoolean(boolean v) throws IOException {
  beginElement();
  final byte encoded = v ? (byte) 1 : (byte) 0;
  output.write(encoded);
  finishElement();
}
/*
* BYTE.
*
* Stored as the single byte v. beginElement()/finishElement() maintain the NULL byte of an
* enclosing struct.
*/
@Override
public void writeByte(byte v) throws IOException {
beginElement();
output.write(v);
finishElement();
}
/*
 * SHORT.
 *
 * Stored as two bytes, most significant byte first.
 */
@Override
public void writeShort(short v) throws IOException {
  beginElement();
  final byte highByte = (byte) (v >> 8);
  final byte lowByte = (byte) v;
  output.write(highByte);
  output.write(lowByte);
  finishElement();
}
/*
* INT.
*
* Stored in the variable-length encoding produced by writeVInt.
*/
@Override
public void writeInt(int v) throws IOException {
beginElement();
writeVInt(v);
finishElement();
}
/*
* LONG.
*
* Stored in the variable-length encoding produced by writeVLong.
*/
@Override
public void writeLong(long v) throws IOException {
beginElement();
writeVLong(v);
finishElement();
}
/*
 * FLOAT.
 *
 * Stored as the four bytes of Float.floatToIntBits, most significant byte first.
 */
@Override
public void writeFloat(float vf) throws IOException {
  beginElement();
  final int bits = Float.floatToIntBits(vf);
  for (int shift = 24; shift >= 0; shift -= 8) {
    output.write((byte) (bits >> shift));
  }
  finishElement();
}
/*
* DOUBLE.
*
* The byte encoding is delegated to LazyBinaryUtils.writeDouble.
*/
@Override
public void writeDouble(double v) throws IOException {
beginElement();
LazyBinaryUtils.writeDouble(output, v);
finishElement();
}
/*
 * STRING.
 *
 * Written as a VInt byte count followed by all bytes of v.
 *
 * Can be used to write CHAR and VARCHAR when the caller takes responsibility for
 * truncation/padding issues.
 */
@Override
public void writeString(byte[] v) throws IOException {
  beginElement();
  writeVInt(v.length);
  output.write(v, 0, v.length);
  finishElement();
}
/*
* STRING from a slice of a byte array: a VInt holding length, followed by the length bytes
* of v beginning at index start.
*/
@Override
public void writeString(byte[] v, int start, int length) throws IOException {
beginElement();
writeVInt(length);
output.write(v, start, length);
finishElement();
}
/*
 * CHAR.
 *
 * Writes the stripped value of hiveChar as a string field. The text is encoded as UTF-8
 * explicitly: the no-argument String.getBytes() uses the JVM's default charset, which
 * would make the serialized bytes depend on the platform the writer runs on.
 */
@Override
public void writeHiveChar(HiveChar hiveChar) throws IOException {
  final byte[] utf8Bytes = hiveChar.getStrippedValue().getBytes(StandardCharsets.UTF_8);
  writeString(utf8Bytes);
}
/*
 * VARCHAR.
 *
 * Writes the value of hiveVarchar as a string field. The text is encoded as UTF-8
 * explicitly: the no-argument String.getBytes() uses the JVM's default charset, which
 * would make the serialized bytes depend on the platform the writer runs on.
 */
@Override
public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException {
  final byte[] utf8Bytes = hiveVarchar.getValue().getBytes(StandardCharsets.UTF_8);
  writeString(utf8Bytes);
}
/*
* BINARY.
*
* Written exactly like a STRING: delegates to writeString(byte[]).
*/
@Override
public void writeBinary(byte[] v) throws IOException {
writeString(v);
}
/*
* BINARY from a slice of a byte array; delegates to writeString(byte[], int, int).
*/
@Override
public void writeBinary(byte[] v, int start, int length) throws IOException {
writeString(v, start, length);
}
/*
 * DATE.
 *
 * The date is converted with DateWritable.dateToDays and the resulting day count is
 * written as a VInt.
 */
@Override
public void writeDate(Date date) throws IOException {
  beginElement();
  final int days = DateWritable.dateToDays(date);
  writeVInt(days);
  finishElement();
}
// We provide a faster way to write a date without a Date object.
// dateAsDays is written as a VInt, the same encoding writeDate(Date) uses for its day count.
@Override
public void writeDate(int dateAsDays) throws IOException {
beginElement();
writeVInt(dateAsDays);
finishElement();
}
/*
 * TIMESTAMP.
 *
 * The byte encoding is delegated to TimestampWritable.writeToByteStream, using a writable
 * owned by this serializer that is created on first use.
 */
@Override
public void writeTimestamp(Timestamp v) throws IOException {
  beginElement();
  TimestampWritable writable = timestampWritable;
  if (writable == null) {
    writable = new TimestampWritable();
    timestampWritable = writable;
  }
  writable.set(v);
  writable.writeToByteStream(output);
  finishElement();
}
/*
 * INTERVAL_YEAR_MONTH.
 *
 * The byte encoding is delegated to HiveIntervalYearMonthWritable.writeToByteStream, using
 * a writable owned by this serializer that is created on first use.
 */
@Override
public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException {
  beginElement();
  HiveIntervalYearMonthWritable writable = hiveIntervalYearMonthWritable;
  if (writable == null) {
    writable = new HiveIntervalYearMonthWritable();
    hiveIntervalYearMonthWritable = writable;
  }
  writable.set(viyt);
  writable.writeToByteStream(output);
  finishElement();
}
/*
 * INTERVAL_YEAR_MONTH given directly as a total number of months, so the caller does not
 * need a HiveIntervalYearMonth object.
 */
@Override
public void writeHiveIntervalYearMonth(int totalMonths) throws IOException {
  beginElement();
  HiveIntervalYearMonthWritable writable = hiveIntervalYearMonthWritable;
  if (writable == null) {
    writable = new HiveIntervalYearMonthWritable();
    hiveIntervalYearMonthWritable = writable;
  }
  writable.set(totalMonths);
  writable.writeToByteStream(output);
  finishElement();
}
/*
 * INTERVAL_DAY_TIME.
 *
 * The byte encoding is delegated to HiveIntervalDayTimeWritable.writeToByteStream, using a
 * writable owned by this serializer that is created on first use.
 */
@Override
public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException {
  beginElement();
  HiveIntervalDayTimeWritable writable = hiveIntervalDayTimeWritable;
  if (writable == null) {
    writable = new HiveIntervalDayTimeWritable();
    hiveIntervalDayTimeWritable = writable;
  }
  writable.set(vidt);
  writable.writeToByteStream(output);
  finishElement();
}
/*
 * DECIMAL.
 *
 * NOTE: The scale parameter is for text serialization (e.g. HiveDecimal.toFormatString) that
 * creates trailing zeroes output decimals. It is not used by this binary writer.
 */
@Override
public void writeHiveDecimal(HiveDecimal dec, int scale) throws IOException {
  beginElement();
  final boolean scratchMissing = scratchLongs == null;
  if (scratchMissing) {
    // Both scratch arrays are allocated together on the first decimal written.
    scratchLongs = new long[HiveDecimal.SCRATCH_LONGS_LEN];
    scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_BIG_INTEGER_BYTES];
  }
  LazyBinarySerDe.writeToByteStream(output, dec, scratchLongs, scratchBuffer);
  finishElement();
}
/*
 * DECIMAL supplied as a HiveDecimalWritable. The scale parameter is not used by this
 * binary writer.
 */
@Override
public void writeHiveDecimal(HiveDecimalWritable decWritable, int scale) throws IOException {
  beginElement();
  final boolean scratchMissing = scratchLongs == null;
  if (scratchMissing) {
    // Both scratch arrays are allocated together on the first decimal written.
    scratchLongs = new long[HiveDecimal.SCRATCH_LONGS_LEN];
    scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_BIG_INTEGER_BYTES];
  }
  LazyBinarySerDe.writeToByteStream(output, decWritable, scratchLongs, scratchBuffer);
  finishElement();
}
/*
 * Write a VInt using our temporary byte buffer instead of paying the thread local performance
 * cost of LazyBinaryUtils.writeVInt
 */
private void writeVInt(int v) {
  // The value is encoded into vLongBytes first; the returned count says how many bytes to copy.
  output.write(vLongBytes, 0, LazyBinaryUtils.writeVLongToByteArray(vLongBytes, v));
}
/*
 * Write a VLong using our temporary byte buffer, the long counterpart of writeVInt.
 */
private void writeVLong(long v) {
  // The value is encoded into vLongBytes first; the returned count says how many bytes to copy.
  output.write(vLongBytes, 0, LazyBinaryUtils.writeVLongToByteArray(vLongBytes, v));
}
/*
 * Begin a LIST: writes the list header and pushes the list onto the stack. The caller then
 * writes the elements and calls finishList().
 *
 * Header layout: a reserved 4-byte size (unless skipLengthPrefix), the element count as a
 * VInt, then one bit per element (set when the element is not null), packed 8 per byte.
 */
@Override
public void beginList(List list) {
  final Field listField = new Field(LIST);
  beginComplex(listField);
  final int elementCount = list.size();
  listField.fieldCount = elementCount;
  if (!skipLengthPrefix) {
    // Reserve four bytes for the list's byte size; finishList() fills them in.
    listField.byteSizeStart = output.getLength();
    output.reserve(4);
    listField.start = output.getLength();
  }
  // Element count.
  LazyBinaryUtils.writeVInt(output, elementCount);
  // Null bits: a byte is emitted after every eighth element and after the last one.
  byte pendingBits = 0;
  int bitPosition = 0;
  for (int i = 0; i < elementCount; i++) {
    if (list.get(i) != null) {
      pendingBits |= 1 << bitPosition;
    }
    bitPosition++;
    if (bitPosition == 8 || i + 1 == elementCount) {
      output.write(pendingBits);
      pendingBits = 0;
      bitPosition = 0;
    }
  }
}
/*
* Intentionally a no-op: this writer emits nothing between list elements.
* Presumably invoked by callers between elements per the SerializeWrite contract -- confirm.
*/
@Override
public void separateList() {
}
/*
 * Finish the LIST opened by beginList(): fill in its reserved byte size (unless
 * skipLengthPrefix), pop it, and complete it as one element of its parent.
 */
@Override
public void finishList() {
  final Field listField = stack.peek();
  if (!skipLengthPrefix) {
    // The size covers everything written after the 4-byte prefix.
    writeSizeAtOffset(output, listField.byteSizeStart, output.getLength() - listField.start);
  }
  finishComplex();
}
/*
 * Begin a MAP: writes the map header and pushes the map onto the stack. The caller then
 * writes the keys and values and calls finishMap().
 *
 * Header layout: a reserved 4-byte size (unless skipLengthPrefix), the entry count as a
 * VInt, then two bits per entry (key bit, then value bit; a bit is set when the item is
 * not null), packed 8 bits per byte.
 *
 * The entry count is recorded in fieldCount, consistent with beginList/beginStruct/
 * beginUnion (it was previously stored in fieldIndex, which tracks progress, not size).
 */
@Override
public void beginMap(Map<?, ?> map) {
  final Field current = new Field(MAP);
  beginComplex(current);
  if (!skipLengthPrefix) {
    // 1/ reserve spaces for the byte size of the map
    // which is a integer and takes four bytes
    current.byteSizeStart = output.getLength();
    output.reserve(4);
    current.start = output.getLength();
  }
  // 2/ write the size of the map which is a VInt
  final int size = map.size();
  current.fieldCount = size;
  LazyBinaryUtils.writeVInt(output, size);
  // 3/ write the null bytes
  int b = 0;
  byte nullByte = 0;
  for (Map.Entry<?, ?> entry : map.entrySet()) {
    // set the bit to 1 if a key is not null
    if (null != entry.getKey()) {
      nullByte |= 1 << (b % 8);
    } else if (warnedOnceNullMapKey != null) {
      // NOTE(review): warnedOnceNullMapKey is only ever assigned null in this class, so
      // this warning is currently unreachable -- confirm whether that is intended.
      if (!warnedOnceNullMapKey.value) {
        LOG.warn("Null map key encountered! Ignoring similar problems.");
      }
      warnedOnceNullMapKey.value = true;
    }
    b++;
    // set the bit to 1 if a value is not null
    if (null != entry.getValue()) {
      nullByte |= 1 << (b % 8);
    }
    b++;
    // write the byte to stream every 4 key-value pairs
    // or if this is the last key-value pair
    if (0 == b % 8 || b == size * 2) {
      output.write(nullByte);
      nullByte = 0;
    }
  }
}
/*
* Intentionally a no-op: this writer emits nothing between a map key and its value.
* Presumably invoked by callers after each key per the SerializeWrite contract -- confirm.
*/
@Override
public void separateKey() {
}
/*
* Intentionally a no-op: this writer emits nothing between map entries.
* Presumably invoked by callers between key-value pairs per the SerializeWrite contract --
* confirm.
*/
@Override
public void separateKeyValuePair() {
}
/*
 * Finish the MAP opened by beginMap(): fill in its reserved byte size (unless
 * skipLengthPrefix), pop it, and complete it as one element of its parent.
 */
@Override
public void finishMap() {
  final Field mapField = stack.peek();
  if (!skipLengthPrefix) {
    // The size covers everything written after the 4-byte prefix.
    writeSizeAtOffset(output, mapField.byteSizeStart, output.getLength() - mapField.start);
  }
  finishComplex();
}
/*
 * Begin a nested STRUCT and push it onto the stack. The caller then writes one field per
 * entry of fieldValues and calls finishStruct().
 *
 * Only the size of fieldValues is read here; it becomes the struct's field count.
 */
@Override
public void beginStruct(List fieldValues) {
  final Field structField = new Field(STRUCT);
  beginComplex(structField);
  structField.fieldCount = fieldValues.size();
  if (!skipLengthPrefix) {
    // Reserve four bytes for the struct's byte size; finishStruct() fills them in.
    structField.byteSizeStart = output.getLength();
    output.reserve(4);
    structField.start = output.getLength();
  }
  // The struct's first NULL byte will be reserved here when its first field is written.
  structField.nullOffset = output.getLength();
}
/*
* Intentionally a no-op: this writer emits nothing between struct fields.
* Presumably invoked by callers between fields per the SerializeWrite contract -- confirm.
*/
@Override
public void separateStruct() {
}
/*
 * Finish the STRUCT opened by beginStruct(): fill in its reserved byte size (unless
 * skipLengthPrefix), pop it, and complete it as one element of its parent.
 */
@Override
public void finishStruct() {
  final Field structField = stack.peek();
  if (!skipLengthPrefix) {
    // The size covers everything written after the 4-byte prefix.
    writeSizeAtOffset(output, structField.byteSizeStart, output.getLength() - structField.start);
  }
  finishComplex();
}
/*
 * Begin a UNION and push it onto the stack. The caller then writes the single value
 * selected by tag and calls finishUnion().
 *
 * Layout: a reserved 4-byte size (unless skipLengthPrefix) followed by the tag, written
 * with output.write(int).
 */
@Override
public void beginUnion(int tag) throws IOException {
  final Field unionField = new Field(UNION);
  beginComplex(unionField);
  unionField.fieldCount = 1;
  if (!skipLengthPrefix) {
    // Reserve four bytes for the union's byte size; finishUnion() fills them in.
    unionField.byteSizeStart = output.getLength();
    output.reserve(4);
    unionField.start = output.getLength();
  }
  // The tag precedes the union's value.
  output.write(tag);
}
/*
 * Finish the UNION opened by beginUnion(): fill in its reserved byte size (unless
 * skipLengthPrefix), pop it, and complete it as one element of its parent.
 */
@Override
public void finishUnion() {
  final Field unionField = stack.peek();
  if (!skipLengthPrefix) {
    // The size covers everything written after the 4-byte prefix (tag plus value).
    writeSizeAtOffset(output, unionField.byteSizeStart, output.getLength() - unionField.start);
  }
  finishComplex();
}
/*
 * Called before the bytes of a non-NULL value are written. When the enclosing value is a
 * STRUCT, this reserves a new NULL byte at the start of every group of 8 fields and marks
 * the current field as present; for any other enclosing value it does nothing.
 */
private void beginElement() {
  final Field parent = stack.peek();
  if (parent.type != STRUCT) {
    return;
  }
  final int bitInByte = parent.fieldIndex % 8;
  if (bitInByte == 0) {
    if (parent.fieldIndex > 0) {
      // Flush the byte covering the previous 8 fields, then track the next one.
      output.writeByte(parent.nullOffset, parent.nullByte);
      parent.nullByte = 0;
      parent.nullOffset = output.getLength();
    }
    // Make room for the new NULL byte.
    output.reserve(1);
  }
  // A set bit means the field is NOT NULL.
  parent.nullByte = (byte) (parent.nullByte | (1 << bitInByte));
}
/*
 * Called after the bytes of a non-NULL value are written. When the enclosing value is a
 * STRUCT, this advances its field counter and, after the last field, writes the pending
 * NULL byte back to its reserved position; for any other enclosing value it does nothing.
 */
private void finishElement() {
  final Field parent = stack.peek();
  if (parent.type != STRUCT) {
    return;
  }
  parent.fieldIndex++;
  final boolean wasLastField = parent.fieldIndex == parent.fieldCount;
  if (wasLastField) {
    output.writeByte(parent.nullOffset, parent.nullByte);
  }
}
/*
* Start a nested complex value: first account for it as an element of the current parent
* (beginElement() must see the parent on top of the stack), then push it so that it becomes
* the parent of the values written next.
*/
private void beginComplex(Field field) {
beginElement();
stack.push(field);
}
/*
* End a nested complex value: pop it so its parent is on top of the stack again, then
* complete it as one element of that parent.
*/
private void finishComplex() {
stack.pop();
finishElement();
}
/*
* Store size as an int at offset byteSizeStart of byteStream. Used by the finish methods to
* fill in the size prefix reserved by the matching begin method.
*/
private static void writeSizeAtOffset(
ByteStream.RandomAccessOutput byteStream, int byteSizeStart, int size) {
byteStream.writeInt(byteSizeStart, size);
}
}