/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.serde2.lazybinary; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.serde2.SerDeStatsStruct; import org.apache.hadoop.hive.serde2.StructObject; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.BinaryComparable; /** * LazyBinaryStruct is serialized as follows: start A B A B A B end bytes[] -> * |-----|---------|--- ... ---|-----|---------| * * Section A is one null-byte, corresponding to eight struct fields in Section * B. Each bit indicates whether the corresponding field is null (0) or not null * (1). Each field is a LazyBinaryObject. * * Following B, there is another section A and B. This pattern repeats until the * all struct fields are serialized. */ public class LazyBinaryStruct extends LazyBinaryNonPrimitive<LazyBinaryStructObjectInspector> implements StructObject, SerDeStatsStruct { private static final Logger LOG = LoggerFactory.getLogger(LazyBinaryStruct.class.getName()); /** * Whether the data is already parsed or not. */ boolean parsed; /** * Size of serialized data */ long serializedSize; /** * The fields of the struct. */ LazyBinaryObject[] fields; /** * Whether a field is initialized or not. */ boolean[] fieldInited; /** * Whether a field is null or not. Because length is 0 does not means the * field is null. In particular, a 0-length string is not null. */ boolean[] fieldIsNull; /** * The start positions and lengths of struct fields. Only valid when the data * is parsed. */ int[] fieldStart; int[] fieldLength; /** * Construct a LazyBinaryStruct object with an ObjectInspector. */ protected LazyBinaryStruct(LazyBinaryStructObjectInspector oi) { super(oi); } @Override public void init(ByteArrayRef bytes, int start, int length) { super.init(bytes, start, length); parsed = false; serializedSize = length; } final VInt vInt = new VInt(); final RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo(); boolean missingFieldWarned = false; boolean extraFieldWarned = false; /** * Parse the byte[] and fill fieldStart, fieldLength, fieldInited and * fieldIsNull. */ private void parse() { List<? extends StructField> fieldRefs = ((StructObjectInspector) oi) .getAllStructFieldRefs(); if (fields == null) { fields = new LazyBinaryObject[fieldRefs.size()]; for (int i = 0; i < fields.length; i++) { ObjectInspector insp = fieldRefs.get(i).getFieldObjectInspector(); fields[i] = insp == null ? null : LazyBinaryFactory.createLazyBinaryObject(insp); } fieldInited = new boolean[fields.length]; fieldIsNull = new boolean[fields.length]; fieldStart = new int[fields.length]; fieldLength = new int[fields.length]; } /** * Please note that one null byte is followed by eight fields, then more * null byte and fields. */ int fieldId = 0; int structByteEnd = start + length; byte[] bytes = this.bytes.getData(); byte nullByte = bytes[start]; int lastFieldByteEnd = start + 1; // Go through all bytes in the byte[] for (int i = 0; i < fields.length; i++) { fieldIsNull[i] = true; if ((nullByte & (1 << (i % 8))) != 0) { fieldIsNull[i] = false; LazyBinaryUtils.checkObjectByteInfo(fieldRefs.get(i) .getFieldObjectInspector(), bytes, lastFieldByteEnd, recordInfo, vInt); fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset; fieldLength[i] = recordInfo.elementSize; lastFieldByteEnd = fieldStart[i] + fieldLength[i]; } // count how many fields are there if (lastFieldByteEnd <= structByteEnd) { fieldId++; } // next byte is a null byte if there are more bytes to go if (7 == (i % 8)) { if (lastFieldByteEnd < structByteEnd) { nullByte = bytes[lastFieldByteEnd]; lastFieldByteEnd++; } else { // otherwise all null afterwards nullByte = 0; lastFieldByteEnd++; } } } // Extra bytes at the end? if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) { extraFieldWarned = true; LOG.warn("Extra bytes detected at the end of the row! " + "Last field end " + lastFieldByteEnd + " and serialize buffer end " + structByteEnd + ". " + "Ignoring similar problems."); } // Missing fields? if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) { missingFieldWarned = true; LOG.info("Missing fields! Expected " + fields.length + " fields but " + "only got " + fieldId + "! " + "Last field end " + lastFieldByteEnd + " and serialize buffer end " + structByteEnd + ". " + "Ignoring similar problems."); } Arrays.fill(fieldInited, false); parsed = true; } /** * Get one field out of the struct. * * If the field is a primitive field, return the actual object. Otherwise * return the LazyObject. This is because PrimitiveObjectInspector does not * have control over the object used by the user - the user simply directly * use the Object instead of going through Object * PrimitiveObjectInspector.get(Object). * * @param fieldID * The field ID * @return The field as a LazyObject */ public Object getField(int fieldID) { if (!parsed) { parse(); } return uncheckedGetField(fieldID); } public static final class SingleFieldGetter { private final VInt vInt = new VInt(); private final LazyBinaryStructObjectInspector soi; private final int fieldIndex; private final RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo(); private byte[] fieldBytes; private int fieldStart, fieldLength; public SingleFieldGetter(LazyBinaryStructObjectInspector soi, int fieldIndex) { this.soi = soi; this.fieldIndex = fieldIndex; } public void init(BinaryComparable src) { List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs(); fieldBytes = src.getBytes(); int length = src.getLength(); byte nullByte = fieldBytes[0]; int lastFieldByteEnd = 1, fieldStart = -1, fieldLength = -1; for (int i = 0; i <= fieldIndex; i++) { if ((nullByte & (1 << (i % 8))) != 0) { LazyBinaryUtils.checkObjectByteInfo(fieldRefs.get(i) .getFieldObjectInspector(), fieldBytes, lastFieldByteEnd, recordInfo, vInt); fieldStart = lastFieldByteEnd + recordInfo.elementOffset; fieldLength = recordInfo.elementSize; lastFieldByteEnd = fieldStart + fieldLength; } else { fieldStart = fieldLength = -1; } if (7 == (i % 8)) { nullByte = (lastFieldByteEnd < length) ? fieldBytes[lastFieldByteEnd] : 0; ++lastFieldByteEnd; } } } public short getShort() { assert (2 == fieldLength); return LazyBinaryUtils.byteArrayToShort(fieldBytes, fieldStart); } } /** * Get the field out of the row without checking parsed. This is called by * both getField and getFieldsAsList. * * @param fieldID * The id of the field starting from 0. * @return The value of the field */ private Object uncheckedGetField(int fieldID) { // Test the length first so in most cases we avoid doing a byte[] // comparison. if (fieldIsNull[fieldID]) { return null; } if (!fieldInited[fieldID]) { fieldInited[fieldID] = true; fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]); } return fields[fieldID].getObject(); } ArrayList<Object> cachedList; /** * Get the values of the fields as an ArrayList. * * @return The values of the fields as an ArrayList. */ public ArrayList<Object> getFieldsAsList() { if (!parsed) { parse(); } if (cachedList == null) { cachedList = new ArrayList<Object>(fields.length); for (int i = 0; i < fields.length; i++) { cachedList.add(uncheckedGetField(i)); } } else { assert fields.length == cachedList.size(); for (int i = 0; i < fields.length; i++) { cachedList.set(i, uncheckedGetField(i)); } } return cachedList; } @Override public Object getObject() { return this; } public long getRawDataSerializedSize() { return serializedSize; } }