/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.lazy;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.io.Text;
/**
 * LazyMap stores a map of Primitive LazyObjects to LazyObjects. Note that the
 * keys of the map cannot contain null.
 *
 * LazyMap does not deal with the case of a NULL map. That is handled by the
 * parent LazyObject.
 *
 * The serialized bytes are only scanned on first access (see {@code parse()});
 * individual keys and values are materialized lazily, one index at a time.
 */
public class LazyMap extends LazyNonPrimitive<LazyMapObjectInspector> {

  public static final Logger LOG = LoggerFactory.getLogger(LazyMap.class);

  /**
   * Whether the data is already parsed or not.
   */
  boolean parsed = false;

  /**
   * The number of entries kept by the last parse. Only valid when the data is
   * parsed. This class never stores -1 here; a NULL map is handled by the
   * parent LazyObject.
   */
  int mapSize = 0;

  /**
   * The beginning position of key[i]. Only valid when the data is parsed. Note
   * that keyStart[mapSize] = begin + length + 1 is written as a sentinel after
   * the last entry.
   */
  int[] keyStart;

  /**
   * The end position of key[i] (the position of the key-value separator, or
   * the end of the entry when the entry has no key-value separator). Only
   * valid when the data is parsed.
   */
  int[] keyEnd;

  /**
   * The length of value[i]. It is -1 for an entry without a key-value
   * separator.
   */
  int[] valueLength;

  /**
   * The keys are stored in an array of LazyPrimitives.
   */
  LazyPrimitive<?, ?>[] keyObjects;

  /**
   * Whether init() is called on keyObjects[i].
   */
  boolean[] keyInited;

  /**
   * The values are stored in an array of LazyObjects. value[index] starts at
   * keyEnd[index] + 1 and spans valueLength[index] bytes.
   */
  LazyObject[] valueObjects;

  /**
   * Whether valueObjects[i] has been materialized (initialized or set to
   * null) since the last parse.
   */
  boolean[] valueInited;

  /**
   * Cache for the result of getMap(). It is cleared by init(), and each
   * LazyMap has its own cachedMap so data is not overwritten by accident.
   */
  protected LinkedHashMap<Object, Object> cachedMap;

  /**
   * Construct a LazyMap object with the ObjectInspector.
   *
   * @param oi
   *          the object inspector describing this map
   */
  protected LazyMap(LazyMapObjectInspector oi) {
    super(oi);
  }

  /**
   * Set the row data for this LazyMap and discard everything derived from the
   * previous row (parsed flag, cached map, bookkeeping arrays).
   *
   * @see LazyObject#init(ByteArrayRef, int, int)
   */
  @Override
  public void init(ByteArrayRef bytes, int start, int length) {
    super.init(bytes, start, length);
    parsed = false;
    cachedMap = null;
    // A null keyStart makes enlargeArrays() allocate a fresh set of arrays.
    keyStart = null;
  }

  /**
   * Enlarge the arrays that store per-entry information for the map. When no
   * arrays exist yet they are allocated with a small initial size; otherwise
   * each array is doubled and its existing content preserved.
   */
  protected void enlargeArrays() {
    if (keyStart == null) {
      int initialSize = 2;
      keyStart = new int[initialSize];
      keyEnd = new int[initialSize];
      valueLength = new int[initialSize];
      keyObjects = new LazyPrimitive<?, ?>[initialSize];
      valueObjects = new LazyObject[initialSize];
      keyInited = new boolean[initialSize];
      valueInited = new boolean[initialSize];
      return;
    }
    keyStart = Arrays.copyOf(keyStart, keyStart.length * 2);
    keyEnd = Arrays.copyOf(keyEnd, keyEnd.length * 2);
    valueLength = Arrays.copyOf(valueLength, valueLength.length * 2);
    keyObjects = Arrays.copyOf(keyObjects, keyObjects.length * 2);
    valueObjects = Arrays.copyOf(valueObjects, valueObjects.length * 2);
    keyInited = Arrays.copyOf(keyInited, keyInited.length * 2);
    valueInited = Arrays.copyOf(valueInited, valueInited.length * 2);
  }

  /**
   * Parse the byte[] and fill keyStart, keyEnd and valueLength.
   *
   * Entries whose key is NULL are skipped with a warning, and an entry whose
   * key was already seen earlier in the same map is dropped, so only the first
   * occurrence of a key is kept.
   */
  private void parse() {
    parsed = true;
    mapSize = 0;
    // empty array?
    if (length == 0) {
      return;
    }

    byte itemSeparator = oi.getItemSeparator();
    byte keyValueSeparator = oi.getKeyValueSeparator();
    boolean isEscaped = oi.isEscaped();
    byte escapeChar = oi.getEscapeChar();

    int arrayByteEnd = start + length;
    int elementByteBegin = start;
    int keyValueSeparatorPosition = -1;
    int elementByteEnd = start;
    byte[] data = this.bytes.getData();
    Set<Object> keySet = new LinkedHashSet<Object>();

    // Go through all bytes in the byte[]; the extra iteration at
    // arrayByteEnd closes the last entry.
    while (elementByteEnd <= arrayByteEnd) {
      // End of entry reached?
      if (elementByteEnd == arrayByteEnd
          || data[elementByteEnd] == itemSeparator) {
        // Array full? Keep one spare slot for the sentinel written after the loop.
        if (keyStart == null || mapSize + 1 == keyStart.length) {
          enlargeArrays();
        }
        keyStart[mapSize] = elementByteBegin;
        // If no keyValueSeparator is seen, all bytes belong to key, and
        // value will be NULL.
        keyEnd[mapSize] = (keyValueSeparatorPosition == -1 ? elementByteEnd
            : keyValueSeparatorPosition);
        valueLength[mapSize] = elementByteEnd - (keyEnd[mapSize] + 1);

        LazyPrimitive<?, ?> lazyKey = uncheckedGetKey(mapSize);
        if (lazyKey == null) {
          LOG.warn("skipped empty entry or entry with empty key in the representation of column with MAP type.");
          // reset keyInited[mapSize] flag, since it may be set to true in the
          // case of previous empty entry
          keyInited[mapSize] = false;
        } else if (keySet.add(lazyKey.getObject())) {
          // First occurrence of this key: keep the entry.
          mapSize++;
        } else {
          // Duplicate key: leave mapSize unchanged so the next entry reuses
          // this slot, and force its key to be initialized again.
          keyInited[mapSize] = false;
        }

        // reset keyValueSeparatorPosition
        keyValueSeparatorPosition = -1;
        elementByteBegin = elementByteEnd + 1;
        elementByteEnd++;
      } else {
        // Is this the first keyValueSeparator in this entry?
        if (keyValueSeparatorPosition == -1
            && data[elementByteEnd] == keyValueSeparator) {
          keyValueSeparatorPosition = elementByteEnd;
        }
        if (isEscaped && data[elementByteEnd] == escapeChar
            && elementByteEnd + 1 < arrayByteEnd) {
          // ignore the char after escape_char
          elementByteEnd += 2;
        } else {
          elementByteEnd++;
        }
      }
    }

    // Sentinel start position after the last kept entry.
    keyStart[mapSize] = arrayByteEnd + 1;

    if (mapSize > 0) {
      Arrays.fill(valueInited, 0, mapSize, false);
    }
  }

  /**
   * Get the value in the map for the key.
   *
   * If there are multiple matches (which is possible in the serialized format),
   * only the first one is returned.
   *
   * The lookup is a linear search over the parsed keys, comparing the writable
   * object of each key with the given key. We do linear search because in most
   * cases, user only wants to get one or two values out of the map, and the
   * cost of building up a HashMap is substantially higher.
   *
   * @param key
   *          The key object that we are looking for.
   * @return The corresponding value object, or NULL if not found
   */
  public Object getMapValueElement(Object key) {
    if (!parsed) {
      parse();
    }
    // search for the key
    for (int i = 0; i < mapSize; i++) {
      LazyPrimitive<?, ?> lazyKeyI = uncheckedGetKey(i);
      if (lazyKeyI == null) {
        continue;
      }
      // getWritableObject() will convert LazyPrimitive to actual primitive
      // writable objects.
      Object keyI = lazyKeyI.getWritableObject();
      if (keyI != null && keyI.equals(key)) {
        // Got a match, return the value
        return uncheckedGetValue(i);
      }
    }
    return null;
  }

  /**
   * Get the value object with the index without checking parsed.
   *
   * The value is materialized on first access and served from valueObjects
   * afterwards.
   *
   * @param index
   *          The index into the array starting from 0
   * @return the object of the value at the index
   */
  private Object uncheckedGetValue(int index) {
    if (valueInited[index]) {
      return valueObjects[index].getObject();
    }
    valueInited[index] = true;
    Text nullSequence = oi.getNullSequence();
    int valueIBegin = keyEnd[index] + 1;
    int valueILength = valueLength[index];
    if (valueObjects[index] == null) {
      valueObjects[index] = LazyFactory.createLazyObject(oi.getMapValueObjectInspector());
    }
    if (isNull(nullSequence, bytes, valueIBegin, valueILength)) {
      valueObjects[index].setNull();
    } else {
      valueObjects[index].init(bytes, valueIBegin, valueILength);
    }
    return valueObjects[index].getObject();
  }

  /**
   * Get the key object with the index without checking parsed.
   *
   * @param index
   *          The index into the array starting from 0
   * @return the lazy key at the index, or null when the key bytes are
   *         recognized as NULL by isNull()
   */
  private LazyPrimitive<?, ?> uncheckedGetKey(int index) {
    if (keyInited[index]) {
      return keyObjects[index];
    }
    int keyIBegin = keyStart[index];
    int keyILength = keyEnd[index] - keyStart[index];
    if (isNull(oi.getNullSequence(), bytes, keyIBegin, keyILength)) {
      return null;
    }
    keyInited[index] = true;
    if (keyObjects[index] == null) {
      keyObjects[index] = LazyFactory.createLazyPrimitiveClass(
          (PrimitiveObjectInspector) oi.getMapKeyObjectInspector());
    }
    keyObjects[index].init(bytes, keyIBegin, keyILength);
    return keyObjects[index];
  }

  /**
   * Return the map object representing this LazyMap. The keys of the returned
   * map are the objects returned by getObject() on each lazy key; entries
   * whose key object is null are left out. The result is cached until the
   * next init().
   *
   * @return the map object
   */
  public Map<Object, Object> getMap() {
    if (!parsed) {
      parse();
    }
    if (cachedMap != null) {
      return cachedMap;
    }
    // Use LinkedHashMap to provide deterministic order
    cachedMap = new LinkedHashMap<Object, Object>();

    // go through each element of the map
    for (int i = 0; i < mapSize; i++) {
      LazyPrimitive<?, ?> lazyKey = uncheckedGetKey(i);
      if (lazyKey == null) {
        continue;
      }
      Object key = lazyKey.getObject();
      // do not overwrite if there are duplicate keys
      if (key != null && !cachedMap.containsKey(key)) {
        cachedMap.put(key, uncheckedGetValue(i));
      }
    }
    return cachedMap;
  }

  /**
   * Get the size of the map represented by this LazyMap, parsing the data
   * first if needed.
   *
   * @return The number of entries kept by parsing; entries with a NULL key
   *         and repeated keys are not counted.
   */
  public int getMapSize() {
    if (!parsed) {
      parse();
    }
    return mapSize;
  }

  /**
   * @return whether the data has already been parsed
   */
  protected boolean getParsed() {
    return parsed;
  }

  /**
   * Override the parsed flag.
   *
   * @param parsed
   *          the new value of the flag
   */
  protected void setParsed(boolean parsed) {
    this.parsed = parsed;
  }
}