/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.orc;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.io.Writable;
import org.apache.orc.OrcProto;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
final public class OrcStruct implements Writable {
/** Values of the struct's fields, indexed by field position. */
private Object[] fields;

/**
 * Creates a struct whose field array has the given length; every slot
 * starts out as {@code null}.
 * @param children the number of field slots to allocate
 */
OrcStruct(int children) {
  this.fields = new Object[children];
}
/**
 * Reads the value stored in one field slot.
 * @param fieldIndex zero-based position of the field
 * @return the value currently stored at that position (may be null)
 */
Object getFieldValue(int fieldIndex) {
  return this.fields[fieldIndex];
}
/**
 * Stores a value in one field slot. The field array is not grown here, so
 * the index must already be within the current number of fields.
 * @param fieldIndex zero-based position of the field
 * @param value the value to store (may be null)
 */
void setFieldValue(int fieldIndex, Object value) {
  this.fields[fieldIndex] = value;
}
/**
 * Reports how many field slots this struct currently has.
 * @return the length of the field array
 */
public int getNumFields() {
  return this.fields.length;
}
/**
 * Resize the struct to hold the requested number of fields. Nothing happens
 * when the size is already correct; otherwise a new array is allocated and
 * as many of the existing values as fit are carried over, in order.
 * @param numFields the new number of fields
 */
public void setNumFields(int numFields) {
  final Object[] previous = fields;
  if (previous.length == numFields) {
    return;
  }
  final Object[] resized = new Object[numFields];
  final int preserved = Math.min(previous.length, numFields);
  System.arraycopy(previous, 0, resized, 0, preserved);
  fields = resized;
}
/**
 * Destructively point this struct at another struct's field array. No copy
 * is made: afterwards both structs share the same array, so a value written
 * through one is visible through the other.
 * @param other the struct whose field array is adopted
 */
void linkFields(OrcStruct other) {
  this.fields = other.fields;
}
/**
 * Not supported by this class: the method always throws.
 * @param dataOutput unused
 * @throws UnsupportedOperationException always
 */
@Override
public void write(DataOutput dataOutput) throws IOException {
throw new UnsupportedOperationException("write unsupported");
}
/**
 * Not supported by this class: the method always throws.
 * @param dataInput unused
 * @throws UnsupportedOperationException always
 */
@Override
public void readFields(DataInput dataInput) throws IOException {
throw new UnsupportedOperationException("readFields unsupported");
}
/**
 * Two structs are equal when the other object is exactly an
 * {@code OrcStruct}, both have the same number of fields, and every pair of
 * corresponding values is either both null or equal via {@code equals}.
 * @param other the object to compare with
 * @return true if the two structs hold equal values in the same positions
 */
@Override
public boolean equals(Object other) {
  if (other == null) {
    return false;
  }
  if (other.getClass() != OrcStruct.class) {
    return false;
  }
  final Object[] theirs = ((OrcStruct) other).fields;
  final int count = fields.length;
  if (count != theirs.length) {
    return false;
  }
  for (int pos = 0; pos < count; ++pos) {
    final Object mine = fields[pos];
    final boolean same =
        (mine == null) ? (theirs[pos] == null) : mine.equals(theirs[pos]);
    if (!same) {
      return false;
    }
  }
  return true;
}
/**
 * Hash derived from the number of fields XOR-ed with the hash of every
 * non-null field value; null values contribute nothing.
 * @return the combined hash
 */
@Override
public int hashCode() {
  int hash = fields.length;
  for (int pos = 0; pos < fields.length; ++pos) {
    final Object value = fields[pos];
    hash ^= (value == null) ? 0 : value.hashCode();
  }
  return hash;
}
/**
 * Renders the field values in order, separated by {@code ", "} and wrapped
 * in braces, e.g. <code>{1, abc, null}</code>.
 * @return the textual form of this struct
 */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder();
  text.append("{");
  // Empty before the first value, ", " before every later one.
  String separator = "";
  for (Object value : fields) {
    text.append(separator);
    text.append(value);
    separator = ", ";
  }
  return text.append("}").toString();
}
/**
 * Immutable {@link StructField} implementation holding a field's name, its
 * object inspector and the numeric offset reported as the field id.
 */
static class Field implements StructField {
  private final String name;
  private final ObjectInspector inspector;
  private final int offset;

  /**
   * @param name the field name
   * @param inspector the inspector for the field's values
   * @param offset the value returned by {@link #getFieldID()}
   */
  Field(String name, ObjectInspector inspector, int offset) {
    this.offset = offset;
    this.inspector = inspector;
    this.name = name;
  }

  /** @return the field name given at construction */
  @Override
  public String getFieldName() {
    return this.name;
  }

  /** @return the inspector given at construction */
  @Override
  public ObjectInspector getFieldObjectInspector() {
    return this.inspector;
  }

  /** @return the offset given at construction */
  @Override
  public int getFieldID() {
    return this.offset;
  }

  /** @return always {@code null}; no comment is kept for these fields */
  @Override
  public String getFieldComment() {
    return null;
  }
}
/**
 * Settable struct inspector for {@link OrcStruct} values. It keeps an ordered
 * list of {@link StructField}s; each field's offset is used as the index into
 * the struct's value array.
 */
static class OrcStructInspector extends SettableStructObjectInspector {
  private List<StructField> fields;

  /** Creates an inspector without a field list; {@code fields} stays unset. */
  protected OrcStructInspector() {
    super();
  }

  /**
   * Creates an inspector over an existing field list (not copied).
   * @param fields the fields of the struct, in order
   */
  OrcStructInspector(List<StructField> fields) {
    this.fields = fields;
  }

  /**
   * Builds the field list from a struct type description; field {@code i}
   * gets offset {@code i}.
   * @param info the struct type to mirror
   */
  OrcStructInspector(StructTypeInfo info) {
    List<String> fieldNames = info.getAllStructFieldNames();
    List<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
    fields = new ArrayList<StructField>(fieldNames.size());
    for (int i = 0; i < fieldNames.size(); ++i) {
      fields.add(new Field(fieldNames.get(i),
          createObjectInspector(fieldTypes.get(i)), i));
    }
  }

  /**
   * Builds the field list from the ORC type at {@code columnId}; each subtype
   * id is resolved against the same {@code types} list.
   * @param columnId index of the struct type within {@code types}
   * @param types the ORC type list
   */
  OrcStructInspector(int columnId, List<OrcProto.Type> types) {
    OrcProto.Type type = types.get(columnId);
    int fieldCount = type.getSubtypesCount();
    fields = new ArrayList<StructField>(fieldCount);
    for (int i = 0; i < fieldCount; ++i) {
      int fieldType = type.getSubtypes(i);
      fields.add(new Field(type.getFieldNames(i),
          createObjectInspector(fieldType, types), i));
    }
  }

  /** @return the field list held by this inspector (not a copy) */
  @Override
  public List<StructField> getAllStructFieldRefs() {
    return fields;
  }

  /**
   * Finds a field by name, ignoring case.
   * @param s the field name to look for
   * @return the first matching field, or {@code null} if none matches
   */
  @Override
  public StructField getStructFieldRef(String s) {
    for (StructField field : fields) {
      if (field.getFieldName().equalsIgnoreCase(s)) {
        return field;
      }
    }
    return null;
  }

  /**
   * Reads one field value from a struct.
   * @param object the {@link OrcStruct} to read, or null
   * @param field a {@link Field} belonging to this inspector
   * @return the stored value; {@code null} when the struct is null or has
   *         fewer slots than the field's offset requires
   */
  @Override
  public Object getStructFieldData(Object object, StructField field) {
    if (object == null) {
      return null;
    }
    int offset = ((Field) field).offset;
    OrcStruct struct = (OrcStruct) object;
    if (offset >= struct.fields.length) {
      return null;
    }
    return struct.fields[offset];
  }

  /**
   * Copies all values of a struct into a new list.
   * @param object the {@link OrcStruct} to read, or null
   * @return a new list with one entry per slot of the struct, or
   *         {@code null} when the struct is null
   */
  @Override
  public List<Object> getStructFieldsDataAsList(Object object) {
    if (object == null) {
      return null;
    }
    OrcStruct struct = (OrcStruct) object;
    List<Object> result = new ArrayList<Object>(struct.fields.length);
    for (Object child : struct.fields) {
      result.add(child);
    }
    return result;
  }

  /** @return the type name in the form {@code struct<name:type,...>} */
  @Override
  public String getTypeName() {
    StringBuilder buffer = new StringBuilder();
    buffer.append("struct<");
    for (int i = 0; i < fields.size(); ++i) {
      StructField field = fields.get(i);
      if (i != 0) {
        buffer.append(",");
      }
      buffer.append(field.getFieldName());
      buffer.append(":");
      buffer.append(field.getFieldObjectInspector().getTypeName());
    }
    buffer.append(">");
    return buffer.toString();
  }

  @Override
  public Category getCategory() {
    return Category.STRUCT;
  }

  /** @return a new struct with zero fields; it grows as fields are set */
  @Override
  public Object create() {
    return new OrcStruct(0);
  }

  /**
   * Stores a value in a struct, enlarging the struct first when the field's
   * offset lies beyond its current number of slots.
   * @param struct the {@link OrcStruct} to modify
   * @param field a {@link Field} belonging to this inspector
   * @param fieldValue the value to store
   * @return the same {@code struct} instance
   */
  @Override
  public Object setStructFieldData(Object struct, StructField field,
                                   Object fieldValue) {
    OrcStruct orcStruct = (OrcStruct) struct;
    int offset = ((Field) field).offset;
    // if the offset is bigger than our current number of fields, grow it
    if (orcStruct.getNumFields() <= offset) {
      orcStruct.setNumFields(offset + 1);
    }
    orcStruct.setFieldValue(offset, fieldValue);
    return struct;
  }

  /**
   * Inspectors are equal when they are of the same class, have the same
   * number of fields, and corresponding fields have names that match
   * ignoring case and equal child inspectors.
   */
  @Override
  public boolean equals(Object o) {
    if (o == null || o.getClass() != getClass()) {
      return false;
    } else if (o == this) {
      return true;
    } else {
      List<StructField> other = ((OrcStructInspector) o).fields;
      if (other.size() != fields.size()) {
        return false;
      }
      for (int i = 0; i < fields.size(); ++i) {
        StructField left = other.get(i);
        StructField right = fields.get(i);
        if (!(left.getFieldName().equalsIgnoreCase(right.getFieldName()) &&
            left.getFieldObjectInspector().equals
                (right.getFieldObjectInspector()))) {
          return false;
        }
      }
      return true;
    }
  }

  /**
   * Hash code kept consistent with {@link #equals(Object)}. Only properties
   * that equal inspectors share are mixed in: the field count, the length of
   * each field name (names are compared ignoring case, which still requires
   * equal length), and the category of each child inspector. Child hash
   * codes are deliberately not used, so the result does not depend on
   * whether child inspectors override {@code hashCode}.
   * NOTE(review): assumes equal child inspectors report the same category.
   * @return the hash code, or 0 when no field list has been set
   */
  @Override
  public int hashCode() {
    if (fields == null) {
      // Possible only through the no-argument constructor.
      return 0;
    }
    int result = fields.size();
    for (StructField field : fields) {
      result = 31 * result + field.getFieldName().length();
      result = 31 * result
          + field.getFieldObjectInspector().getCategory().ordinal();
    }
    return result;
  }
}
/**
 * Settable map inspector whose map values are plain {@link Map} instances.
 * It holds one inspector for the keys and one for the values.
 */
static class OrcMapObjectInspector
    implements MapObjectInspector, SettableMapObjectInspector {
  private ObjectInspector key;
  private ObjectInspector value;

  /** Creates an inspector with both child inspectors unset. */
  private OrcMapObjectInspector() {
    super();
  }

  /**
   * Derives the key and value inspectors from a map type description.
   * @param info the map type to mirror
   */
  OrcMapObjectInspector(MapTypeInfo info) {
    key = createObjectInspector(info.getMapKeyTypeInfo());
    value = createObjectInspector(info.getMapValueTypeInfo());
  }

  /**
   * Derives the key and value inspectors from the ORC type at
   * {@code columnId}: subtype 0 is the key, subtype 1 the value.
   * @param columnId index of the map type within {@code types}
   * @param types the ORC type list
   */
  OrcMapObjectInspector(int columnId, List<OrcProto.Type> types) {
    OrcProto.Type type = types.get(columnId);
    key = createObjectInspector(type.getSubtypes(0), types);
    value = createObjectInspector(type.getSubtypes(1), types);
  }

  @Override
  public ObjectInspector getMapKeyObjectInspector() {
    return key;
  }

  @Override
  public ObjectInspector getMapValueObjectInspector() {
    return value;
  }

  /**
   * Looks a key up in a map.
   * @return the mapped value, or {@code null} when the map or key is null
   */
  @Override
  public Object getMapValueElement(Object map, Object key) {
    if (map == null || key == null) {
      return null;
    }
    return ((Map<?, ?>) map).get(key);
  }

  /** @return the argument viewed as a map, or {@code null} if it is null */
  @Override
  @SuppressWarnings("unchecked")
  public Map<Object, Object> getMap(Object map) {
    if (map == null) {
      return null;
    }
    return (Map<Object, Object>) map;
  }

  /** @return the number of entries, or -1 when the map is null */
  @Override
  public int getMapSize(Object map) {
    if (map == null) {
      return -1;
    }
    return ((Map<?, ?>) map).size();
  }

  /** @return the type name in the form {@code map<keyType,valueType>} */
  @Override
  public String getTypeName() {
    return "map<" + key.getTypeName() + "," + value.getTypeName() + ">";
  }

  @Override
  public Category getCategory() {
    return Category.MAP;
  }

  /** @return a new, empty {@link HashMap} */
  @Override
  public Object create() {
    return new HashMap<Object, Object>();
  }

  /** Adds or replaces an entry and returns the same map instance. */
  @Override
  @SuppressWarnings("unchecked")
  public Object put(Object map, Object key, Object value) {
    ((Map<Object, Object>) map).put(key, value);
    return map;
  }

  /** Removes the entry for {@code key} and returns the same map instance. */
  @Override
  public Object remove(Object map, Object key) {
    ((Map<?, ?>) map).remove(key);
    return map;
  }

  /** Empties the map and returns the same map instance. */
  @Override
  public Object clear(Object map) {
    ((Map<?, ?>) map).clear();
    return map;
  }

  /**
   * Inspectors are equal when they are of the same class and their key and
   * value inspectors are equal.
   */
  @Override
  public boolean equals(Object o) {
    if (o == null || o.getClass() != getClass()) {
      return false;
    } else if (o == this) {
      return true;
    } else {
      OrcMapObjectInspector other = (OrcMapObjectInspector) o;
      return other.key.equals(key) && other.value.equals(value);
    }
  }

  /**
   * Hash code kept consistent with {@link #equals(Object)}. It mixes only the
   * categories of the key and value inspectors rather than their hash codes,
   * so the result does not depend on whether they override {@code hashCode}.
   * NOTE(review): assumes equal child inspectors report the same category.
   */
  @Override
  public int hashCode() {
    return 31 * categoryHash(key) + categoryHash(value);
  }

  /** Ordinal of the inspector's category plus one, or 0 for a null inspector. */
  private static int categoryHash(ObjectInspector inspector) {
    return inspector == null ? 0 : inspector.getCategory().ordinal() + 1;
  }
}
/**
 * Settable list inspector whose list values are plain {@link List}
 * instances. It holds a single inspector for the list elements.
 */
static class OrcListObjectInspector
    implements ListObjectInspector, SettableListObjectInspector {
  private ObjectInspector child;

  /** Creates an inspector with the element inspector unset. */
  private OrcListObjectInspector() {
    super();
  }

  /**
   * Derives the element inspector from a list type description.
   * @param info the list type to mirror
   */
  OrcListObjectInspector(ListTypeInfo info) {
    child = createObjectInspector(info.getListElementTypeInfo());
  }

  /**
   * Derives the element inspector from subtype 0 of the ORC type at
   * {@code columnId}.
   * @param columnId index of the list type within {@code types}
   * @param types the ORC type list
   */
  OrcListObjectInspector(int columnId, List<OrcProto.Type> types) {
    OrcProto.Type type = types.get(columnId);
    child = createObjectInspector(type.getSubtypes(0), types);
  }

  @Override
  public ObjectInspector getListElementObjectInspector() {
    return child;
  }

  /**
   * @return the element at position {@code i}, or {@code null} when the list
   *         is null or the index is out of range
   */
  @Override
  public Object getListElement(Object list, int i) {
    if (list == null || i < 0 || i >= getListLength(list)) {
      return null;
    }
    return ((List<?>) list).get(i);
  }

  /** @return the number of elements, or -1 when the list is null */
  @Override
  public int getListLength(Object list) {
    if (list == null) {
      return -1;
    }
    return ((List<?>) list).size();
  }

  /** @return the argument viewed as a list, or {@code null} if it is null */
  @Override
  public List<?> getList(Object list) {
    if (list == null) {
      return null;
    }
    return (List<?>) list;
  }

  /** @return the type name in the form {@code array<elementType>} */
  @Override
  public String getTypeName() {
    return "array<" + child.getTypeName() + ">";
  }

  @Override
  public Category getCategory() {
    return Category.LIST;
  }

  /**
   * Creates a list that already contains {@code size} null elements (size,
   * not merely capacity).
   * @param size the number of null elements to pre-fill
   * @return the new list
   */
  @Override
  public Object create(int size) {
    ArrayList<Object> result = new ArrayList<Object>(size);
    for (int i = 0; i < size; ++i) {
      result.add(null);
    }
    return result;
  }

  /**
   * Stores an element, first padding the list with nulls when {@code index}
   * lies beyond its current end.
   * @return the same list instance
   */
  @Override
  @SuppressWarnings("unchecked")
  public Object set(Object list, int index, Object element) {
    List<Object> target = (List<Object>) list;
    for (int i = target.size(); i < index + 1; ++i) {
      target.add(null);
    }
    target.set(index, element);
    return list;
  }

  /**
   * Changes the size of the list to {@code newSize}: a shorter list is
   * padded with nulls, a longer one has its tail removed.
   *
   * Previously this only reserved capacity, which never changed the size;
   * a list reused for a shorter value therefore kept stale trailing
   * elements. A negative {@code newSize} is still ignored, as before.
   * @param list the list to resize; any {@link List} is accepted
   * @param newSize the desired number of elements
   * @return the same list instance
   */
  @Override
  @SuppressWarnings("unchecked")
  public Object resize(Object list, int newSize) {
    if (newSize < 0) {
      return list;
    }
    List<Object> target = (List<Object>) list;
    int currentSize = target.size();
    if (newSize < currentSize) {
      // Drop everything from newSize to the end in one operation.
      target.subList(newSize, currentSize).clear();
    } else if (newSize > currentSize) {
      if (target instanceof ArrayList) {
        // Avoid repeated reallocation while padding.
        ((ArrayList<Object>) target).ensureCapacity(newSize);
      }
      for (int i = currentSize; i < newSize; ++i) {
        target.add(null);
      }
    }
    return list;
  }

  /**
   * Inspectors are equal when they are of the same class and their element
   * inspectors are equal.
   */
  @Override
  public boolean equals(Object o) {
    if (o == null || o.getClass() != getClass()) {
      return false;
    } else if (o == this) {
      return true;
    } else {
      ObjectInspector other = ((OrcListObjectInspector) o).child;
      return other.equals(child);
    }
  }

  /**
   * Hash code kept consistent with {@link #equals(Object)}. It uses only the
   * category of the element inspector rather than its hash code, so the
   * result does not depend on whether that inspector overrides
   * {@code hashCode}.
   * NOTE(review): assumes equal child inspectors report the same category.
   */
  @Override
  public int hashCode() {
    return child == null ? 0 : child.getCategory().ordinal() + 1;
  }
}
/**
 * Creates the object inspector that corresponds to a type description.
 * Primitive types map to the writable inspectors of
 * {@link PrimitiveObjectInspectorFactory}; struct, union, map and list types
 * map to the ORC inspector classes, which recurse into their child types.
 * @param info the type to build an inspector for
 * @return the matching object inspector
 * @throws IllegalArgumentException if the category or primitive category is
 *         not one of those handled here
 */
static public ObjectInspector createObjectInspector(TypeInfo info) {
  switch (info.getCategory()) {
    case STRUCT:
      return new OrcStructInspector((StructTypeInfo) info);
    case UNION:
      return new OrcUnion.OrcUnionObjectInspector((UnionTypeInfo) info);
    case MAP:
      return new OrcMapObjectInspector((MapTypeInfo) info);
    case LIST:
      return new OrcListObjectInspector((ListTypeInfo) info);
    case PRIMITIVE: {
      final PrimitiveTypeInfo primitive = (PrimitiveTypeInfo) info;
      switch (primitive.getPrimitiveCategory()) {
        case BOOLEAN:
          return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
        case BYTE:
          return PrimitiveObjectInspectorFactory.writableByteObjectInspector;
        case SHORT:
          return PrimitiveObjectInspectorFactory.writableShortObjectInspector;
        case INT:
          return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
        case LONG:
          return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
        case FLOAT:
          return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
        case DOUBLE:
          return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
        case STRING:
          return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
        case BINARY:
          return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
        case TIMESTAMP:
          return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
        case DATE:
          return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
        // These inspectors depend on the concrete type info, so they are
        // looked up from it instead of using a shared constant.
        case CHAR:
        case VARCHAR:
        case DECIMAL:
          return PrimitiveObjectInspectorFactory
              .getPrimitiveWritableObjectInspector(primitive);
        default:
          throw new IllegalArgumentException("Unknown primitive type " +
              primitive.getPrimitiveCategory());
      }
    }
    default:
      throw new IllegalArgumentException("Unknown type " +
          info.getCategory());
  }
}
/**
 * Creates the object inspector for one entry of an ORC type list.
 * Primitive kinds map to the writable inspectors of
 * {@link PrimitiveObjectInspectorFactory}; struct, union, map and list kinds
 * map to the ORC inspector classes, which resolve their subtypes against the
 * same list.
 * @param columnId index of the type within {@code types}
 * @param types the ORC type list
 * @return the matching object inspector
 * @throws UnsupportedOperationException if a char or varchar type carries no
 *         maximum length, or if the kind is not handled here
 */
static ObjectInspector createObjectInspector(int columnId,
                                             List<OrcProto.Type> types) {
  final OrcProto.Type columnType = types.get(columnId);
  switch (columnType.getKind()) {
    case BOOLEAN:
      return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
    case BYTE:
      return PrimitiveObjectInspectorFactory.writableByteObjectInspector;
    case SHORT:
      return PrimitiveObjectInspectorFactory.writableShortObjectInspector;
    case INT:
      return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    case LONG:
      return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    case FLOAT:
      return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
    case DOUBLE:
      return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    case STRING:
      return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    case BINARY:
      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    case TIMESTAMP:
      return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
    case DATE:
      return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
    case CHAR: {
      if (columnType.hasMaximumLength()) {
        return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            TypeInfoFactory.getCharTypeInfo(columnType.getMaximumLength()));
      }
      throw new UnsupportedOperationException(
          "Illegal use of char type without length in ORC type definition.");
    }
    case VARCHAR: {
      if (columnType.hasMaximumLength()) {
        return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            TypeInfoFactory.getVarcharTypeInfo(columnType.getMaximumLength()));
      }
      throw new UnsupportedOperationException(
          "Illegal use of varchar type without length in ORC type definition.");
    }
    case DECIMAL: {
      // Fall back to the system defaults when the file does not record
      // precision or scale.
      final int precision;
      if (columnType.hasPrecision()) {
        precision = columnType.getPrecision();
      } else {
        precision = HiveDecimal.SYSTEM_DEFAULT_PRECISION;
      }
      final int scale;
      if (columnType.hasScale()) {
        scale = columnType.getScale();
      } else {
        scale = HiveDecimal.SYSTEM_DEFAULT_SCALE;
      }
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(precision, scale));
    }
    case STRUCT:
      return new OrcStructInspector(columnId, types);
    case UNION:
      return new OrcUnion.OrcUnionObjectInspector(columnId, types);
    case MAP:
      return new OrcMapObjectInspector(columnId, types);
    case LIST:
      return new OrcListObjectInspector(columnId, types);
    default:
      throw new UnsupportedOperationException("Unknown type " +
          columnType.getKind());
  }
}
}