package brickhouse.udf.json;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.codehaus.jackson.JsonNode;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
public interface InspectorHandle {
/**
 * Converts a parsed JSON node into a plain Java object for this handle's
 * Hive type.
 *
 * @param jsonNode the JSON node to convert
 * @return the converted value; the implementations in this file return
 *         {@code null} when {@code jsonNode} is a JSON null node
 */
Object parseJson(JsonNode jsonNode);
/**
 * Returns the inspector describing the objects produced by
 * {@link #parseJson(JsonNode)}.
 *
 * @return the object inspector for this handle's output
 */
ObjectInspector getReturnType();
/**
 * Static factory that builds the {@link InspectorHandle} matching a Hive
 * {@link ObjectInspector} or a Hive type string.
 */
final public class InspectorHandleFactory {
    /**
     * Creates the handle that corresponds to the category reported by
     * {@code insp}.
     *
     * @param insp inspector describing the Hive type to parse into
     * @return a list, map, struct or primitive handle wrapping {@code insp}
     * @throws UDFArgumentException if {@code insp} is {@code null} or its
     *         category is not LIST, MAP, STRUCT or PRIMITIVE
     */
    static public InspectorHandle GenerateInspectorHandle(ObjectInspector insp) throws UDFArgumentException {
        if (insp == null) {
            throw new UDFArgumentException("Cannot create an InspectorHandle from a null ObjectInspector");
        }
        Category cat = insp.getCategory();
        switch (cat) {
            case LIST:
                return new InspectorHandle.ListHandle((ListObjectInspector) insp);
            case MAP:
                return new InspectorHandle.MapHandle((MapObjectInspector) insp);
            case STRUCT:
                return new InspectorHandle.StructHandle((StructObjectInspector) insp);
            case PRIMITIVE:
                return new InspectorHandle.PrimitiveHandle((PrimitiveObjectInspector) insp);
            default:
                // Fail here with a clear message instead of handing back null,
                // which would only surface later as a NullPointerException
                // when the handle is used.
                throw new UDFArgumentException("Unsupported object inspector category for JSON parsing: " + cat);
        }
    }

    /**
     * Creates a handle from a Hive type string by resolving it to a standard
     * Java object inspector first.
     *
     * @param typeStr Hive type string passed to
     *        {@code TypeInfoUtils.getTypeInfoFromTypeString}
     * @return the handle for the type described by {@code typeStr}
     * @throws UDFArgumentException if the resolved type has an unsupported
     *         category
     */
    static public InspectorHandle GenerateInspectorHandleFromTypeInfo(String typeStr) throws UDFArgumentException {
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeStr);
        ObjectInspector objInsp = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
        return GenerateInspectorHandle(objInsp);
    }
}
/**
 * Handle for Hive structs. If one passes a named-struct inspector in, then
 * one can parse arbitrary JSON objects: each struct field is read from the
 * JSON field of the same name.
 */
class StructHandle implements InspectorHandle {
    /** Struct field names, in the order reported by the struct inspector. */
    private List<String> fieldNames;
    /** Handle for each field's value; index-aligned with {@code fieldNames}. */
    private List<InspectorHandle> handleList;

    /**
     * Builds one child handle per field of {@code structInspector}.
     *
     * @param structInspector inspector describing the struct to produce
     * @throws UDFArgumentException propagated from creating a field's handle
     */
    public StructHandle(StructObjectInspector structInspector) throws UDFArgumentException {
        fieldNames = new ArrayList<String>();
        handleList = new ArrayList<InspectorHandle>();

        List<? extends StructField> refs = structInspector.getAllStructFieldRefs();
        for (StructField ref : refs) {
            fieldNames.add(ref.getFieldName());
            InspectorHandle fieldHandle = InspectorHandleFactory.GenerateInspectorHandle(ref.getFieldObjectInspector());
            handleList.add(fieldHandle);
        }
    }

    /**
     * Reads every struct field from {@code jsonNode} by name.
     *
     * @param jsonNode the JSON object holding the struct's fields; may be
     *        {@code null} or a JSON null node
     * @return a list of field values in struct-field order, or {@code null}
     *         when {@code jsonNode} is absent or JSON null
     */
    @Override
    public Object parseJson(JsonNode jsonNode) {
        // A field missing from the parent object arrives here as a Java null
        // (the result of JsonNode.get(key)), not as a JSON null node, so both
        // cases must be checked before touching the node.
        if (jsonNode == null || jsonNode.isNull()) {
            return null;
        }
        /// For structs, they just return a list of object values
        List<Object> valList = new ArrayList<Object>(fieldNames.size());

        for (int i = 0; i < fieldNames.size(); ++i) {
            String key = fieldNames.get(i);
            JsonNode valNode = jsonNode.get(key);
            InspectorHandle valHandle = handleList.get(i);

            Object valObj = valHandle.parseJson(valNode);
            valList.add(valObj);
        }

        return valList;
    }

    /**
     * @return a standard struct inspector built from the field names and the
     *         return types of the per-field handles
     */
    @Override
    public ObjectInspector getReturnType() {
        List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
        for (InspectorHandle fieldHandle : handleList) {
            structFieldObjectInspectors.add(fieldHandle.getReturnType());
        }
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, structFieldObjectInspectors);
    }
}
/**
 * Handle for Hive maps. Converts a JSON object into a map whose keys are the
 * JSON field names and whose values are parsed by the map-value handle.
 */
class MapHandle implements InspectorHandle {
    /** Handle used to parse every value of the JSON object. */
    private InspectorHandle mapValHandle;
    /** Inspector used to create result maps; built by {@link #getReturnType()}. */
    private StandardMapObjectInspector retInspector;

    public MapHandle() {
    } // For Kryo Deserialization

    /// for JSON maps (or "objects"), the keys are always string objects
    ///

    /**
     * @param insp map inspector whose key inspector must be a string inspector
     * @throws UDFArgumentException propagated from creating the value handle
     * @throws RuntimeException if the map key inspector is not a
     *         {@link StringObjectInspector}
     */
    public MapHandle(MapObjectInspector insp) throws UDFArgumentException {
        if (!(insp.getMapKeyObjectInspector() instanceof StringObjectInspector)) {
            throw new RuntimeException(" JSON maps can only have strings as keys");
        }
        mapValHandle = InspectorHandleFactory.GenerateInspectorHandle(insp.getMapValueObjectInspector());
    }

    /**
     * Copies every field of {@code jsonNode} into a new map.
     *
     * @param jsonNode the JSON object to convert; may be {@code null} or a
     *        JSON null node
     * @return a map from field name to parsed value, or {@code null} when
     *         {@code jsonNode} is absent or JSON null
     */
    @Override
    public Object parseJson(JsonNode jsonNode) {
        // Guard the Java null reference as well as the JSON null node: a
        // missing field is passed down as a plain null.
        if (jsonNode == null || jsonNode.isNull()) {
            return null;
        }
        // retInspector is only assigned inside getReturnType(); build it on
        // demand so parsing does not depend on that call having happened.
        if (retInspector == null) {
            getReturnType();
        }

        // create() is declared to return Object; entries are only ever put
        // with String keys below.
        @SuppressWarnings("unchecked")
        Map<String, Object> newMap = (Map<String, Object>) retInspector.create();

        Iterator<String> keys = jsonNode.getFieldNames();
        while (keys.hasNext()) {
            String key = keys.next();
            JsonNode valNode = jsonNode.get(key);
            Object val = mapValHandle.parseJson(valNode);
            newMap.put(key, val);
        }
        return newMap;
    }

    /**
     * @return a standard map inspector with Java string keys and the value
     *         handle's return type as values; also stored for use by
     *         {@link #parseJson(JsonNode)}
     */
    @Override
    public ObjectInspector getReturnType() {
        retInspector = ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                mapValHandle.getReturnType());
        return retInspector;
    }
}
/**
 * Handle for Hive lists. Converts a JSON array into a list whose elements
 * are parsed by the element handle.
 */
public class ListHandle implements InspectorHandle {
    /** Inspector used to create result lists; built by {@link #getReturnType()}. */
    private StandardListObjectInspector retInspector;
    /** Handle used to parse every element of the JSON array. */
    private InspectorHandle elemHandle;

    /**
     * @param insp list inspector whose element inspector selects the element
     *        handle
     * @throws UDFArgumentException propagated from creating the element handle
     */
    public ListHandle(ListObjectInspector insp) throws UDFArgumentException {
        elemHandle = InspectorHandleFactory.GenerateInspectorHandle(insp.getListElementObjectInspector());
    }

    /**
     * Parses every element of {@code jsonNode} in iteration order.
     *
     * @param jsonNode the JSON array to convert; may be {@code null} or a
     *        JSON null node
     * @return a list of parsed elements, or {@code null} when
     *         {@code jsonNode} is absent or JSON null
     */
    @Override
    public Object parseJson(JsonNode jsonNode) {
        // Guard the Java null reference as well as the JSON null node: a
        // missing field is passed down as a plain null.
        if (jsonNode == null || jsonNode.isNull()) {
            return null;
        }
        // retInspector is only assigned inside getReturnType(); build it on
        // demand so parsing does not depend on that call having happened.
        if (retInspector == null) {
            getReturnType();
        }

        // create(int) is declared to return Object.
        @SuppressWarnings("unchecked")
        List<Object> newList = (List<Object>) retInspector.create(0);

        Iterator<JsonNode> listNodes = jsonNode.getElements();
        while (listNodes.hasNext()) {
            JsonNode elemNode = listNodes.next();
            if (elemNode != null) {
                Object elemObj = elemHandle.parseJson(elemNode);
                newList.add(elemObj);
            } else {
                newList.add(null);
            }
        }
        return newList;
    }

    /**
     * @return a standard list inspector over the element handle's return
     *         type; also stored for use by {@link #parseJson(JsonNode)}
     */
    @Override
    public ObjectInspector getReturnType() {
        retInspector = ObjectInspectorFactory.getStandardListObjectInspector(elemHandle.getReturnType());
        return retInspector;
    }
}
/**
 * Handle for Hive primitive types. Converts a single JSON value node into
 * the Java object for the primitive category captured at construction.
 */
class PrimitiveHandle implements InspectorHandle {
    /** Primitive category of the inspector this handle was built from. */
    private PrimitiveCategory category;
    /** Parser for TIMESTAMP values: ISO date-time without milliseconds. */
    private DateTimeFormatter isoFormatter = ISODateTimeFormat.dateTimeNoMillis();

    /**
     * @param insp primitive inspector whose category selects the conversion
     * @throws UDFArgumentException declared for symmetry with the other
     *         handles; this constructor does not throw it itself
     */
    public PrimitiveHandle(PrimitiveObjectInspector insp) throws UDFArgumentException {
        category = insp.getPrimitiveCategory();
    }

    /**
     * Converts {@code jsonNode} according to this handle's category.
     *
     * @param jsonNode the JSON value; may be {@code null} or a JSON null node
     * @return the converted value, or {@code null} when the node is absent,
     *         JSON null, or the category has no conversion here
     */
    @Override
    public Object parseJson(JsonNode jsonNode) {
        if (jsonNode == null
                || jsonNode.isNull()) {
            return null;
        }
        switch (category) {
            case STRING:
                // Non-text nodes are rendered through toString().
                if (jsonNode.isTextual())
                    return jsonNode.getTextValue();
                else
                    return jsonNode.toString();
            case LONG:
                return jsonNode.getLongValue();
            case SHORT:
                return (short) jsonNode.getIntValue();
            case BYTE:
                return (byte) jsonNode.getIntValue();
            case BINARY:
                try {
                    return jsonNode.getBinaryValue();
                } catch (IOException ioExc) {
                    // Best-effort fallback when the node cannot be decoded as
                    // binary: keep the node's text, but as bytes, because the
                    // return inspector for BINARY describes a byte[] and a
                    // String would not match it.
                    return jsonNode.toString().getBytes(java.nio.charset.Charset.forName("UTF-8"));
                }
            case INT:
                return jsonNode.getIntValue();
            case FLOAT:
                // Narrowing cast then boxing; avoids the deprecated
                // Float(double) constructor and yields the same value.
                return (float) jsonNode.getDoubleValue();
            case DOUBLE:
                return jsonNode.getDoubleValue();
            case BOOLEAN:
                return jsonNode.getBooleanValue();
            case TIMESTAMP:
                long time = isoFormatter.parseMillis(jsonNode.getTextValue());
                return new Timestamp(time);
            default:
                // Categories without a conversion above yield null.
                return null;
        }
    }

    /**
     * @return the primitive Java object inspector for this handle's category
     */
    @Override
    public ObjectInspector getReturnType() {
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(category);
    }
}
}