package brickhouse.udf.json; /** * Copyright 2012 Klout, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * **/ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.codehaus.jackson.JsonNode; import org.codehaus.jackson.JsonProcessingException; import org.codehaus.jackson.map.ObjectMapper; import java.io.IOException; /** * Generate an arbitrary Hive structure from a JSON string, * and an example template object. * <p/> * The UDF takes a JSON string as the first argument, and the second argument defines * the return type of the UDF, and which fields are parsed from the JSON string. * To parse JSON maps with values of varying types, use struct() to create a structure * with the desired JSON keys. The template object should be constant. * <p/> * For example, * from_json( " { "name":"Bob","value":23.0,colors["red","yellow","green"], * "inner_map":{"a":1,"b":2,"c":3 }" , * struct("name", "","value", 0.0,"colors", array(""), "inner_map", map("",1) ); */ @Description(name = "from_json", value = "_FUNC_(json,template,convert_flag) - Returns an arbitrary Hive Structure given a JSON string, and an example template object." ) public class FromJsonUDF extends GenericUDF { private StringObjectInspector jsonInspector; private InspectorHandle inspHandle; @Override public Object evaluate(DeferredObject[] arg0) throws HiveException { try { String jsonString = jsonInspector.getPrimitiveJavaObject(arg0[0].get()); if (jsonString == null) return null; ObjectMapper jacksonParser = new ObjectMapper(); JsonNode jsonNode = jacksonParser.readTree(jsonString); return inspHandle.parseJson(jsonNode); } catch (JsonProcessingException e) { throw new HiveException(e); } catch (IOException e) { throw new HiveException(e); } } @Override public String getDisplayString(String[] arg0) { return "from_json( \"" + arg0[0] + "\" , \"" + arg0[1] + "\" )"; } @Override public ObjectInspector initialize(ObjectInspector[] arg0) throws UDFArgumentException { if (arg0.length != 2) { throw new UDFArgumentException("from_json expects a JSON string and a template object"); } if (arg0[0].getCategory() != Category.PRIMITIVE || ((PrimitiveObjectInspector) arg0[0]).getPrimitiveCategory() != PrimitiveCategory.STRING) { throw new UDFArgumentException("from_json expects a JSON string and a template object"); } jsonInspector = (StringObjectInspector) arg0[0]; if (arg0[1].getCategory() == Category.PRIMITIVE && ((PrimitiveObjectInspector) arg0[1]).getPrimitiveCategory() == PrimitiveCategory.STRING) { if (!(arg0[1] instanceof ConstantObjectInspector)) { throw new UDFArgumentException("typeinfo string must be constant"); } ConstantObjectInspector typeInsp = (ConstantObjectInspector) arg0[1]; String typeStr = typeInsp.getWritableConstantValue().toString(); inspHandle = InspectorHandle.InspectorHandleFactory.GenerateInspectorHandleFromTypeInfo(typeStr); } else { inspHandle = InspectorHandle.InspectorHandleFactory.GenerateInspectorHandle(arg0[1]); } return inspHandle.getReturnType(); } static public String ToCamelCase(String underscore) { StringBuilder sb = new StringBuilder(); String[] splArr = underscore.toLowerCase().split("_"); sb.append(splArr[0]); for (int i = 1; i < splArr.length; ++i) { String word = splArr[i]; char firstChar = word.charAt(0); if (firstChar >= 'a' && firstChar <= 'z') { sb.append((char) (word.charAt(0) + 'A' - 'a')); sb.append(word.substring(1)); } else { sb.append(word); } } return sb.toString(); } /** * Converts from CamelCase to a string containing * underscores. * * @param camel * @return */ static public String FromCamelCase(String camel) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < camel.length(); ++i) { char ch = camel.charAt(i); if (ch >= 'A' && ch <= 'Z') { sb.append('_'); sb.append((char) (ch - 'A' + 'a')); } else { sb.append(ch); } } return sb.toString(); } }