package com.livingsocial.hive.utils; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import javax.script.Invocable; import javax.script.ScriptEngine; import javax.script.ScriptEngineManager; import javax.script.ScriptException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; public class ScriptingHelper { // Offset to get past the constants in the arg list private static int ARG_OFFSET = 3; public static class InitializationContainer { public String script; public String language; public ObjectInspector[] argumentOIs; public String returnType; public ObjectInspector outputOi; public ObjectInspector outputJavaOi; public GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; public int argOffset = ARG_OFFSET; public Invocable engine; } public static InitializationContainer initialize(ObjectInspector[] arguments) throws SemanticException { InitializationContainer rtn = new InitializationContainer(); if (arguments != null) { // Only validate inputs if this is the main call, for merging and terminating this step is not needed. // Nothing else can really be validated until evaluation time if (arguments.length < rtn.argOffset+1) { throw new SemanticException( "At least " + (rtn.argOffset+1) + " arguments are required, the script to run, the script language, the return type, and at least one argument, got " + arguments.length + " arguments passed in"); } // Convert all the constant string params rtn.script = getConstString(arguments[0], 1); rtn.language = getConstString(arguments[1], 2); rtn.returnType = getConstString(arguments[2], 3); // Get converters for all the actual arguments rtn.argumentOIs = new ObjectInspector[arguments.length - rtn.argOffset]; System.arraycopy(arguments, rtn.argOffset, rtn.argumentOIs, 0, rtn.argumentOIs.length); rtn.outputOi = javaObjectInspectorFromType(rtn.returnType); rtn.outputJavaOi = javaObjectInspectorFromType(rtn.returnType); rtn.returnOIResolver = buildReturnResolver(rtn.outputOi); } // Do this to test the script and make sure it's valid client-side try { rtn.engine = initializeEngine(rtn.language, rtn.script); } catch (HiveException e) { throw new SemanticException("The Script or Language settings seem to have problems: " + e, e); } return rtn; } public static String getConstString(ObjectInspector argument, int num) throws UDFArgumentTypeException { if (!ObjectInspectorUtils.isConstantObjectInspector(argument)) { new Exception().printStackTrace(System.err); throw new UDFArgumentTypeException(num, "The script argument " + num + " must be a constant string, but " + argument.getTypeName() + " was passed instead. (class=" + argument.getClass().getName() + ")"); } return (String) ObjectInspectorConverters.getConverter( argument, PrimitiveObjectInspectorFactory.javaStringObjectInspector).convert(((ConstantObjectInspector)argument).getWritableConstantValue()); } public static ObjectInspector javaObjectInspectorFromType(String returnType) throws SemanticException { return TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoUtils.getTypeInfoFromTypeString(returnType)); } public static ObjectInspector writableObjectInspectorFromType(String returnType) throws SemanticException { return TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(TypeInfoUtils.getTypeInfoFromTypeString(returnType)); } public static GenericUDFUtils.ReturnObjectInspectorResolver buildReturnResolver(ObjectInspector outputOi) throws SemanticException { GenericUDFUtils.ReturnObjectInspectorResolver rtn = new GenericUDFUtils.ReturnObjectInspectorResolver(true); rtn.update(outputOi); return rtn; } public static Converter getConverter(ObjectInspector oi) { ObjectInspector output = ObjectInspectorUtils.getStandardObjectInspector(oi); return output != null ? ObjectInspectorConverters.getConverter(oi, output) : null; } /** * Builds an invocable scripting engine using the passed in args. * This loads and compiles the script so the functions are available * in the returned Invocable engine. */ public static Invocable initializeEngine(String language, String script) throws HiveException { // Make sure we can find a scripting engine for the language ScriptEngine tmp = new ScriptEngineManager().getEngineByName(language); if (tmp == null) { throw new HiveException( "Could not find a script implementation for language " + language); } if (!(tmp instanceof Invocable)) { throw new HiveException("The script engine for " + language + " doesn't support invocable"); } Invocable engine = (Invocable) tmp; String scriptText; if (script.startsWith("/")) { // The file is a file in HDFS // Note: this is not the best way to do this, but it works Configuration conf = new Configuration(); String root = conf.get("fs.defaultFS"); String path = root + script; try { FileSystem fs = FileSystem.get(conf); Path scriptFile = new Path(path); BufferedReader reader = new BufferedReader(new InputStreamReader( fs.open(scriptFile))); String line = null; StringBuilder scriptBuilder = new StringBuilder(); while ((line = reader.readLine()) != null) { scriptBuilder.append(line); scriptBuilder.append("\n"); } scriptText = scriptBuilder.toString(); } catch (IOException e) { throw new HiveException( "Unable to load the script from file " + script, e); } } else { // The script is a literal script and should be handled directly scriptText = script; } try { tmp.eval(scriptText); } catch (ScriptException e) { throw new HiveException( "Something went wrong with the script when evaluating it", e); } return engine; } }