/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.scripting.jython; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataByteArray; import org.apache.pig.data.DefaultBagFactory; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.python.core.Py; import org.python.core.PyBoolean; import org.python.core.PyDictionary; import org.python.core.PyFloat; import org.python.core.PyInteger; import org.python.core.PyList; import org.python.core.PyLong; import org.python.core.PyNone; import org.python.core.PyObject; import org.python.core.PyString; import org.python.core.PyTuple; public class JythonUtils { private static TupleFactory tupleFactory = TupleFactory.getInstance(); private static BagFactory bagFactory = DefaultBagFactory.getInstance(); @SuppressWarnings("unchecked") public static Object pythonToPig(PyObject pyObject) throws ExecException { try { Object javaObj = null; // Add code for all supported pig types here // Tuple, bag, map, int, long, float, double, chararray, bytearray if (pyObject instanceof PyTuple) { PyTuple pyTuple = (PyTuple) pyObject; Object[] tuple = new Object[pyTuple.size()]; int i = 0; for (PyObject tupleObject : pyTuple.getArray()) { tuple[i++] = pythonToPig(tupleObject); } javaObj = tupleFactory.newTuple(Arrays.asList(tuple)); } else if (pyObject instanceof PyList) { DataBag list = bagFactory.newDefaultBag(); for (PyObject bagTuple : ((PyList) pyObject).asIterable()) { // If the item of the array is not a tuple, // wrap it into tuple before adding to bag Object pigBagItem = pythonToPig(bagTuple); Tuple pigBagTuple; if (!(pigBagItem instanceof Tuple)) { pigBagTuple = TupleFactory.getInstance().newTuple(1); pigBagTuple.set(0, pigBagItem); } else { pigBagTuple = (Tuple)pigBagItem; } list.add(pigBagTuple); } javaObj = list; } else if (pyObject instanceof PyDictionary) { Map<?, Object> map = Py.tojava(pyObject, Map.class); Map<Object, Object> newMap = new HashMap<Object, Object>(); for (Map.Entry<?, Object> entry : map.entrySet()) { if (entry.getValue() instanceof PyObject) { newMap.put(entry.getKey(), pythonToPig((PyObject) entry.getValue())); } else { // Jython sometimes uses directly the java class: for example for integers newMap.put(entry.getKey(), entry.getValue()); } } javaObj = newMap; } else if (pyObject instanceof PyLong) { javaObj = pyObject.__tojava__(Long.class); } else if (pyObject instanceof PyBoolean) { javaObj = pyObject.__tojava__(Boolean.class); } else if (pyObject instanceof PyInteger) { javaObj = pyObject.__tojava__(Integer.class); } else if (pyObject instanceof PyFloat) { // J(P)ython is loosely typed, supports only float type, // hence we convert everything to double to save precision javaObj = pyObject.__tojava__(Double.class); } else if (pyObject instanceof PyString) { javaObj = pyObject.__tojava__(String.class); } else if (pyObject instanceof PyNone) { return null; } else { javaObj = pyObject.__tojava__(byte[].class); // if we successfully converted to byte[] if(javaObj instanceof byte[]) { javaObj = new DataByteArray((byte[])javaObj); } else { throw new ExecException("Non supported pig datatype found, cast failed: "+(pyObject==null?null:pyObject.getClass().getName())); } } if(javaObj.equals(Py.NoConversion)) { throw new ExecException("Cannot cast into any pig supported type: "+(pyObject==null?null:pyObject.getClass().getName())); } return javaObj; } catch (Exception e) { throw new ExecException("Cannot convert jython type ("+(pyObject==null?null:pyObject.getClass().getName())+") to pig datatype "+ e, e); } } public static PyObject pigToPython(Object object) { if (object instanceof Tuple) { return pigTupleToPyTuple((Tuple) object); } else if (object instanceof DataBag) { PyList list = new PyList(); for (Tuple bagTuple : (DataBag) object) { list.add(pigTupleToPyTuple(bagTuple)); } return list; } else if (object instanceof Map<?, ?>) { PyDictionary newMap = new PyDictionary(); for (Map.Entry<?, ?> entry : ((Map<?, ?>) object).entrySet()) { newMap.put(entry.getKey(), pigToPython(entry.getValue())); } return newMap; } else if (object instanceof DataByteArray) { return Py.java2py(((DataByteArray) object).get()); } else { return Py.java2py(object); } } public static PyTuple pigTupleToPyTuple(Tuple tuple) { PyObject[] pyTuple = new PyObject[tuple.size()]; int i = 0; for (Object object : tuple.getAll()) { pyTuple[i++] = pigToPython(object); } return new PyTuple(pyTuple); } }