/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.scripting.groovy;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

/**
 * Static helpers converting values between their Groovy and Pig
 * representations.
 */
public class GroovyUtils {

  private static final TupleFactory tupleFactory = TupleFactory.getInstance();

  private static final BagFactory bagFactory = BagFactory.getInstance();

  /**
   * Converts an object created on the Groovy side to its Pig counterpart.
   *
   * The conversions are as follows:
   *
   * <pre>
   * Groovy                        Pig
   * Object[]                      Tuple
   * groovy.lang.Tuple             Tuple
   * org.apache.pig.data.Tuple     Tuple (same instance)
   * org.apache.pig.data.DataBag   DataBag (same instance)
   * java.util.Map                 Map (keys converted, then toString()'ed)
   * java.util.List                DataBag
   * Byte/Short/Integer            int
   * Long                          long
   * Float                         float
   * Double                        double
   * BigInteger                    BigInteger
   * BigDecimal                    BigDecimal
   * String                        chararray
   * byte[]                        DataByteArray (copy)
   * Boolean                       boolean
   * org.joda.time.DateTime        org.joda.time.DateTime
   * null                          null
   * </pre>
   *
   * Anything else raises an exception.
   *
   * @param groovyObject
   *          Groovy object to convert
   * @return the Pig counterpart of groovyObject
   *
   * @throws ExecException
   *           if groovyObject (or one of its members) has no Pig
   *           counterpart, or if a Map contains a null key
   */
  public static Object groovyToPig(Object groovyObject) throws ExecException {

    if (null == groovyObject) {
      return null;
    }

    //
    // Object[] and groovy.lang.Tuple both become a Pig Tuple whose fields
    // are the converted members. groovy.lang.Tuple must be tested before
    // java.util.List, otherwise it would be turned into a DataBag.
    //

    if (groovyObject instanceof Object[]) {
      Object[] members = (Object[]) groovyObject;
      List<Object> converted = new ArrayList<Object>(members.length);
      for (Object member : members) {
        converted.add(groovyToPig(member));
      }
      return tupleFactory.newTuple(converted);
    }

    if (groovyObject instanceof groovy.lang.Tuple) {
      List<Object> converted = new ArrayList<Object>();
      for (Object member : (Iterable<?>) groovyObject) {
        converted.add(groovyToPig(member));
      }
      return tupleFactory.newTuple(converted);
    }

    //
    // Pig Tuple/DataBag are returned as is.
    // This enables the creation of instances of DataBag which do not fit in
    // memory.
    //
    // It is advised to wrap objects into a call to groovyToPig
    // prior to adding them to a Tuple or DataBag.
    //

    if (groovyObject instanceof Tuple || groovyObject instanceof DataBag) {
      return groovyObject;
    }

    if (groovyObject instanceof Map) {
      return toPigMap((Map<?, ?>) groovyObject);
    }

    if (groovyObject instanceof List) {
      return toPigBag((List<?>) groovyObject);
    }

    //
    // Byte and Short have no Pig equivalent, they are widened to Integer
    //

    if (groovyObject instanceof Byte || groovyObject instanceof Short) {
      return ((Number) groovyObject).intValue();
    }

    //
    // The array is copied so the DataByteArray does not share its content
    // with the Groovy side
    //

    if (groovyObject instanceof byte[]) {
      byte[] original = (byte[]) groovyObject;
      byte[] copy = new byte[original.length];
      System.arraycopy(original, 0, copy, 0, copy.length);
      return new DataByteArray(copy);
    }

    //
    // Immutable types which have an equivalent in Pig are passed as is
    //

    if (groovyObject instanceof Integer || groovyObject instanceof Long
        || groovyObject instanceof Float || groovyObject instanceof Double
        || groovyObject instanceof BigInteger || groovyObject instanceof BigDecimal
        || groovyObject instanceof String || groovyObject instanceof Boolean
        || groovyObject instanceof org.joda.time.DateTime) {
      return groovyObject;
    }

    throw new ExecException("Unable to cast " + groovyObject.getClass().getName() + " to a Pig datatype.");
  }

  /**
   * Builds a Pig map from a Groovy map. Keys are converted with groovyToPig
   * and then turned into Strings with toString(), values are converted with
   * groovyToPig.
   *
   * @param groovyMap
   *          Groovy map to convert
   * @return a new HashMap holding the converted entries
   * @throws ExecException
   *           if a key is null or if a key or value cannot be converted
   */
  private static Map<String, Object> toPigMap(Map<?, ?> groovyMap) throws ExecException {
    Map<String, Object> pigMap = new HashMap<String, Object>();

    for (Map.Entry<?, ?> entry : groovyMap.entrySet()) {
      Object key = groovyToPig(entry.getKey());
      if (null == key) {
        // Report the problem explicitly instead of failing with a bare
        // NullPointerException on key.toString()
        throw new ExecException("Unable to convert a Groovy Map with a null key to a Pig map.");
      }
      pigMap.put(key.toString(), groovyToPig(entry.getValue()));
    }

    return pigMap;
  }

  /**
   * Builds a Pig DataBag from a Groovy list. Pig's bags can only contain
   * tuples, so an element whose converted value is neither a Tuple nor null
   * is wrapped in a single field Tuple. Null elements are added as is.
   *
   * @param groovyList
   *          Groovy list to convert
   * @return a new default DataBag holding the converted elements
   * @throws ExecException
   *           if an element cannot be converted
   */
  private static DataBag toPigBag(List<?> groovyList) throws ExecException {
    DataBag bag = bagFactory.newDefaultBag();

    for (Object element : groovyList) {
      Object converted = groovyToPig(element);
      if (converted instanceof Tuple || null == converted) {
        bag.add((Tuple) converted);
      } else {
        // Wrap value in a Tuple if it's not already a tuple
        bag.add(tupleFactory.newTuple(converted));
      }
    }

    return bag;
  }

  /**
   * Converts an object created on the Pig side to its Groovy counterpart.
   *
   * The conversions are as follows:
   *
   * <pre>
   * Pig                      Groovy
   * Tuple                    groovy.lang.Tuple
   * DataBag                  groovy.lang.Tuple containing the bag's size and
   *                          an iterator on its content
   * Map                      java.util.Map
   * int/long/float/double    as is
   * chararray                String
   * bytearray                byte[] (copy)
   * boolean                  boolean
   * BigInteger               BigInteger
   * BigDecimal               BigDecimal
   * org.joda.time.DateTime   org.joda.time.DateTime
   * null                     null
   * </pre>
   *
   * Anything else raises an exception.
   *
   * @param pigObject
   *          Pig object to convert
   * @return the Groovy counterpart of pigObject
   * @throws ExecException
   *           if pigObject (or one of its members) has no Groovy counterpart
   */
  public static Object pigToGroovy(Object pigObject) throws ExecException {

    if (null == pigObject) {
      return null;
    }

    if (pigObject instanceof Tuple) {
      Tuple tuple = (Tuple) pigObject;
      Object[] fields = new Object[tuple.size()];
      int idx = 0;
      for (Object field : tuple.getAll()) {
        fields[idx++] = pigToGroovy(field);
      }
      return new groovy.lang.Tuple(fields);
    }

    if (pigObject instanceof DataBag) {
      //
      // Return a Groovy Tuple containing the bag's size and an
      // iterator on its content (Iterator will return instances of
      // groovy.lang.Tuple)
      //
      DataBag bag = (DataBag) pigObject;
      Object[] sizeAndIterator = new Object[2];
      sizeAndIterator[0] = bag.size();
      sizeAndIterator[1] = new DataBagGroovyIterator(bag.iterator());
      return new groovy.lang.Tuple(sizeAndIterator);
    }

    if (pigObject instanceof Map) {
      Map<String, Object> groovyMap = new HashMap<String, Object>();
      for (Map.Entry<?, ?> entry : ((Map<?, ?>) pigObject).entrySet()) {
        groovyMap.put((String) pigToGroovy(entry.getKey()), pigToGroovy(entry.getValue()));
      }
      return groovyMap;
    }

    //
    // Immutable values are reused. Number also covers BigInteger and
    // BigDecimal, so they need no test of their own.
    //

    if (pigObject instanceof Number || pigObject instanceof String
        || pigObject instanceof Boolean || pigObject instanceof org.joda.time.DateTime) {
      return pigObject;
    }

    if (pigObject instanceof DataByteArray) {
      //
      // Allocate a new byte array so we don't use the original array
      //
      DataByteArray bytes = (DataByteArray) pigObject;
      byte[] copy = new byte[bytes.size()];
      System.arraycopy(bytes.get(), 0, copy, 0, copy.length);
      return copy;
    }

    throw new ExecException("Unable to cast pig datatype " + pigObject.getClass().getName() + " to a suitable Groovy Object.");
  }

  /**
   * Iterator over the tuples of a DataBag which converts each Pig Tuple to
   * a groovy.lang.Tuple as it is returned.
   */
  public static class DataBagGroovyIterator implements Iterator<groovy.lang.Tuple> {

    private final Iterator<Tuple> iter;

    /**
     * @param iter
     *          iterator on the Pig tuples to expose to Groovy
     */
    public DataBagGroovyIterator(Iterator<Tuple> iter) {
      this.iter = iter;
    }

    @Override
    public boolean hasNext() {
      return iter.hasNext();
    }

    /**
     * Returns the next tuple, converted with pigToGroovy. A conversion
     * failure is rethrown as a RuntimeException wrapping the ExecException
     * since Iterator.next() cannot throw checked exceptions.
     */
    @Override
    public groovy.lang.Tuple next() {
      try {
        return (groovy.lang.Tuple) pigToGroovy(iter.next());
      } catch (ExecException ee) {
        throw new RuntimeException(ee);
      }
    }

    /**
     * Does nothing: the call is not forwarded to the wrapped iterator.
     */
    @Override
    public void remove() {
    }
  }
}