/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.alibaba.garuda.data; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.TreeMap; import com.alibaba.garuda.plan.FrontendException; /** * A class of static final values used to encode data type and a number of * static helper funcitons for manipulating data objects. The data type * values could be * done as an enumeration, but it is done as byte codes instead to save * creating objects. */ public class DataType { // IMPORTANT! This list can be used to record values of data on disk, // so do not change the values. You may strand user data. // IMPORTANT! Order matters here, as compare() below uses the order to // order unlike datatypes. Don't change this ordering. // Spaced unevenly to leave room for new entries without changing // values or creating order issues. public static final byte UNKNOWN = 0; public static final byte NULL = 1; public static final byte BOOLEAN = 5; // internal use only public static final byte BYTE = 6; // internal use only public static final byte INTEGER = 10; public static final byte LONG = 15; public static final byte FLOAT = 20; public static final byte DOUBLE = 25; public static final byte BYTEARRAY = 50; public static final byte CHARARRAY = 55; /** * Internal use only. */ public static final byte BIGCHARARRAY = 60; //internal use only; for storing/loading chararray bigger than 64K characters in BinStorage public static final byte MAP = 100; public static final byte TUPLE = 110; public static final byte BAG = 120; /** * Internal use only; used to store WriteableComparable objects * for creating ordered index in MergeJoin. Expecting a object that * implements Writable interface and has default constructor */ public static final byte GENERIC_WRITABLECOMPARABLE = 123; /** * Internal use only. */ public static final byte INTERNALMAP = 127; // internal use only; for maps that are object->object. Used by FindQuantiles. public static final byte ERROR = -1; /** * Determine the datatype of an object. * @param o Object to test. * @return byte code of the type, or ERROR if we don't know. */ public static byte findType(Object o) { if (o == null) { return NULL; } // Try to put the most common first // if (o instanceof DataByteArray) { // return BYTEARRAY; // } else if (o instanceof String) { return CHARARRAY; // } else if (o instanceof Tuple) { // return TUPLE; // } else if (o instanceof DataBag) { // return BAG; } else if (o instanceof Integer) { return INTEGER; } else if (o instanceof Long) { return LONG; // } else if (o instanceof InternalMap) { // return INTERNALMAP; } else if (o instanceof Map) { return MAP; } else if (o instanceof Float) { return FLOAT; } else if (o instanceof Double) { return DOUBLE; } else if (o instanceof Boolean) { return BOOLEAN; } else if (o instanceof Byte) { return BYTE; } else {return ERROR;} } /** * Given a Type object determine the data type it represents. This isn't * cheap, as it uses reflection, so use sparingly. * @param t Type to examine * @return byte code of the type, or ERROR if we don't know. */ public static byte findType(Type t) { if (t == null) { return NULL; } // Try to put the most common first // if (t == DataByteArray.class) { // return BYTEARRAY; // } else if (t == String.class) { return CHARARRAY; } else if (t == Integer.class) { return INTEGER; } else if (t == Long.class) { return LONG; } else if (t == Float.class) { return FLOAT; } else if (t == Double.class) { return DOUBLE; } else if (t == Boolean.class) { return BOOLEAN; } else if (t == Byte.class) { return BYTE; // } else if (t == InternalMap.class) { // return INTERNALMAP; } else { // Might be a tuple or a bag, need to check the interfaces it // implements if (t instanceof Class) { return extractTypeFromClass(t); }else if (t instanceof ParameterizedType){ ParameterizedType impl=(ParameterizedType)t; Class c=(Class)impl.getRawType(); return extractTypeFromClass(c); } return ERROR; } } private static byte extractTypeFromClass(Type t) { Class c = (Class)t; Class[] ioeInterfaces = c.getInterfaces(); Class[] interfaces = null; if(c.isInterface()){ interfaces = new Class[ioeInterfaces.length+1]; interfaces[0] = c; for (int i = 1; i < interfaces.length; i++) { interfaces[i] = ioeInterfaces[i-1]; } } else { interfaces = ioeInterfaces; } boolean matchedWritableComparable = false; for (int i = 0; i < interfaces.length; i++) { if (interfaces[i].getName().equals("org.apache.pig.data.Tuple")) { return TUPLE; } else if (interfaces[i].getName().equals("org.apache.pig.data.DataBag")) { return BAG; } else if (interfaces[i].getName().equals("java.util.Map")) { return MAP; } else if (interfaces[i].getName().equals("org.apache.hadoop.io.WritableComparable")) { // use GENERIC_WRITABLECOMPARABLE type only as last resort matchedWritableComparable = true; } } if(matchedWritableComparable) { return GENERIC_WRITABLECOMPARABLE; } return ERROR; } /** * Return the number of types Pig knows about. * @return number of types */ public static int numTypes(){ byte[] types = genAllTypes(); return types.length; } /** * Get an array of all type values. * @return byte array with an entry for each type. */ public static byte[] genAllTypes(){ byte[] types = { DataType.BAG, DataType.BIGCHARARRAY, DataType.BOOLEAN, DataType.BYTE, DataType.BYTEARRAY, DataType.CHARARRAY, DataType.DOUBLE, DataType.FLOAT, DataType.GENERIC_WRITABLECOMPARABLE, DataType.INTEGER, DataType.INTERNALMAP, DataType.LONG, DataType.MAP, DataType.TUPLE}; return types; } private static String[] genAllTypeNames(){ String[] names = { "BAG", "BIGCHARARRAY", "BOOLEAN", "BYTE", "BYTEARRAY", "CHARARRAY", "DOUBLE", "FLOAT", "GENERIC_WRITABLECOMPARABLE", "INTEGER","INTERNALMAP", "LONG", "MAP", "TUPLE" }; return names; } /** * Get a map of type values to type names. * @return map */ public static Map<Byte, String> genTypeToNameMap(){ byte[] types = genAllTypes(); String[] names = genAllTypeNames(); Map<Byte,String> ret = new HashMap<Byte, String>(); for(int i=0;i<types.length;i++){ ret.put(types[i], names[i]); } return ret; } /** * Get a map of type names to type values. * @return map */ public static Map<String, Byte> genNameToTypeMap(){ byte[] types = genAllTypes(); String[] names = genAllTypeNames(); Map<String, Byte> ret = new HashMap<String, Byte>(); for(int i=0;i<types.length;i++){ ret.put(names[i], types[i]); } return ret; } /** * Get the type name. * @param o Object to test. * @return type name, as a String. */ public static String findTypeName(Object o) { return findTypeName(findType(o)); } /** * Get the type name from the type byte code * @param dt Type byte code * @return type name, as a String. */ public static String findTypeName(byte dt) { switch (dt) { case NULL: return "NULL"; case BOOLEAN: return "boolean"; case BYTE: return "byte"; case INTEGER: return "int"; case LONG: return "long"; case FLOAT: return "float"; case DOUBLE: return "double"; case BYTEARRAY: return "bytearray"; case BIGCHARARRAY: return "bigchararray"; case CHARARRAY: return "chararray"; case MAP: return "map"; case INTERNALMAP: return "internalmap"; case TUPLE: return "tuple"; case BAG: return "bag"; case GENERIC_WRITABLECOMPARABLE: return "generic_writablecomparable"; default: return "Unknown"; } } /** * Determine whether the this data type is complex. * @param dataType Data type code to test. * @return true if dataType is bag, tuple, or map. */ public static boolean isComplex(byte dataType) { return ((dataType == BAG) || (dataType == TUPLE) || (dataType == MAP) || (dataType == INTERNALMAP)); } /** * Determine whether the object is complex or atomic. * @param o Object to determine type of. * @return true if dataType is bag, tuple, or map. */ public static boolean isComplex(Object o) { return isComplex(findType(o)); } /** * Determine whether the this data type is atomic. * @param dataType Data type code to test. * @return true if dataType is bytearray, bigchararray, chararray, integer, long, * float, or boolean. */ public static boolean isAtomic(byte dataType) { return ((dataType == BYTEARRAY) || (dataType == CHARARRAY) || (dataType == BIGCHARARRAY) || (dataType == INTEGER) || (dataType == LONG) || (dataType == FLOAT) || (dataType == DOUBLE) || (dataType == BOOLEAN) || (dataType == BYTE) || (dataType == GENERIC_WRITABLECOMPARABLE)); } /** * Determine whether the this data type is atomic. * @param o Object to determine type of. * @return true if dataType is bytearray, chararray, integer, long, * float, or boolean. */ public static boolean isAtomic(Object o) { return isAtomic(findType(o)); } /** * Determine whether the this object can have a schema. * @param o Object to determine if it has a schema * @return true if the type can have a valid schema (i.e., bag or tuple) */ public static boolean isSchemaType(Object o) { return isSchemaType(findType(o)); } /** * Determine whether the this data type can have a schema. * @param dataType dataType to determine if it has a schema * @return true if the type can have a valid schema (i.e., bag or tuple) */ public static boolean isSchemaType(byte dataType) { return ((dataType == BAG) || (dataType == TUPLE) || dataType == MAP); } // /** // /** // * Compare two objects to each other. This function is necessary // * because there's no super class that implements compareTo. This // * function provides an (arbitrary) ordering of objects of different // * types as follows: NULL < BOOLEAN < BYTE < INTEGER < LONG < // * FLOAT < DOUBLE * < BYTEARRAY < STRING < MAP < // * TUPLE < BAG. No other functions should implement this cross // * object logic. They should call this function for it instead. // * @param o1 First object // * @param o2 Second object // * @return -1 if o1 is less, 0 if they are equal, 1 if o2 is less. // */ // public static int compare(Object o1, Object o2) { // // byte dt1 = findType(o1); // byte dt2 = findType(o2); // return compare(o1, o2, dt1, dt2); // } // /** // * Same as {@link #compare(Object, Object)}, but does not use reflection to determine the type // * of passed in objects, relying instead on the caller to provide the appropriate values, as // * determined by {@link DataType#findType(Object)}. // * // * Use this version in cases where multiple objects of the same type have to be repeatedly compared. // * @param o1 first object // * @param o2 second object // * @param dt1 type, as byte value, of o1 // * @param dt2 type, as byte value, of o2 // * @return -1 if o1 is < o2, 0 if they are equal, 1 if o1 > o2 // */ // @SuppressWarnings("unchecked") // public static int compare(Object o1, Object o2, byte dt1, byte dt2) { // if (dt1 == dt2) { // switch (dt1) { // case NULL: // return 0; // // case BOOLEAN: // return ((Boolean)o1).compareTo((Boolean)o2); // // case BYTE: // return ((Byte)o1).compareTo((Byte)o2); // // case INTEGER: // return ((Integer)o1).compareTo((Integer)o2); // // case LONG: // return ((Long)o1).compareTo((Long)o2); // // case FLOAT: // return ((Float)o1).compareTo((Float)o2); // // case DOUBLE: // return ((Double)o1).compareTo((Double)o2); // //// case BYTEARRAY: //// return ((DataByteArray)o1).compareTo(o2); // // case CHARARRAY: // return ((String)o1).compareTo((String)o2); // // case MAP: { // Map<String, Object> m1 = (Map<String, Object>)o1; // Map<String, Object> m2 = (Map<String, Object>)o2; // int sz1 = m1.size(); // int sz2 = m2.size(); // if (sz1 < sz2) { // return -1; // } else if (sz1 > sz2) { // return 1; // } else { // // This is bad, but we have to sort the keys of the maps in order // // to be commutative. // TreeMap<String, Object> tm1 = new TreeMap<String, Object>(m1); // TreeMap<String, Object> tm2 = new TreeMap<String, Object>(m2); // Iterator<Map.Entry<String, Object> > i1 = // tm1.entrySet().iterator(); // Iterator<Map.Entry<String, Object> > i2 = // tm2.entrySet().iterator(); // while (i1.hasNext()) { // Map.Entry<String, Object> entry1 = i1.next(); // Map.Entry<String, Object> entry2 = i2.next(); // int c = entry1.getKey().compareTo(entry2.getKey()); // if (c != 0) { // return c; // } else { // c = compare(entry1.getValue(), entry2.getValue()); // if (c != 0) { // return c; // } // } // } // return 0; // } // } // // case GENERIC_WRITABLECOMPARABLE: // return ((Comparable)o1).compareTo(o2); // // case INTERNALMAP: // return -1; // Don't think anyway will want to do this. // // case TUPLE: // return ((Tuple)o1).compareTo(o2); // // case BAG: // return ((DataBag)o1).compareTo(o2); // // // default: // throw new RuntimeException("Unkown type " + dt1 + // " in compare"); // } // } else if (dt1 < dt2) { // return -1; // } else { // return 1; // } // } public static byte[] toBytes(Object o) throws FrontendException { return toBytes(o, findType(o)); } @SuppressWarnings("unchecked") public static byte[] toBytes(Object o, byte type) throws FrontendException { switch (type) { case BOOLEAN: return ((Boolean) o).booleanValue() ? new byte[] {1} : new byte[] {0}; case BYTE: return new byte[] {((Byte) o)}; case INTEGER: case DOUBLE: case FLOAT: case LONG: return ((Number) o).toString().getBytes(); case CHARARRAY: return ((String) o).getBytes(); case MAP: return mapToString((Map<String, Object>) o).getBytes(); // case TUPLE: // return ((Tuple) o).toString().getBytes(); // case BYTEARRAY: // return ((DataByteArray) o).get(); // case BAG: // return ((DataBag) o).toString().getBytes(); case NULL: return null; default: int errCode = 1071; String msg = "Cannot convert a " + findTypeName(o) + " to a ByteArray"; throw new FrontendException(msg); } } // /** // * Force a data object to an Integer, if possible. Any numeric type // * can be forced to an Integer (though precision may be lost), as well // * as CharArray, ByteArray, or Boolean. Complex types cannot be // * forced to an Integer. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is an Integer you // * should just cast it. // * @param o object to cast // * @param type of the object you are casting // * @return The object as an Integer. // * @throws FrontendException if the type can't be forced to an Integer. // */ // public static Integer toInteger(Object o,byte type) throws FrontendException { // try { // switch (type) { // case BOOLEAN: // if (((Boolean)o) == true) { // return Integer.valueOf(1); // } else { // return Integer.valueOf(0); // } // // case BYTE: // return Integer.valueOf(((Byte)o).intValue()); // // case INTEGER: // return (Integer)o; // // case LONG: // return Integer.valueOf(((Long)o).intValue()); // // case FLOAT: // return Integer.valueOf(((Float)o).intValue()); // // case DOUBLE: // return Integer.valueOf(((Double)o).intValue()); // // case BYTEARRAY: // return Integer.valueOf(((DataByteArray)o).toString()); // // case CHARARRAY: // return Integer.valueOf((String)o); // // case NULL: // return null; // // case MAP: // case INTERNALMAP: // case TUPLE: // case BAG: // case UNKNOWN: // default: // int errCode = 1071; // String msg = "Cannot convert a " + findTypeName(o) + // " to an Integer"; // throw new FrontendException(msg, errCode, PigException.INPUT); // } // } catch (ClassCastException cce) { // throw cce; // } catch (FrontendException ee) { // throw ee; // } catch (NumberFormatException nfe) { // int errCode = 1074; // String msg = "Problem with formatting. Could not convert " + o + " to Integer."; // throw new FrontendException(msg, errCode, PigException.INPUT, nfe); // } catch (Exception e) { // int errCode = 2054; // String msg = "Internal error. Could not convert " + o + " to Integer."; // throw new FrontendException(msg, errCode, PigException.BUG); // } // } // /** // * Force a data object to an Integer, if possible. Any numeric type // * can be forced to an Integer (though precision may be lost), as well // * as CharArray, ByteArray, or Boolean. Complex types cannot be // * forced to an Integer. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is an Integer you // * should just cast it. Unlike {@link #toInteger(Object, byte)} this // * method will first determine the type of o and then do the cast. // * Use {@link #toInteger(Object, byte)} if you already know the type. // * @param o object to cast // * @return The object as an Integer. // * @throws FrontendException if the type can't be forced to an Integer. // */ // public static Integer toInteger(Object o) throws FrontendException { // return toInteger(o, findType(o)); // } // /** // * Force a data object to a Long, if possible. Any numeric type // * can be forced to a Long (though precision may be lost), as well // * as CharArray, ByteArray, or Boolean. Complex types cannot be // * forced to a Long. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a Long you // * should just cast it. // * @param o object to cast // * @param type of the object you are casting // * @return The object as a Long. // * @throws FrontendException if the type can't be forced to a Long. // */ // public static Long toLong(Object o,byte type) throws FrontendException { // try { // switch (type) { // case BOOLEAN: // if (((Boolean)o) == true) { // return Long.valueOf(1); // } else { // return Long.valueOf(0); // } // // case BYTE: // return Long.valueOf(((Byte)o).longValue()); // // case INTEGER: // return Long.valueOf(((Integer)o).longValue()); // // case LONG: // return (Long)o; // // case FLOAT: // return Long.valueOf(((Float)o).longValue()); // // case DOUBLE: // return Long.valueOf(((Double)o).longValue()); // // case BYTEARRAY: // return Long.valueOf(((DataByteArray)o).toString()); // // case CHARARRAY: // return Long.valueOf((String)o); // // case NULL: // return null; // // case MAP: // case INTERNALMAP: // case TUPLE: // case BAG: // case UNKNOWN: // default: // int errCode = 1071; // String msg = "Cannot convert a " + findTypeName(o) + // " to a Long"; // throw new FrontendException(msg, errCode, PigException.INPUT); // } // } catch (ClassCastException cce) { // throw cce; // } catch (FrontendException ee) { // throw ee; // } catch (NumberFormatException nfe) { // int errCode = 1074; // String msg = "Problem with formatting. Could not convert " + o + " to Long."; // throw new FrontendException(msg, errCode, PigException.INPUT, nfe); // } catch (Exception e) { // int errCode = 2054; // String msg = "Internal error. Could not convert " + o + " to Long."; // throw new FrontendException(msg, errCode, PigException.BUG); // } // // } // /** // * Force a data object to a Long, if possible. Any numeric type // * can be forced to a Long (though precision may be lost), as well // * as CharArray, ByteArray, or Boolean. Complex types cannot be // * forced to an Long. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a Long you // * should just cast it. Unlike {@link #toLong(Object, byte)} this // * method will first determine the type of o and then do the cast. // * Use {@link #toLong(Object, byte)} if you already know the type. // * @param o object to cast // * @return The object as a Long. // * @throws FrontendException if the type can't be forced to an Long. // */ // public static Long toLong(Object o) throws FrontendException { // return toLong(o, findType(o)); // } // /** // * Force a data object to a Float, if possible. Any numeric type // * can be forced to a Float (though precision may be lost), as well // * as CharArray, ByteArray. Complex types cannot be // * forced to a Float. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a Float you // * should just cast it. // * @param o object to cast // * @param type of the object you are casting // * @return The object as a Float. // * @throws FrontendException if the type can't be forced to a Float. // */ // public static Float toFloat(Object o,byte type) throws FrontendException { // try { // switch (type) { // case INTEGER: // return new Float(((Integer)o).floatValue()); // // case LONG: // return new Float(((Long)o).floatValue()); // // case FLOAT: // return (Float)o; // // case DOUBLE: // return new Float(((Double)o).floatValue()); // // case BYTEARRAY: // return Float.valueOf(((DataByteArray)o).toString()); // // case CHARARRAY: // return Float.valueOf((String)o); // // case NULL: // return null; // // case BOOLEAN: // case BYTE: // case MAP: // case INTERNALMAP: // case TUPLE: // case BAG: // case UNKNOWN: // default: // int errCode = 1071; // String msg = "Cannot convert a " + findTypeName(o) + // " to a Float"; // throw new FrontendException(msg, errCode, PigException.INPUT); // } // } catch (ClassCastException cce) { // throw cce; // } catch (FrontendException ee) { // throw ee; // } catch (NumberFormatException nfe) { // int errCode = 1074; // String msg = "Problem with formatting. Could not convert " + o + " to Float."; // throw new FrontendException(msg, errCode, PigException.INPUT, nfe); // } catch (Exception e) { // int errCode = 2054; // String msg = "Internal error. Could not convert " + o + " to Float."; // throw new FrontendException(msg, errCode, PigException.BUG); // } // } // // /** // * Force a data object to a Float, if possible. Any numeric type // * can be forced to a Float (though precision may be lost), as well // * as CharArray, ByteArray, or Boolean. Complex types cannot be // * forced to an Float. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a Float you // * should just cast it. Unlike {@link #toFloat(Object, byte)} this // * method will first determine the type of o and then do the cast. // * Use {@link #toFloat(Object, byte)} if you already know the type. // * @param o object to cast // * @return The object as a Float. // * @throws FrontendException if the type can't be forced to an Float. // */ // public static Float toFloat(Object o) throws FrontendException { // return toFloat(o, findType(o)); // } // // /** // * Force a data object to a Double, if possible. Any numeric type // * can be forced to a Double, as well // * as CharArray, ByteArray. Complex types cannot be // * forced to a Double. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a Double you // * should just cast it. // * @param o object to cast // * @param type of the object you are casting // * @return The object as a Double. // * @throws FrontendException if the type can't be forced to a Double. // */ // public static Double toDouble(Object o,byte type) throws FrontendException { // try { // switch (type) { // case INTEGER: // return new Double(((Integer)o).doubleValue()); // // case LONG: // return new Double(((Long)o).doubleValue()); // // case FLOAT: // return new Double(((Float)o).doubleValue()); // // case DOUBLE: // return (Double)o; // // case BYTEARRAY: // return Double.valueOf(((DataByteArray)o).toString()); // // case CHARARRAY: // return Double.valueOf((String)o); // // case NULL: // return null; // // case BOOLEAN: // case BYTE: // case MAP: // case INTERNALMAP: // case TUPLE: // case BAG: // case UNKNOWN: // default: // int errCode = 1071; // String msg = "Cannot convert a " + findTypeName(o) + // " to a Double"; // throw new FrontendException(msg, errCode, PigException.INPUT); // } // } catch (ClassCastException cce) { // throw cce; // } catch (FrontendException ee) { // throw ee; // } catch (NumberFormatException nfe) { // int errCode = 1074; // String msg = "Problem with formatting. Could not convert " + o + " to Double."; // throw new FrontendException(msg, errCode, PigException.INPUT, nfe); // } catch (Exception e) { // int errCode = 2054; // String msg = "Internal error. Could not convert " + o + " to Double."; // throw new FrontendException(msg, errCode, PigException.BUG); // } // } // // /** // * Force a data object to a Double, if possible. Any numeric type // * can be forced to a Double, as well // * as CharArray, ByteArray, or Boolean. Complex types cannot be // * forced to an Double. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a Double you // * should just cast it. Unlike {@link #toDouble(Object, byte)} this // * method will first determine the type of o and then do the cast. // * Use {@link #toDouble(Object, byte)} if you already know the type. // * @param o object to cast // * @return The object as a Double. // * @throws FrontendException if the type can't be forced to an Double. // */ // public static Double toDouble(Object o) throws FrontendException { // return toDouble(o, findType(o)); // } // // /** // * Force a data object to a String, if possible. Any simple (atomic) type // * can be forced to a String including ByteArray. Complex types cannot be // * forced to a String. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a String you // * should just cast it. // * @param o object to cast // * @param type of the object you are casting // * @return The object as a String. // * @throws FrontendException if the type can't be forced to a String. // */ // public static String toString(Object o,byte type) throws FrontendException { // try { // switch (type) { // case INTEGER: // return ((Integer)o).toString(); // // case LONG: // return ((Long)o).toString(); // // case FLOAT: // return ((Float)o).toString(); // // case DOUBLE: // return ((Double)o).toString(); // // case BYTEARRAY: // return ((DataByteArray)o).toString(); // // case CHARARRAY: // return ((String)o); // // case NULL: // return null; // // case BOOLEAN: // return ((Boolean)o).toString(); // // case BYTE: // return ((Byte)o).toString(); // // case MAP: // case INTERNALMAP: // case TUPLE: // case BAG: // case UNKNOWN: // default: // int errCode = 1071; // String msg = "Cannot convert a " + findTypeName(o) + // " to a String"; // throw new FrontendException(msg, errCode, PigException.INPUT); // } // } catch (ClassCastException cce) { // throw cce; // } catch (FrontendException ee) { // throw ee; // } catch (Exception e) { // int errCode = 2054; // String msg = "Internal error. Could not convert " + o + " to String."; // throw new FrontendException(msg, errCode, PigException.BUG); // } // } // // /** // * Force a data object to a String, if possible. Any simple (atomic) type // * can be forced to a String including ByteArray. Complex types cannot be // * forced to a String. This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a String you // * should just cast it. Unlike {@link #toString(Object, byte)} this // * method will first determine the type of o and then do the cast. // * Use {@link #toString(Object, byte)} if you already know the type. // * @param o object to cast // * @return The object as a String. // * @throws FrontendException if the type can't be forced to a String. // */ // public static String toString(Object o) throws FrontendException { // return toString(o, findType(o)); // } // // /** // * If this object is a map, return it as a map. // * This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a Map you // * should just cast it. // * @param o object to cast // * @return The object as a Map. // * @throws FrontendException if the type can't be forced to a Double. // */ // @SuppressWarnings("unchecked") // public static Map<String, Object> toMap(Object o) throws FrontendException { // if (o == null) { // return null; // } // // if (o instanceof Map && !(o instanceof InternalMap)) { // try { // return (Map<String, Object>)o; // } catch (Exception e) { // int errCode = 2054; // String msg = "Internal error. Could not convert " + o + " to Map."; // throw new FrontendException(msg, errCode, PigException.BUG); // } // } else { // int errCode = 1071; // String msg = "Cannot convert a " + findTypeName(o) + // " to a Map"; // throw new FrontendException(msg, errCode, PigException.INPUT); // } // } // /** // * If this object is a tuple, return it as a tuple. // * This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a Tuple you // * should just cast it. // * @param o object to cast // * @return The object as a Double. // * @throws FrontendException if the type can't be forced to a Double. // */ // public static Tuple toTuple(Object o) throws FrontendException { // if (o == null) { // return null; // } // // if (o instanceof Tuple) { // try { // return (Tuple)o; // } catch (Exception e) { // int errCode = 2054; // String msg = "Internal error. Could not convert " + o + " to Tuple."; // throw new FrontendException(msg, errCode, PigException.BUG); // } // } else { // int errCode = 1071; // String msg = "Cannot convert a " + findTypeName(o) + // " to a Tuple"; // throw new FrontendException(msg, errCode, PigException.INPUT); // } // } // /** // * If this object is a bag, return it as a bag. // * This isn't particularly efficient, so if you // * already <b>know</b> that the object you have is a bag you // * should just cast it. // * @param o object to cast // * @return The object as a Double. // * @throws FrontendException if the type can't be forced to a Double. // */ // public static DataBag toBag(Object o) throws FrontendException { // if (o == null) { // return null; // } // // if (o instanceof DataBag) { // try { // return (DataBag)o; // } catch (Exception e) { // int errCode = 2054; // String msg = "Internal error. Could not convert " + o + " to Bag."; // throw new FrontendException(msg, errCode, PigException.BUG); // } // } else { // int errCode = 1071; // String msg = "Cannot convert a " + findTypeName(o) + // " to a DataBag"; // throw new FrontendException(msg, errCode, PigException.INPUT); // } // } // // /** // * Purely for debugging // */ // public static void spillTupleContents(Tuple t, String label) { // System.out.print("Tuple " + label + " "); // Iterator<Object> i = t.getAll().iterator(); // for (int j = 0; i.hasNext(); j++) { // System.out.print(j + ":" + i.next().getClass().getName() + " "); // } // System.out.println(t.toString()); // } /** * Determine if this type is a numeric type. * @param t type (as byte value) to test * @return true if this is a numeric type, false otherwise */ public static boolean isNumberType(byte t) { switch (t) { case INTEGER: return true ; case LONG: return true ; case FLOAT: return true ; case DOUBLE: return true ; default: return false ; } } /** * Determine if this is a type that can work can be done on. * @param t type (as a byte value) to test * @return false if the type is unknown, null, or error; true otherwise. */ public static boolean isUsableType(byte t) { switch (t) { case UNKNOWN: return false ; case NULL: return false ; case ERROR: return false ; default :return true ; } } /** * Test if one type can cast to the other. * @param castType data type of the cast type * @param inputType data type of the input * @return true or false */ public static boolean castable(byte castType, byte inputType) { // Only legal types can be cast to if ( (!DataType.isUsableType(castType)) || (!DataType.isUsableType(inputType)) ) { return false; } // Same type is castable if (castType==inputType) { return true; } // Numerical type is castable if ( (DataType.isNumberType(castType)) && (DataType.isNumberType(inputType)) ) { return true; } // databyte can cast to anything if (inputType == DataType.BYTEARRAY) { return true; } // Cast numerical type to string, or vice versa is valid if (DataType.isNumberType(inputType)&&castType==DataType.CHARARRAY || DataType.isNumberType(castType)&&inputType==DataType.CHARARRAY) return true; // else return false return false; } /** * Merge types if possible. Merging types means finding a type that one * or both types can be upcast to. * @param type1 * @param type2 * @return the merged type, or DataType.ERROR if not successful */ public static byte mergeType(byte type1, byte type2) { // Only legal types can be merged if ( (!DataType.isUsableType(type1)) || (!DataType.isUsableType(type2)) ) { return DataType.ERROR ; } // Same type is OK if (type1==type2) { return type1 ; } // Both are number so we return the bigger type if ( (DataType.isNumberType(type1)) && (DataType.isNumberType(type2)) ) { return type1>type2 ? type1:type2 ; } // One is bytearray and the other is (number or chararray) if (type1 == DataType.BYTEARRAY) { return type2 ; } if (type2 == DataType.BYTEARRAY) { return type1 ; } // else return just ERROR return DataType.ERROR ; } /** * Given a map, turn it into a String. * @param m map * @return string representation of the map */ public static String mapToString(Map<String, Object> m) { boolean hasNext = false; StringBuilder sb = new StringBuilder(); sb.append("["); for(Map.Entry<String, Object> e: m.entrySet()) { if(hasNext) { sb.append(","); } else { hasNext = true; } sb.append(e.getKey()); sb.append("#"); Object val = e.getValue(); if(val != null) { sb.append(val.toString()); } } sb.append("]"); return sb.toString(); } /** * Test whether two byte arrays (Java byte arrays not Pig byte arrays) are * equal. I have no idea why we have this function. * @param lhs byte array 1 * @param rhs byte array 2 * @return true if both are null or the two are the same length and have * the same bytes. */ public static boolean equalByteArrays(byte[] lhs, byte[] rhs) { if(lhs == null && rhs == null) { return true; } if(lhs == null || rhs == null) { return false; } if(lhs.length != rhs.length) { return false; } for(int i = 0; i < lhs.length; ++i) { if(lhs[i] != rhs[i]) { return false; } } return true; } // /** // * Utility method that determines the schema from the passed in dataType. // * If the dataType is Bag or Tuple, then we need to determine the schemas inside this dataType; // * for this we iterate through the fields inside this field. This method works both for raw objects // * and ResourceSchema.ResourceFieldSchema field descriptions; the specific behavior is determined by the klass // * parameter. // * @param dataType DataType.CHARARRAY, DataType.TUPLE, and so on // * @param fieldIter iterator over the fields if this is a tuple or a bag // * @param fieldNum number of fields inside the field if a tuple // * @param klass should be Object or ResourceSchema.ResourceFieldSchema // * @return // * @throws FrontendException // * @throws FrontendException // * @throws SchemaMergeException // */ // @SuppressWarnings("deprecation") // private static Schema.FieldSchema determineFieldSchema(byte dataType, Iterator fieldIter, // long fieldNum, Class klass ) throws FrontendException, FrontendException, SchemaMergeException { // switch (dataType) { // case NULL: // return new Schema.FieldSchema(null, BYTEARRAY); // // case BOOLEAN: // case INTEGER: // case LONG: // case FLOAT: // case DOUBLE: // case BYTEARRAY: // case CHARARRAY: // case MAP: // return new Schema.FieldSchema(null, dataType); // case TUPLE: { // Schema schema = null; // if(fieldNum != 0) { // schema = new Schema(); // for(int i = 0; i < fieldNum; ++i) { // schema.add(determineFieldSchema(klass.cast(fieldIter.next()))); // } // } // return new Schema.FieldSchema(null, schema, TUPLE); // } // // case BAG: { // Schema schema = null; // Schema bagSchema = null; // // if(fieldNum != 0) { // ArrayList<Schema> schemas = new ArrayList<Schema>(); // while (fieldIter.hasNext() ) { // schemas.add(determineFieldSchema(klass.cast(fieldIter.next())).schema); // } // schema = schemas.get(0); // if(null == schema) { // Schema.FieldSchema tupleFs = new Schema.FieldSchema(null, null, TUPLE); // bagSchema = new Schema(tupleFs); // bagSchema.setTwoLevelAccessRequired(true); // return new Schema.FieldSchema(null, bagSchema, BAG); // } // int schemaSize = schema.size(); // // for(int i = 1; i < schemas.size(); ++i) { // Schema currSchema = schemas.get(i); // if((null == currSchema) || (currSchema.size() != schemaSize)) { // Schema.FieldSchema tupleFs = new Schema.FieldSchema(null, null, TUPLE); // bagSchema = new Schema(tupleFs); // bagSchema.setTwoLevelAccessRequired(true); // return new Schema.FieldSchema(null, bagSchema, BAG); // } // schema = Schema.mergeSchema(schema, currSchema, false, false, false); // } // Schema.FieldSchema tupleFs = new Schema.FieldSchema(null, schema, TUPLE); // bagSchema = new Schema(tupleFs); // // since this schema has tuple field schema which internally // // has a list of field schemas for the actual items in the bag // // an access to any field in the bag is a two level access // bagSchema.setTwoLevelAccessRequired(true); // } // return new Schema.FieldSchema(null, bagSchema, BAG); // } // default: { // int errCode = 1073; // String msg = "Cannot determine field schema"; // throw new FrontendException(msg, errCode, PigException.INPUT); // } // // } // } // /*** // * Determine the field schema of an ResourceFieldSchema // * @param rcFieldSchema the rcFieldSchema we want translated // * @return the field schema corresponding to the object // * @throws FrontendException,FrontendException,SchemaMergeException // */ // public static Schema.FieldSchema determineFieldSchema(ResourceSchema.ResourceFieldSchema rcFieldSchema) // throws FrontendException, SchemaMergeException { // byte dt = rcFieldSchema.getType(); // Iterator<ResourceSchema.ResourceFieldSchema> fieldIter = null; // long fieldNum = 0; // if (dt == TUPLE || dt == BAG ) { // fieldIter = Arrays.asList(rcFieldSchema.getSchema().getFields()).iterator(); // fieldNum = rcFieldSchema.getSchema().getFields().length; // } // return determineFieldSchema(dt, fieldIter, fieldNum, ResourceSchema.ResourceFieldSchema.class); // } // // // /*** // * Determine the field schema of an object // * @param o the object whose field schema is to be determined // * @return the field schema corresponding to the object // * @throws FrontendException,FrontendException,SchemaMergeException // */ // public static Schema.FieldSchema determineFieldSchema(Object o) // throws FrontendException, SchemaMergeException { // byte dt = findType(o); // Iterator fieldIter = null; // long fieldNum = 0; // if ( dt == TUPLE ) { // fieldIter = ((Tuple) o).getAll().iterator(); // fieldNum = ((Tuple) o).size(); // } else if ( dt == BAG ) { // fieldNum = ((DataBag) o).size(); // fieldIter = ((DataBag)o).iterator(); // } // return determineFieldSchema(dt, fieldIter, fieldNum, Object.class); // } }