/* (c) 2014 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */
package com.linkedin.cubert.utils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.ColumnType;
import com.linkedin.cubert.block.DataType;
/**
 * Various Tuple utility methods.
 *
 * @author Maneesh Varshney
 */
public class TupleUtils
{
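    /**
     * Stores a long value into a tuple field, narrowing it to the column's
     * declared {@link DataType} (BYTE, INT or LONG).
     *
     * @throws ExecException if the tuple field cannot be set
     * @throws IllegalArgumentException if {@code type} is not an integral type
     */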
    public static void setLong(Tuple tuple, long value, int index, DataType type) throws ExecException
    {
        switch (type)
        {
        case BYTE:
            tuple.set(index, (byte) value);
            break;
        case INT:
            tuple.set(index, (int) value);
            break;
        case LONG:
            tuple.set(index, value);
            break;
        default:
            throw new IllegalArgumentException("Type " + type
                    + " cannot be converted from long");
        }
    }
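
    /**
     * Stores a double value into a tuple field, narrowing it to the column's
     * declared {@link DataType} (FLOAT or DOUBLE).
     *
     * @throws ExecException if the tuple field cannot be set
     * @throws IllegalArgumentException if {@code type} is not a floating-point type
     */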
    public static void setDouble(Tuple tuple, double value, int index, DataType type) throws ExecException
    {
        switch (type)
        {
        case DOUBLE:
            tuple.set(index, value);
            break;
        case FLOAT:
            tuple.set(index, (float) value);
            break;
        default:
            throw new IllegalArgumentException("Type " + type
                    + " cannot be converted from double");
        }
    }
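
    /**
     * Projects the named key columns of {@code fromTuple} into an existing
     * {@code toTuple}, avoiding a tuple allocation per call.
     *
     * <p>Illustrative sketch ({@code rows}, {@code schema} and the column
     * names are assumed to exist for this example):
     *
     * <pre>{@code
     * // reuse one two-field tuple across all rows
     * Tuple keyTuple = TupleFactory.getInstance().newTuple(2);
     * String[] keys = { "memberId", "country" };
     * for (Tuple row : rows)
     *     TupleUtils.extractTupleWithReuse(row, schema, keyTuple, keys);
     * }</pre>
     *
     * @return {@code toTuple}, for convenience
     */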
    public static Tuple extractTupleWithReuse(Tuple fromTuple,
                                              BlockSchema schema,
                                              Tuple toTuple,
                                              String[] keys) throws ExecException
    {
        for (int i = 0; i < keys.length; i++)
            toTuple.set(i, fromTuple.get(schema.getIndex(keys[i])));

        return toTuple;
    }
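
    /**
     * Projects the named key columns of a tuple into a freshly allocated tuple
     * of {@code keys.length} fields.
     */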
    public static Tuple extractTuple(Tuple tuple, BlockSchema schema, String[] keys) throws ExecException
    {
        Tuple extracted = TupleFactory.getInstance().newTuple(keys.length);
        return extractTupleWithReuse(tuple, schema, extracted, keys);
    }
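
    /**
     * Shallow-copies every field of {@code src} into {@code dest}: nested
     * tuples, bags, maps and byte arrays are shared by reference.
     * {@code dest} must have at least {@code src.size()} fields.
     */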
    public static void copy(Tuple src, Tuple dest) throws ExecException
    {
        int idx = 0;
        for (Object val : src.getAll())
        {
            dest.set(idx++, val);
        }
    }
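
    /**
     * Deep-copies every field of {@code src} into {@code dest}: nested tuples,
     * bags, maps and {@link DataByteArray}s are recursively cloned, so later
     * mutation of {@code src} does not affect {@code dest}.
     */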
    public static void deepCopy(Tuple src, Tuple dest) throws ExecException
    {
        int idx = 0;
        for (Object val : src.getAll())
        {
            dest.set(idx++, getFieldDeepCopy(val));
        }
    }
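
    /**
     * Deep-copies {@code src} into {@code dest} while reusing the nested
     * tuples already allocated in {@code dest} instead of creating new ones.
     * Only scalar fields and nested tuples are supported; bags, maps and byte
     * arrays cause an {@link UnsupportedOperationException}.
     */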
    public static void deepCopyWithReuse(Tuple src, Tuple dest) throws ExecException
    {
        int idx = 0;
        for (Object val : src.getAll())
        {
            deepFieldCopyWithReuse(idx++, val, dest);
        }
    }
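
    /**
     * Copies a single field value into {@code dest} at index {@code idx}: a
     * nested tuple is deep-copied into the tuple already stored at that index,
     * a scalar is assigned directly, and bags, maps and byte arrays are
     * rejected because they cannot be copied without allocation.
     */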
    public static void deepFieldCopyWithReuse(int idx, Object val, Tuple dest) throws ExecException
    {
        if (val instanceof Tuple)
            deepCopyWithReuse((Tuple) val, (Tuple) dest.get(idx));
        else if (val instanceof DataBag)
            throw new UnsupportedOperationException("Cannot deep copy with reuse for a bag");
        else if (val instanceof DataByteArray)
            throw new UnsupportedOperationException("Cannot deep copy with reuse for a data byte array");
        else if (val instanceof Map)
            throw new UnsupportedOperationException("Cannot deep copy with reuse for a map");
        else
            dest.set(idx, val);
    }
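
    /**
     * Returns a newly allocated deep copy of {@code originTuple}.
     *
     * <p>Illustrative sketch (the field values are made up for this example):
     *
     * <pre>{@code
     * Tuple original = TupleFactory.getInstance().newTuple(2);
     * original.set(0, 42);
     * original.set(1, new DataByteArray("bytes".getBytes()));
     *
     * Tuple copy = TupleUtils.getDeepCopy(original);
     * ((DataByteArray) original.get(1)).set("changed");
     * // copy.get(1) still holds "bytes": the byte array was cloned
     * }</pre>
     */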
    public static Tuple getDeepCopy(Tuple originTuple) throws ExecException
    {
        Tuple copiedTuple = TupleFactory.getInstance().newTuple(originTuple.size());
        deepCopy(originTuple, copiedTuple);
        return copiedTuple;
    }
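
    /**
     * Deep-copies a bag by deep-copying each tuple it contains into a new
     * default bag.
     */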
    private static DataBag getBagDeepCopy(DataBag originBag) throws ExecException
    {
        ArrayList<Tuple> copiedTuples = new ArrayList<Tuple>((int) originBag.size());
        for (Tuple t : originBag)
            copiedTuples.add(getDeepCopy(t));

        return BagFactory.getInstance().newDefaultBag(copiedTuples);
    }
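
    /**
     * Deep-copies a map entry by entry: keys (expected to be strings) are
     * shared, values are deep-copied.
     */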
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private static Map getMapDeepCopy(Map originMap) throws ExecException
    {
        Map copiedMap = new HashMap(originMap.size());
        for (Map.Entry entry : (Set<Map.Entry>) originMap.entrySet())
        {
            assert (entry.getKey().getClass() == String.class);
            copiedMap.put(entry.getKey(), getFieldDeepCopy(entry.getValue()));
        }
        return copiedMap;
    }
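
    /**
     * Clones the underlying {@code byte[]} so the copy is independent of the
     * original.
     */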
    private static DataByteArray getByteArrayDeepCopy(DataByteArray originByteArray)
    {
        return new DataByteArray(originByteArray.get().clone());
    }
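
    /**
     * Deep-copies a single field value by dispatching on its runtime type;
     * immutable scalars (numbers, strings, booleans, null) are returned as-is.
     */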
    private static Object getFieldDeepCopy(Object val) throws ExecException
    {
        if (val instanceof Tuple)
            return getDeepCopy((Tuple) val);
        else if (val instanceof DataBag)
            return getBagDeepCopy((DataBag) val);
        else if (val instanceof DataByteArray)
            return getByteArrayDeepCopy((DataByteArray) val);
        else if (val instanceof Map)
            return getMapDeepCopy((Map) val);

        return val;
    }
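
    /**
     * Deep-copies a tuple using its {@link BlockSchema} to dispatch on the
     * declared column types rather than the runtime types of the values.
     * Note that scalar columns, including BYTES and UNKNOWN, are copied by
     * reference here; unlike {@link #getDeepCopy(Tuple)}, this method does
     * not clone {@link DataByteArray} contents.
     */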
    @SuppressWarnings("rawtypes")
    public static Tuple getDeepCopy(Tuple originTuple, BlockSchema schema) throws ExecException
    {
        Tuple copiedTuple = TupleFactory.getInstance().newTuple(originTuple.size());

        for (int i = 0; i < schema.getNumColumns(); i++)
        {
            ColumnType type = schema.getColumnType(i);
            switch (type.getType())
            {
            case BYTE:
            case BOOLEAN:
            case INT:
            case LONG:
            case FLOAT:
            case DOUBLE:
            case STRING:
            case ENUM:
            case BYTES:
            case UNKNOWN:
                // scalars are immutable; BYTES and UNKNOWN are shared by reference
                copiedTuple.set(i, originTuple.get(i));
                break;

            case RECORD:
            case TUPLE:
                copiedTuple.set(i, getDeepCopy((Tuple) originTuple.get(i), type.getColumnSchema()));
                break;

            case BAG:
            case ARRAY:
                copiedTuple.set(i, getBagDeepCopy((DataBag) originTuple.get(i), type.getColumnSchema()));
                break;

            case MAP:
                copiedTuple.set(i, getMapDeepCopy((Map) originTuple.get(i)));
                break;
            }
        }

        return copiedTuple;
    }
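
    /**
     * Deep-copies a bag whose element schema is known: the single column of
     * {@code bagSchema} describes the tuples inside the bag.
     */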
    private static DataBag getBagDeepCopy(DataBag originBag, BlockSchema bagSchema) throws ExecException
    {
        BlockSchema tupleSchema = bagSchema.getColumnType(0).getColumnSchema();
        ArrayList<Tuple> copiedTuples = new ArrayList<Tuple>((int) originBag.size());
        for (Tuple t : originBag)
            copiedTuples.add(getDeepCopy(t, tupleSchema));

        return BagFactory.getInstance().newDefaultBag(copiedTuples);
    }
}