/*
* chombo: Hadoop Map Reduce utility
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.chombo.util;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.io.WritableComparable;
/**
 * General purpose tuple holding an ordered list of fields. Implements
 * WritableComparable, so a tuple can be serialized and compared.
 * <p>
 * Serialization supports the element types identified by the type constants
 * of this class: Byte, Boolean, Integer, Long, Float, Double, String, byte[]
 * and nested Tuple. Any other element type (including null) is rejected by
 * {@link #write(DataOutput)}. The delimiter used by the toString methods is
 * not part of the serialized form.
 *
 * @author pranab
 */
public class Tuple implements WritableComparable<Tuple> {
    public static final byte BYTE = 0;
    public static final byte BOOLEAN = 1;
    public static final byte INT = 2;
    public static final byte LONG = 3;
    public static final byte FLOAT = 4;
    public static final byte DOUBLE = 5;
    public static final byte STRING = 6;
    public static final byte BYTE_ARRAY = 7;
    public static final byte TUPLE = 8;

    /** Largest byte array length representable by the 16 bit length prefix used when serializing. */
    private static final int MAX_BYTE_ARRAY_LENGTH = 0xFFFF;

    private List<Object> fields;
    private String delim = ",";

    /**
     * Creates an empty tuple.
     */
    public Tuple() {
        fields = new ArrayList<Object>();
    }

    /**
     * Creates a tuple backed by the given list. The list is used directly,
     * it is not copied, so later changes to either are visible in both.
     *
     * @param fields backing list of elements
     */
    public Tuple(List<Object> fields) {
        this.fields = fields;
    }

    /**
     * Creates a new tuple containing the same element references (shallow copy).
     *
     * @return the new tuple
     */
    public Tuple createClone() {
        Tuple clone = new Tuple();
        clone.fields.addAll(fields);
        return clone;
    }

    /**
     * Copies the element references of this tuple into an existing tuple,
     * discarding whatever that tuple contained before.
     *
     * @param clone tuple to overwrite
     * @return the tuple passed in
     */
    public Tuple createClone(Tuple clone) {
        // clearing a target that shares our list would wipe the source before it is copied
        if (clone.fields != fields) {
            clone.initialize();
            clone.fields.addAll(fields);
        }
        return clone;
    }

    /**
     * Clears the internal list.
     */
    public void initialize() {
        fields.clear();
    }

    /**
     * Gets the number of elements.
     *
     * @return element count
     */
    public int getSize() {
        return fields.size();
    }

    /**
     * Appends one or more elements.
     *
     * @param fieldList elements to append, in order
     */
    public void add(Object... fieldList) {
        for (Object field : fieldList) {
            fields.add(field);
        }
    }

    /**
     * Inserts an element at the beginning.
     *
     * @param field element to insert
     */
    public void prepend(Object field) {
        fields.add(0, field);
    }

    /**
     * Appends an element at the end.
     *
     * @param field element to append
     */
    public void append(Object field) {
        fields.add(field);
    }

    /**
     * Inserts an element at the given position, shifting later elements.
     *
     * @param field element to insert
     * @param index position of the new element
     */
    public void insert(Object field, int index) {
        fields.add(index, field);
    }

    /**
     * Appends string serialized elements, converting each according to the
     * type at the same position.
     *
     * @param types element types, one of the type constants of this class per value
     * @param fields string serialized values
     * @throws IllegalArgumentException if there are fewer types than values
     *         or a type is not supported by {@link #add(byte, String)}
     */
    public void add(byte[] types, String[] fields) {
        // validate up front so a short type array does not leave the tuple partially filled
        if (types.length < fields.length) {
            throw new IllegalArgumentException("Failed adding elements to tuple, " + fields.length
                    + " values but only " + types.length + " types");
        }
        for (int i = 0; i < fields.length; ++i) {
            add(types[i], fields[i]);
        }
    }

    /**
     * Appends all elements of another tuple.
     *
     * @param other tuple whose elements are appended
     */
    public void add(Tuple other) {
        fields.addAll(other.fields);
    }

    /**
     * Appends all elements of a list.
     *
     * @param list elements to append
     */
    public <T> void add(List<T> list) {
        fields.addAll(list);
    }

    /**
     * Appends a string serialized element after converting it to the given
     * type. BYTE_ARRAY values are the utf-8 bytes of the string. Nothing is
     * added when the converted value is null (a null string with type STRING).
     *
     * @param type one of the type constants of this class; TUPLE is not supported here
     * @param field string serialized value
     * @throws IllegalArgumentException if the type is not supported
     */
    public void add(byte type, String field) {
        Object typedField;
        switch (type) {
            case BYTE:
                typedField = Byte.decode(field);
                break;
            case BOOLEAN:
                typedField = Boolean.parseBoolean(field);
                break;
            case INT:
                typedField = Integer.parseInt(field);
                break;
            case LONG:
                typedField = Long.parseLong(field);
                break;
            case FLOAT:
                typedField = Float.parseFloat(field);
                break;
            case DOUBLE:
                typedField = Double.parseDouble(field);
                break;
            case STRING:
                typedField = field;
                break;
            case BYTE_ARRAY:
                try {
                    typedField = field.getBytes("utf-8");
                } catch (UnsupportedEncodingException e) {
                    throw new IllegalArgumentException(
                            "Failed adding element to tuple, utf-8 encoding not supported", e);
                }
                break;
            default:
                throw new IllegalArgumentException("Failed adding element to tuple, unknown element type");
        }
        if (typedField != null) {
            fields.add(typedField);
        }
    }

    /**
     * Appends a contiguous range of array elements.
     *
     * @param items source array
     * @param start index of the first element to add (inclusive)
     * @param end index after the last element to add (exclusive)
     */
    public <T> void addFromArray(T[] items, int start, int end) {
        for (int index = start; index < end; ++index) {
            fields.add(items[index]);
        }
    }

    /**
     * Appends the array elements found at the given indexes.
     *
     * @param items source array
     * @param indexes positions in the source array, must not be null
     */
    public <T> void addFromArray(T[] items, int[] indexes) {
        for (int index : indexes) {
            fields.add(items[index]);
        }
    }

    /**
     * Appends the array elements found at the given indexes. Does nothing
     * when the index array is null.
     *
     * @param items source array
     * @param indexes positions in the source array, may be null
     */
    public <T> void addArrayElements(T[] items, int[] indexes) {
        if (indexes == null) {
            return;
        }
        for (int index : indexes) {
            fields.add(items[index]);
        }
    }

    /**
     * Sets the element at the given position, replacing the existing one.
     * An index equal to the current size appends the element.
     *
     * @param index position to set
     * @param field new element
     */
    public void set(int index, Object field) {
        if (index == fields.size()) {
            // keeps tuples that are filled position by position working
            fields.add(field);
        } else {
            fields.set(index, field);
        }
    }

    /**
     * Gets the element at the given position.
     *
     * @param index element position
     * @return the element
     */
    public Object get(int index) {
        return fields.get(index);
    }

    /**
     * Gets the element at the given position as a string.
     *
     * @param index element position
     * @return the element cast to String
     * @throws ClassCastException if the element is not a String
     */
    public String getString(int index) {
        return (String) fields.get(index);
    }

    /**
     * Gets the last element as a string.
     *
     * @return the last element cast to String
     * @throws ClassCastException if the element is not a String
     */
    public String getLastAsString() {
        return (String) fields.get(fields.size() - 1);
    }

    /**
     * Gets the element at the given position as an int.
     *
     * @param index element position
     * @return the unboxed Integer element
     * @throws ClassCastException if the element is not an Integer
     */
    public int getInt(int index) {
        return (Integer) fields.get(index);
    }

    /**
     * Gets the last element as an int.
     *
     * @return the unboxed Integer element
     * @throws ClassCastException if the element is not an Integer
     */
    public int getLastAsInt() {
        return (Integer) fields.get(fields.size() - 1);
    }

    /**
     * Gets the element at the given position as a long.
     *
     * @param index element position
     * @return the unboxed Long element
     * @throws ClassCastException if the element is not a Long
     */
    public long getLong(int index) {
        return (Long) fields.get(index);
    }

    /**
     * Gets the last element as a long.
     *
     * @return the unboxed Long element
     * @throws ClassCastException if the element is not a Long
     */
    public long getLastAsLong() {
        return (Long) fields.get(fields.size() - 1);
    }

    /**
     * Gets the element at the given position as a double.
     *
     * @param index element position
     * @return the unboxed Double element
     * @throws ClassCastException if the element is not a Double
     */
    public double getDouble(int index) {
        return (Double) fields.get(index);
    }

    /**
     * Gets the last element as a double.
     *
     * @return the unboxed Double element
     * @throws ClassCastException if the element is not a Double
     */
    public double getLastAsDouble() {
        return (Double) fields.get(fields.size() - 1);
    }

    /**
     * Returns true if the element is an Integer.
     *
     * @param index element position
     * @return true when the element is an Integer
     */
    public boolean isInt(int index) {
        return fields.get(index) instanceof Integer;
    }

    /**
     * Returns true if the element is a String.
     *
     * @param index element position
     * @return true when the element is a String
     */
    public boolean isString(int index) {
        return fields.get(index) instanceof String;
    }

    /**
     * Returns true if the element is a Double.
     *
     * @param index element position
     * @return true when the element is a Double
     */
    public boolean isDouble(int index) {
        return fields.get(index) instanceof Double;
    }

    /* (non-Javadoc)
     * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        initialize();
        int numFields = in.readInt();
        for (int i = 0; i < numFields; ++i) {
            byte type = in.readByte();
            switch (type) {
                case BYTE:
                    fields.add(in.readByte());
                    break;
                case BOOLEAN:
                    fields.add(in.readBoolean());
                    break;
                case INT:
                    fields.add(in.readInt());
                    break;
                case LONG:
                    fields.add(in.readLong());
                    break;
                case FLOAT:
                    fields.add(in.readFloat());
                    break;
                case DOUBLE:
                    fields.add(in.readDouble());
                    break;
                case STRING:
                    fields.add(in.readUTF());
                    break;
                case BYTE_ARRAY:
                    // the length prefix is 16 bits; reading it unsigned keeps lengths above
                    // Short.MAX_VALUE from turning into a negative array size
                    byte[] bytes = new byte[in.readUnsignedShort()];
                    in.readFully(bytes);
                    fields.add(bytes);
                    break;
                case TUPLE:
                    Tuple childTuple = new Tuple();
                    childTuple.readFields(in);
                    fields.add(childTuple);
                    break;
                default:
                    throw new IllegalArgumentException("Failed decoding, unknown element type in stream");
            }
        }
    }

    /* (non-Javadoc)
     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(fields.size());
        for (Object field : fields) {
            if (field instanceof Byte) {
                out.writeByte(BYTE);
                out.writeByte((Byte) field);
            } else if (field instanceof Boolean) {
                out.writeByte(BOOLEAN);
                out.writeBoolean((Boolean) field);
            } else if (field instanceof Integer) {
                out.writeByte(INT);
                out.writeInt((Integer) field);
            } else if (field instanceof Long) {
                out.writeByte(LONG);
                out.writeLong((Long) field);
            } else if (field instanceof Float) {
                out.writeByte(FLOAT);
                out.writeFloat((Float) field);
            } else if (field instanceof Double) {
                out.writeByte(DOUBLE);
                out.writeDouble((Double) field);
            } else if (field instanceof String) {
                out.writeByte(STRING);
                out.writeUTF((String) field);
            } else if (field instanceof byte[]) {
                byte[] bytes = (byte[]) field;
                // writeShort keeps only the low 16 bits, so a longer array would be
                // written with a wrong length prefix and corrupt the stream
                if (bytes.length > MAX_BYTE_ARRAY_LENGTH) {
                    throw new IllegalArgumentException("Failed encoding, byte array length "
                            + bytes.length + " exceeds maximum " + MAX_BYTE_ARRAY_LENGTH);
                }
                out.writeByte(BYTE_ARRAY);
                out.writeShort(bytes.length);
                out.write(bytes);
            } else if (field instanceof Tuple) {
                out.writeByte(TUPLE);
                ((Tuple) field).write(out);
            } else {
                throw new IllegalArgumentException("Failed encoding, unknown element type in tuple");
            }
        }
    }

    /**
     * Hash code computed from the elements with the same formula as
     * {@link List#hashCode()}, except that byte arrays contribute a hash of
     * their content instead of their identity hash.
     *
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        int hash = 1;
        for (Object field : fields) {
            hash = 31 * hash + fieldHashCode(field);
        }
        return hash;
    }

    /**
     * Two tuples are equal when they have the same number of elements and
     * the elements are pairwise equal. Byte arrays are compared by content.
     *
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Tuple)) {
            return false;
        }
        List<Object> otherFields = ((Tuple) obj).fields;
        int size = fields.size();
        if (size != otherFields.size()) {
            return false;
        }
        for (int i = 0; i < size; ++i) {
            if (!fieldEquals(otherFields.get(i), fields.get(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares element by element, the first unequal pair decides the result.
     * Byte arrays are compared lexicographically as unsigned bytes, nested
     * tuples are compared recursively.
     *
     * @param that tuple to compare with
     * @return negative, zero or positive as this tuple is less than, equal to
     *         or greater than the other
     * @throws IllegalArgumentException if the tuples differ in length or an
     *         element of this tuple has an unsupported type
     * @throws ClassCastException if elements at the same position differ in type
     */
    @Override
    public int compareTo(Tuple that) {
        int size = fields.size();
        if (size != that.fields.size()) {
            throw new IllegalArgumentException("Can not compare tuples of unequal length this:" +
                    size + " that:" + that.fields.size());
        }
        for (int i = 0; i < size; ++i) {
            int compared = compareField(fields.get(i), that.fields.get(i));
            if (compared != 0) {
                return compared;
            }
        }
        return 0;
    }

    /**
     * Comparison based on all but the last element. Both tuples must have
     * at least one element.
     *
     * @param other tuple to compare with
     * @return result of comparing the two tuples without their last elements
     */
    public int compareToBase(Tuple other) {
        Tuple subThis = new Tuple(fields.subList(0, fields.size() - 1));
        Tuple subThat = new Tuple(other.fields.subList(0, other.fields.size() - 1));
        return subThis.compareTo(subThat);
    }

    /**
     * Non negative hash code based on all but the last element. The tuple
     * must have at least one element.
     *
     * @return non negative hash code
     */
    public int hashCodeBase() {
        int hashCode = new Tuple(fields.subList(0, fields.size() - 1)).hashCode();
        // negating Integer.MIN_VALUE overflows back to itself, so map it explicitly
        return hashCode == Integer.MIN_VALUE ? 0 : Math.abs(hashCode);
    }

    /**
     * Hash code based on the leading elements.
     *
     * @param subLength number of leading elements to include
     * @return hash code of the leading elements, may be negative
     */
    public int hashCodePartial(int subLength) {
        return new Tuple(fields.subList(0, subLength)).hashCode();
    }

    /**
     * Returns true if the first element equals the given object. An empty
     * tuple starts with nothing.
     *
     * @param obj object to compare the first element with
     * @return true when the tuple is not empty and its first element equals the object
     */
    public boolean startsWith(Object obj) {
        return !fields.isEmpty() && fieldEquals(obj, fields.get(0));
    }

    /**
     * Sets the delimiter used by the toString methods.
     *
     * @param delim delimiter placed between elements
     */
    public void setDelim(String delim) {
        this.delim = delim;
    }

    /**
     * All elements joined with the delimiter.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return toString(0, fields.size());
    }

    /**
     * String form of the elements starting at the given index.
     *
     * @param start index of the first element (inclusive)
     * @return the elements joined with the delimiter
     */
    public String toString(int start) {
        return toString(start, fields.size());
    }

    /**
     * String form of the elements starting at the given index.
     *
     * @param start index of the first element (inclusive)
     * @return the elements joined with the delimiter
     */
    public String toStringBeg(int start) {
        return toString(start, fields.size());
    }

    /**
     * String form of the elements before the given index.
     *
     * @param end index after the last element (exclusive)
     * @return the elements joined with the delimiter
     */
    public String toStringEnd(int end) {
        return toString(0, end);
    }

    /**
     * String form of a range of elements. Null elements are rendered as "null".
     *
     * @param start index of the first element (inclusive)
     * @param end index after the last element (exclusive)
     * @return the elements joined with the delimiter, empty when the range is empty
     */
    public String toString(int start, int end) {
        StringBuilder stBld = new StringBuilder();
        for (int i = start; i < end; ++i) {
            if (i > start) {
                stBld.append(delim);
            }
            // append(Object) tolerates null elements, unlike calling toString() on them
            stBld.append(fields.get(i));
        }
        return stBld.toString();
    }

    /**
     * Creates a tuple from the leading elements.
     *
     * @param offset index after the last element to include (exclusive)
     * @return new tuple with the elements before the offset
     */
    public Tuple beginningSubTuple(int offset) {
        return subTuple(0, offset);
    }

    /**
     * Creates a tuple from the trailing elements.
     *
     * @param offset index of the first element to include (inclusive)
     * @return new tuple with the elements from the offset onwards
     */
    public Tuple endSubTuple(int offset) {
        return subTuple(offset, fields.size());
    }

    /**
     * Creates a tuple from a range of elements of this tuple.
     *
     * @param start index of the first element (inclusive)
     * @param end index after the last element (exclusive)
     * @return new tuple holding the same element references
     * @throws IllegalArgumentException if end is smaller than start
     */
    public Tuple subTuple(int start, int end) {
        if (end < start) {
            throw new IllegalArgumentException("end index is smaller that start index");
        }
        Tuple subTuple = new Tuple();
        for (int i = start; i < end; ++i) {
            subTuple.fields.add(fields.get(i));
        }
        return subTuple;
    }

    /**
     * Creates a string array from a range of elements, using the toString()
     * of each element.
     *
     * @param start index of the first element (inclusive)
     * @param end index after the last element (exclusive)
     * @return string form of each element in the range
     * @throws IllegalArgumentException if end is smaller than start
     */
    public String[] subTupleAsArray(int start, int end) {
        if (end < start) {
            throw new IllegalArgumentException("end index is smaller that start index");
        }
        String[] subTuple = new String[end - start];
        for (int i = start; i < end; ++i) {
            subTuple[i - start] = fields.get(i).toString();
        }
        return subTuple;
    }

    /**
     * Creates a string array from the elements starting at the given index.
     *
     * @param start index of the first element (inclusive)
     * @return string form of each element from start onwards
     */
    public String[] subTupleAsArray(int start) {
        return subTupleAsArray(start, fields.size());
    }

    /**
     * Creates a string array from all elements.
     *
     * @return string form of every element
     */
    public String[] getTupleAsArray() {
        return subTupleAsArray(0, fields.size());
    }

    /**
     * Appends a range of elements to the given list.
     *
     * @param start index of the first element (inclusive)
     * @param end index after the last element (exclusive)
     * @param list list receiving the elements
     */
    public <T> void subTupleAsList(int start, int end, List<T> list) {
        for (int i = start; i < end; ++i) {
            // unchecked by nature: the caller asserts the elements in the range are of type T
            @SuppressWarnings("unchecked")
            T item = (T) fields.get(i);
            list.add(item);
        }
    }

    /**
     * Appends the elements starting at the given index to the given list.
     *
     * @param start index of the first element (inclusive)
     * @param list list receiving the elements
     */
    public <T> void subTupleAsList(int start, List<T> list) {
        subTupleAsList(start, fields.size(), list);
    }

    /**
     * Appends all elements to the given list.
     *
     * @param list list receiving the elements
     */
    public <T> void tupleAsList(List<T> list) {
        subTupleAsList(0, fields.size(), list);
    }

    /**
     * Removes duplicates, keeping the first occurrence of each element and
     * the original order. The internal list is replaced by a new list.
     */
    public void removeDuplicates() {
        List<Object> uniqueFields = new ArrayList<Object>();
        for (Object value : fields) {
            if (!uniqueFields.contains(value)) {
                uniqueFields.add(value);
            }
        }
        fields = uniqueFields;
    }

    /** Null safe element equality that compares byte arrays by content. */
    private static boolean fieldEquals(Object left, Object right) {
        if (left == right) {
            return true;
        }
        if (left == null || right == null) {
            return false;
        }
        if (left instanceof byte[] && right instanceof byte[]) {
            return Arrays.equals((byte[]) left, (byte[]) right);
        }
        return left.equals(right);
    }

    /** Element hash consistent with fieldEquals: content hash for byte arrays, 0 for null. */
    private static int fieldHashCode(Object field) {
        if (field == null) {
            return 0;
        }
        if (field instanceof byte[]) {
            return Arrays.hashCode((byte[]) field);
        }
        return field.hashCode();
    }

    /** Compares one pair of elements, dispatching on the type of the first. */
    private static int compareField(Object field, Object other) {
        if (field instanceof Byte) {
            return ((Byte) field).compareTo((Byte) other);
        }
        if (field instanceof Boolean) {
            return ((Boolean) field).compareTo((Boolean) other);
        }
        if (field instanceof Integer) {
            return ((Integer) field).compareTo((Integer) other);
        }
        if (field instanceof Long) {
            return ((Long) field).compareTo((Long) other);
        }
        if (field instanceof Float) {
            return ((Float) field).compareTo((Float) other);
        }
        if (field instanceof Double) {
            return ((Double) field).compareTo((Double) other);
        }
        if (field instanceof String) {
            return ((String) field).compareTo((String) other);
        }
        if (field instanceof byte[]) {
            return compareBytes((byte[]) field, (byte[]) other);
        }
        if (field instanceof Tuple) {
            return ((Tuple) field).compareTo((Tuple) other);
        }
        throw new IllegalArgumentException("Failed in compare, unknown element type in tuple ");
    }

    /** Lexicographic comparison of unsigned bytes; a proper prefix sorts first. */
    private static int compareBytes(byte[] left, byte[] right) {
        int common = Math.min(left.length, right.length);
        for (int i = 0; i < common; ++i) {
            int diff = (left[i] & 0xff) - (right[i] & 0xff);
            if (diff != 0) {
                return diff;
            }
        }
        return left.length - right.length;
    }
}