/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.block;
import java.util.Comparator;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
/**
* Compares two tuples on specified column names.
*
* @author Maneesh Varshney
*
*/
public class TupleComparator implements Comparator<Tuple>
{
    /** Position, within the first tuple, of each column being compared. */
    private final int[] firstTupleIndex;

    /** Position, within the second tuple, of each column being compared. */
    private final int[] secondTupleIndex;

    /** Type used to compare each column pair (UNKNOWN when no wider type is reported). */
    private final DataType[] comparisonType;

    /**
     * Builds a comparator for two tuples that share the same schema and are compared
     * on the same columns.
     *
     * @param schema
     *            schema of both tuples
     * @param columns
     *            names of the columns to compare, in order of significance
     */
    public TupleComparator(BlockSchema schema, String[] columns)
    {
        this(schema, columns, schema, columns);
    }

    /**
     * Builds a comparator for tuples that may come from two different schemas. The
     * i-th column of {@code firstColumns} is compared against the i-th column of
     * {@code secondColumns}.
     *
     * @param firstSchema
     *            schema of the first (left) tuple
     * @param firstColumns
     *            columns of the first tuple to compare
     * @param secondSchema
     *            schema of the second (right) tuple
     * @param secondColumns
     *            columns of the second tuple to compare
     * @throws IllegalArgumentException
     *             if the two column arrays differ in length
     */
    public TupleComparator(BlockSchema firstSchema,
                           String[] firstColumns,
                           BlockSchema secondSchema,
                           String[] secondColumns)
    {
        final int count = firstColumns.length;
        if (count != secondColumns.length)
        {
            throw new IllegalArgumentException("Number of columns to compare is not equal "
                    + firstColumns.length + " != " + secondColumns.length);
        }

        final int[] leftPositions = new int[count];
        final int[] rightPositions = new int[count];
        final DataType[] types = new DataType[count];

        for (int col = 0; col < count; col++)
        {
            final int left = firstSchema.getIndex(firstColumns[col]);
            final int right = secondSchema.getIndex(secondColumns[col]);
            leftPositions[col] = left;
            rightPositions[col] = right;

            DataType wider =
                    DataType.getWiderType(firstSchema.getType(left),
                                          secondSchema.getType(right));
            if (wider == null)
            {
                // No wider type reported for this pair: record it as UNKNOWN.
                wider = DataType.UNKNOWN;
            }
            types[col] = wider;
        }

        firstTupleIndex = leftPositions;
        secondTupleIndex = rightPositions;
        comparisonType = types;
    }

    /**
     * Compares two objects by their natural ordering. A null sorts before any
     * non-null value, and two nulls are equal.
     *
     * @param o1
     *            left operand, may be null
     * @param o2
     *            right operand, may be null
     * @return negative, zero or positive as {@code o1} is less than, equal to or
     *         greater than {@code o2}
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public static int compareObjects(Object o1, Object o2)
    {
        if (o1 == null || o2 == null)
        {
            if (o1 == o2)
            {
                return 0;
            }
            return (o1 == null) ? -1 : 1;
        }

        final Comparable left = (Comparable) o1;
        return left.compareTo(o2);
    }

    /**
     * Compares two objects according to the given type. A null sorts before any
     * non-null value, and two nulls are equal. INT, LONG, FLOAT and DOUBLE operands
     * are cast to {@link Number} and compared as that primitive; MAP operands are
     * handed to Pig's {@code DataType.compare}; every other type is compared via
     * {@link Comparable}.
     *
     * @param o1
     *            left operand, may be null
     * @param o2
     *            right operand, may be null
     * @param type
     *            type that selects the comparison strategy
     * @return negative, zero or positive as {@code o1} is less than, equal to or
     *         greater than {@code o2}
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public static int compareObjects(Object o1, Object o2, DataType type)
    {
        if (o1 == null || o2 == null)
        {
            if (o1 == o2)
            {
                return 0;
            }
            return (o1 == null) ? -1 : 1;
        }

        switch (type)
        {
        case INT:
            // Widening int to long keeps the ordering, so one helper serves both cases.
            return order(((Number) o1).intValue(), ((Number) o2).intValue());
        case LONG:
            return order(((Number) o1).longValue(), ((Number) o2).longValue());
        case FLOAT:
            return Float.compare(((Number) o1).floatValue(), ((Number) o2).floatValue());
        case DOUBLE:
            return Double.compare(((Number) o1).doubleValue(), ((Number) o2).doubleValue());
        case MAP:
        {
            final byte mapType = org.apache.pig.data.DataType.MAP;
            return org.apache.pig.data.DataType.compare(o1, o2, mapType, mapType);
        }
        default:
        {
            final Comparable left = (Comparable) o1;
            return left.compareTo(o2);
        }
        }
    }

    /** Returns -1, 0 or 1 as {@code a} is less than, equal to or greater than {@code b}. */
    private static int order(long a, long b)
    {
        if (a < b)
        {
            return -1;
        }
        return (a > b) ? 1 : 0;
    }

    /**
     * Compares two tuples column by column, returning the first non-zero column
     * result. A null tuple sorts before any non-null tuple, and two null tuples are
     * equal.
     *
     * @param tuple1
     *            left tuple, may be null
     * @param tuple2
     *            right tuple, may be null
     * @return negative, zero or positive as {@code tuple1} is less than, equal to or
     *         greater than {@code tuple2} on the configured columns
     * @throws RuntimeException
     *             wrapping the {@link ExecException} if a field cannot be read
     */
    @Override
    public int compare(Tuple tuple1, Tuple tuple2)
    {
        if (tuple1 == null || tuple2 == null)
        {
            if (tuple1 == tuple2)
            {
                return 0;
            }
            return (tuple1 == null) ? -1 : 1;
        }

        try
        {
            for (int k = 0; k < firstTupleIndex.length; k++)
            {
                final Object left = tuple1.get(firstTupleIndex[k]);
                final Object right = tuple2.get(secondTupleIndex[k]);
                final int result = compareObjects(left, right, comparisonType[k]);
                if (result != 0)
                {
                    return result;
                }
            }
        }
        catch (ExecException e)
        {
            throw new RuntimeException(e);
        }

        return 0;
    }
}