/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.tuple.hadoop; import java.io.IOException; import java.util.Comparator; import cascading.CascadingException; import cascading.tuple.Fields; import cascading.tuple.StreamComparator; import cascading.tuple.Tuple; import cascading.tuple.TupleInputStream; import cascading.util.Util; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.RawComparator; /** Class DeserializerComparator is the base class for all Cascading comparator classes. */ public abstract class DeserializerComparator<T> extends Configured implements RawComparator<T> { BufferedInputStream lhsBuffer = new BufferedInputStream(); BufferedInputStream rhsBuffer = new BufferedInputStream(); TupleSerialization tupleSerialization; TupleInputStream lhsStream; TupleInputStream rhsStream; Comparator[] groupComparators; @Override public void setConf( Configuration conf ) { if( conf == null ) return; super.setConf( conf ); tupleSerialization = new TupleSerialization( conf ); // get new readers so deserializers don't compete for the buffer lhsStream = new TupleInputStream( lhsBuffer, tupleSerialization.getElementReader() ); rhsStream = new TupleInputStream( rhsBuffer, tupleSerialization.getElementReader() ); groupComparators = deserializeComparatorsFor( "cascading.group.comparator" ); groupComparators = delegatingComparatorsFor( groupComparators ); } Comparator[] deserializeComparatorsFor( String name ) { try { if( getConf() == null ) throw new IllegalStateException( "no conf set" ); String value = getConf().get( name ); if( value == null ) return new Comparator[getConf().getInt( name + ".size", 1 )]; Fields fields = (Fields) Util.deserializeBase64( value ); return fields.getComparators(); } catch( IOException exception ) { throw new CascadingException( "unable to deserialize comparators for: " + name ); } } Comparator[] delegatingComparatorsFor( Comparator[] fieldComparators ) { Comparator[] comparators = new Comparator[fieldComparators.length]; for( int i = 0; i < comparators.length; i++ ) { if( fieldComparators[ i ] instanceof StreamComparator ) comparators[ i ] = new TupleElementStreamComparator( (StreamComparator) fieldComparators[ i ] ); else if( fieldComparators[ i ] != null ) comparators[ i ] = new TupleElementComparator( fieldComparators[ i ] ); else comparators[ i ] = new DelegatingTupleElementComparator( tupleSerialization ); } return comparators; } final int compareTuples( Comparator[] comparators, Tuple lhs, Tuple rhs ) { int lhsLen = lhs.size(); int rhsLen = rhs.size(); int c = lhsLen - rhsLen; if( c != 0 ) return c; for( int i = 0; i < lhsLen; i++ ) { // hack to support comparators array length of 1 c = comparators[ i % comparators.length ].compare( lhs.getObject( i ), rhs.getObject( i ) ); if( c != 0 ) return c; } return 0; } final int compareTuples( Comparator[] comparators ) throws IOException { int lhsLen = lhsStream.getNumElements(); int rhsLen = rhsStream.getNumElements(); int c = lhsLen - rhsLen; if( c != 0 ) return c; for( int i = 0; i < lhsLen; i++ ) { // hack to support comparators array length of 1 c = ( (StreamComparator) comparators[ i % comparators.length ] ).compare( lhsStream, rhsStream ); if( c != 0 ) return c; } return 0; } }