/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.assembly; import cascading.flow.FlowProcess; import cascading.operation.AggregatorCall; import cascading.pipe.Pipe; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; /** * Computes the euclidean distance between every unique set of first fields, * with using the label and value of each element. * <p/> * Expects on input three values: item, label, value */ public class EuclideanDistance extends CrossTab { private static final long serialVersionUID = 1L; /** * Constructor * * @param previous the upstream pipe */ public EuclideanDistance( Pipe previous ) { this( previous, Fields.size( 3 ), new Fields( "n1", "n2", "euclidean" ) ); } /** * Constructor * * @param previous * @param argumentFieldSelector * @param fieldDeclaration */ public EuclideanDistance( Pipe previous, Fields argumentFieldSelector, Fields fieldDeclaration ) { super( previous, argumentFieldSelector, new Euclidean(), fieldDeclaration ); } /** TODO: doc me */ protected static class Euclidean extends CrossTabOperation<Double[]> { private static final long serialVersionUID = 1L; public Euclidean() { super( new Fields( "euclidean" ) ); } public void start( FlowProcess flowProcess, AggregatorCall<Double[]> aggregatorCall ) { aggregatorCall.setContext( new Double[]{0d} ); } public void aggregate( FlowProcess flowProcess, AggregatorCall<Double[]> aggregatorCall ) { TupleEntry entry = aggregatorCall.getArguments(); aggregatorCall.getContext()[ 0 ] += Math.pow( entry.getDouble( 0 ) - entry.getDouble( 1 ), 2 ); } public void complete( FlowProcess flowProcess, AggregatorCall<Double[]> aggregatorCall ) { aggregatorCall.getOutputCollector().add( new Tuple( 1 / ( 1 + aggregatorCall.getContext()[ 0 ] ) ) ); } } }