/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading. If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.operation.function;

import java.beans.ConstructorProperties;
import java.util.Arrays;

import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryCollector;
import org.apache.log4j.Logger;

/**
 * Class UnGroup is a {@link Function} that will 'un-group' data from a given dataset.
 * <p/>
 * That is, for the given field positions, this function will emit a new Tuple for every value. For example:
 * <p/>
 * <pre>
 * A, x, y
 * B, x, z
 * C, y, z
 * </pre>
 * <p/>
 * to:
 * <p/>
 * <pre>
 * A, x
 * A, y
 * B, x
 * B, z
 * C, y
 * C, z
 * </pre>
 * <p/>
 * An instance works in one of two modes, chosen by the constructor used: either an explicit array of
 * value selectors is applied to each incoming argument tuple, or the non-group values are consumed
 * in consecutive runs of a fixed size.
 */
public class UnGroup extends BaseOperation implements Function
  {
  /** Field LOG */
  private static final Logger LOG = Logger.getLogger( UnGroup.class );

  /** Field groupFieldSelector selects the values repeated at the front of every emitted tuple */
  private Fields groupFieldSelector;
  /** Field resultFieldSelectors holds one selector per emitted tuple, null when operating by size */
  private Fields[] resultFieldSelectors;
  /** Field size is the number of values appended to the group per emitted tuple when operating by size */
  private int size = 1;

  /**
   * Constructor UnGroup creates a new UnGroup instance.
   * <p/>
   * The declared fields are not given by the caller; they are created via {@link Fields#size(int)} from
   * the group selector size plus the (common) value selector size.
   *
   * @param groupSelector  of type Fields
   * @param valueSelectors of type Fields[]
   * @throws IllegalArgumentException if the value selectors are not all the same size
   */
  @ConstructorProperties({"groupSelector", "valueSelectors"})
  public UnGroup( Fields groupSelector, Fields[] valueSelectors )
    {
    // no field declaration is supplied to this constructor, so the selectors can only be
    // validated against each other; the declaration is derived from them further down
    int selectorSize = -1;

    // counted the same way as the constructor taking an explicit field declaration:
    // the group fields plus every value selector
    numArgs = groupSelector.size();

    for( Fields resultFieldSelector : valueSelectors )
      {
      if( selectorSize != -1 && selectorSize != resultFieldSelector.size() )
        throw new IllegalArgumentException( "all field selectors must be the same size" );

      selectorSize = resultFieldSelector.size();
      numArgs += selectorSize;
      }

    // an empty selector array leaves selectorSize at -1, declare only the group fields in that case
    int valueSize = selectorSize == -1 ? 0 : selectorSize;

    this.groupFieldSelector = groupSelector;
    this.resultFieldSelectors = Arrays.copyOf( valueSelectors, valueSelectors.length ); // defensive copy
    this.fieldDeclaration = Fields.size( groupSelector.size() + valueSize );
    }

  /**
   * Constructor UnGroup creates a new UnGroup instance.
   *
   * @param fieldDeclaration of type Fields
   * @param groupSelector    of type Fields
   * @param valueSelectors   of type Fields[]
   * @throws IllegalArgumentException if the value selectors are not all the same size, or if that size
   *                                  plus the group selector size does not equal the declared field size
   */
  @ConstructorProperties({"fieldDeclaration", "groupSelector", "valueSelectors"})
  public UnGroup( Fields fieldDeclaration, Fields groupSelector, Fields[] valueSelectors )
    {
    super( fieldDeclaration );

    numArgs = groupSelector.size();
    int selectorSize = -1; // -1 until the first selector has been seen

    for( Fields resultFieldSelector : valueSelectors )
      {
      numArgs += resultFieldSelector.size();
      int fieldSize = groupSelector.size() + resultFieldSelector.size();

      if( selectorSize != -1 && selectorSize != resultFieldSelector.size() )
        throw new IllegalArgumentException( "all field selectors must be the same size, and this size plus group selector size must equal the declared field size" );

      selectorSize = resultFieldSelector.size();

      if( fieldDeclaration.size() != fieldSize )
        throw new IllegalArgumentException( "all field selectors must be the same size, and this size plus group selector size must equal the declared field size" );
      }

    this.groupFieldSelector = groupSelector;
    this.resultFieldSelectors = Arrays.copyOf( valueSelectors, valueSelectors.length ); // defensive copy
    }

  /**
   * Constructor UnGroup creates a new UnGroup instance. Where the numValues argument specifies the number
   * of values to include.
   *
   * @param fieldDeclaration of type Fields
   * @param groupSelector    of type Fields
   * @param numValues        of type int, must be greater than zero
   * @throws IllegalArgumentException if numValues is less than one
   */
  @ConstructorProperties({"fieldDeclaration", "groupSelector", "numValues"})
  public UnGroup( Fields fieldDeclaration, Fields groupSelector, int numValues )
    {
    super( fieldDeclaration );

    // numValues is the step of the loop in useSize(); a step of zero would never advance
    if( numValues < 1 )
      throw new IllegalArgumentException( "numValues must be greater than zero, got: " + numValues );

    this.groupFieldSelector = groupSelector;
    this.size = numValues;
    }

  /**
   * Method operate emits the un-grouped tuples for the current arguments, using the value selectors
   * when they were given at construction and the fixed value size otherwise.
   *
   * @param flowProcess  of type FlowProcess
   * @param functionCall of type FunctionCall
   */
  public void operate( FlowProcess flowProcess, FunctionCall functionCall )
    {
    if( resultFieldSelectors != null )
      useResultSelectors( functionCall.getArguments(), functionCall.getOutputCollector() );
    else
      useSize( functionCall.getArguments(), functionCall.getOutputCollector() );
    }

  /**
   * Method useSize walks the non-group values in steps of {@link #size}, emitting one tuple per step
   * made of the group values followed by the values of that step.
   *
   * @param input           of type TupleEntry
   * @param outputCollector of type TupleEntryCollector
   */
  private void useSize( TupleEntry input, TupleEntryCollector outputCollector )
    {
    if( LOG.isDebugEnabled() )
      LOG.debug( "using size: " + size );

    Tuple tuple = new Tuple( input.getTuple() ); // make clone
    // NOTE(review): remove() presumably strips the group values out of the clone and returns them,
    // leaving only the values to un-group in 'tuple' - confirm against Tuple
    Tuple group = tuple.remove( input.getFields(), groupFieldSelector );

    for( int i = 0; i < tuple.size(); i = i + size )
      {
      Tuple result = new Tuple( group );
      // NOTE(review): offsetSelector( size, i ) presumably addresses 'size' positions starting at i - confirm against Fields
      result.addAll( tuple.get( Fields.offsetSelector( size, i ).getPos() ) );

      outputCollector.add( result );
      }
    }

  /**
   * Method useResultSelectors emits one tuple per value selector, each made of the selected group values
   * followed by the values selected by that selector.
   *
   * @param input           of type TupleEntry
   * @param outputCollector of type TupleEntryCollector
   */
  private void useResultSelectors( TupleEntry input, TupleEntryCollector outputCollector )
    {
    if( LOG.isDebugEnabled() )
      LOG.debug( "using result selectors: " + resultFieldSelectors.length );

    for( Fields resultFieldSelector : resultFieldSelectors )
      {
      // the group is selected anew on every pass because addAll() appends to it
      Tuple group = input.selectTuple( groupFieldSelector );

      group.addAll( input.selectTuple( resultFieldSelector ) );

      outputCollector.add( group );
      }
    }

  /**
   * Method equals compares the super class state, the value size, the group selector and the value selectors.
   *
   * @param object of type Object
   * @return true if the given object is an UnGroup with equal state
   */
  @Override
  public boolean equals( Object object )
    {
    if( this == object )
      return true;
    if( !( object instanceof UnGroup ) )
      return false;
    if( !super.equals( object ) )
      return false;

    UnGroup unGroup = (UnGroup) object;

    if( size != unGroup.size )
      return false;
    if( groupFieldSelector != null ? !groupFieldSelector.equals( unGroup.groupFieldSelector ) : unGroup.groupFieldSelector != null )
      return false;
    if( !Arrays.equals( resultFieldSelectors, unGroup.resultFieldSelectors ) )
      return false;

    return true;
    }

  /**
   * Method hashCode combines the super class hash with the same fields compared by {@link #equals(Object)}.
   *
   * @return the hash code of this instance
   */
  @Override
  public int hashCode()
    {
    int result = super.hashCode();
    result = 31 * result + ( groupFieldSelector != null ? groupFieldSelector.hashCode() : 0 );
    result = 31 * result + ( resultFieldSelectors != null ? Arrays.hashCode( resultFieldSelectors ) : 0 );
    result = 31 * result + size;
    return result;
    }
  }