/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.pipe;
import java.beans.ConstructorProperties;
import java.util.Iterator;
import java.util.Set;
import cascading.CascadingException;
import cascading.flow.FlowCollector;
import cascading.flow.FlowProcess;
import cascading.flow.Scope;
import cascading.operation.Aggregator;
import cascading.operation.AssertionLevel;
import cascading.operation.Buffer;
import cascading.operation.ConcreteCall;
import cascading.operation.GroupAssertion;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;
import cascading.tuple.Tuples;
/**
* The Every operator applies an {@link Aggregator} or {@link Buffer} to every grouping.
* <p/>
* Any number of Every instances may follow another Every, {@link GroupBy}, or {@link CoGroup} instance if they apply an Aggregator, not a Buffer.
* If a Buffer, only one Every may follow a GroupBy or CoGroup.
* <p/>
* Every operators create aggregate values for every grouping they encounter. This aggregate value is added to the current
* grouping Tuple. Subsequent Every instances can continue to append values to the grouping Tuple. When an Each follows
* an Every, the Each applies its operation to the grouping Tuple (thus all child values in the grouping are discarded
* and only aggregate values are propagated).
*/
public class Every extends Operator
{
/**
* Field AGGREGATOR_ARGUMENTS is the default argument selector ({@link Fields#ALL}) used by the constructors
* that do not accept an explicit argumentSelector.
*/
private static final Fields AGGREGATOR_ARGUMENTS = Fields.ALL;
/**
* Field AGGREGATOR_SELECTOR is the default output selector ({@link Fields#ALL}) used by the Aggregator and
* Buffer constructors that do not accept an explicit outputSelector.
*/
private static final Fields AGGREGATOR_SELECTOR = Fields.ALL;
/** Field ASSERTION_SELECTOR is the output selector ({@link Fields#RESULTS}) used by the GroupAssertion constructors. */
private static final Fields ASSERTION_SELECTOR = Fields.RESULTS;
/**
* Constructor Every creates a new Every instance that applies the given Aggregator using the default
* argument selector ({@link Fields#ALL}) and the default output selector ({@link Fields#ALL}).
*
* @param previous previous Pipe to receive input Tuples from
* @param aggregator Aggregator to be applied to every input Tuple grouping
*/
@ConstructorProperties({"previous", "aggregator"})
public Every( Pipe previous, Aggregator aggregator )
{
super( previous, AGGREGATOR_ARGUMENTS, aggregator, AGGREGATOR_SELECTOR );
}
/**
* Constructor Every creates a new Every instance that applies the given Aggregator to the selected
* arguments, using the default output selector ({@link Fields#ALL}).
*
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects the Aggregator arguments from the input Tuple
* @param aggregator Aggregator to be applied to every input Tuple grouping
*/
@ConstructorProperties({"previous", "argumentSelector", "aggregator"})
public Every( Pipe previous, Fields argumentSelector, Aggregator aggregator )
{
super( previous, argumentSelector, aggregator, AGGREGATOR_SELECTOR );
}
/**
* Constructor Every creates a new Every instance that applies the given Aggregator with an explicit
* argument selector and an explicit output selector.
*
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects the Aggregator arguments from the input Tuple
* @param aggregator Aggregator to be applied to every input Tuple grouping
* @param outputSelector field selector that selects the output Tuple from the grouping and Aggregator results Tuples
*/
@ConstructorProperties({"previous", "argumentSelector", "aggregator", "outputSelector"})
public Every( Pipe previous, Fields argumentSelector, Aggregator aggregator, Fields outputSelector )
{
super( previous, argumentSelector, aggregator, outputSelector );
}
/**
* Constructor Every creates a new Every instance that applies the given Aggregator using the default
* argument selector ({@link Fields#ALL}) and an explicit output selector.
*
* @param previous previous Pipe to receive input Tuples from
* @param aggregator Aggregator to be applied to every input Tuple grouping
* @param outputSelector field selector that selects the output Tuple from the grouping and Aggregator results Tuples
*/
@ConstructorProperties({"previous", "aggregator", "outputSelector"})
public Every( Pipe previous, Aggregator aggregator, Fields outputSelector )
{
super( previous, AGGREGATOR_ARGUMENTS, aggregator, outputSelector );
}
/**
* Constructor Every creates a new Every instance that applies the given Buffer using the default
* argument selector ({@link Fields#ALL}) and the default output selector ({@link Fields#ALL}).
*
* @param previous previous Pipe to receive input Tuples from
* @param buffer Buffer to be applied to every input Tuple grouping
*/
@ConstructorProperties({"previous", "buffer"})
public Every( Pipe previous, Buffer buffer )
{
super( previous, AGGREGATOR_ARGUMENTS, buffer, AGGREGATOR_SELECTOR );
}
/**
* Constructor Every creates a new Every instance that applies the given Buffer to the selected
* arguments, using the default output selector ({@link Fields#ALL}).
*
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects the Buffer arguments from the input Tuple
* @param buffer Buffer to be applied to every input Tuple grouping
*/
@ConstructorProperties({"previous", "argumentSelector", "buffer"})
public Every( Pipe previous, Fields argumentSelector, Buffer buffer )
{
super( previous, argumentSelector, buffer, AGGREGATOR_SELECTOR );
}
/**
* Constructor Every creates a new Every instance that applies the given Buffer with an explicit
* argument selector and an explicit output selector.
*
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects the Buffer arguments from the input Tuple
* @param buffer Buffer to be applied to every input Tuple grouping
* @param outputSelector field selector that selects the output Tuple from the grouping and Buffer results Tuples
*/
@ConstructorProperties({"previous", "argumentSelector", "buffer", "outputSelector"})
public Every( Pipe previous, Fields argumentSelector, Buffer buffer, Fields outputSelector )
{
super( previous, argumentSelector, buffer, outputSelector );
}
/**
* Constructor Every creates a new Every instance that applies the given Buffer using the default
* argument selector ({@link Fields#ALL}) and an explicit output selector.
*
* @param previous previous Pipe to receive input Tuples from
* @param buffer Buffer to be applied to every input Tuple grouping
* @param outputSelector field selector that selects the output Tuple from the grouping and Buffer results Tuples
*/
@ConstructorProperties({"previous", "buffer", "outputSelector"})
public Every( Pipe previous, Buffer buffer, Fields outputSelector )
{
super( previous, AGGREGATOR_ARGUMENTS, buffer, outputSelector );
}
/**
* Constructor Every creates a new Every instance that applies the given GroupAssertion using the default
* argument selector ({@link Fields#ALL}). The output selector is fixed to {@link Fields#RESULTS}.
*
* @param previous previous Pipe to receive input Tuples from
* @param assertionLevel AssertionLevel to associate with the Assertion
* @param assertion GroupAssertion to be applied to every input Tuple grouping
*/
@ConstructorProperties({"previous", "assertionLevel", "assertion"})
public Every( Pipe previous, AssertionLevel assertionLevel, GroupAssertion assertion )
{
super( previous, AGGREGATOR_ARGUMENTS, assertionLevel, assertion, ASSERTION_SELECTOR );
}
/**
* Constructor Every creates a new Every instance that applies the given GroupAssertion to the selected
* arguments. The output selector is fixed to {@link Fields#RESULTS}.
*
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects the GroupAssertion arguments from the input Tuple
* @param assertionLevel AssertionLevel to associate with the Assertion
* @param assertion GroupAssertion to be applied to every input Tuple grouping
*/
@ConstructorProperties({"previous", "argumentSelector", "assertionLevel", "assertion"})
public Every( Pipe previous, Fields argumentSelector, AssertionLevel assertionLevel, GroupAssertion assertion )
{
super( previous, argumentSelector, assertionLevel, assertion, ASSERTION_SELECTOR );
}
/**
* Method isBuffer reports whether the operation held by this Every instance is a
* {@link cascading.operation.Buffer}.
*
* @return boolean true when the operation is a Buffer, false otherwise
*/
public boolean isBuffer()
{
  return Buffer.class.isInstance( operation );
}
/**
* Method isAggregator reports whether the operation held by this Every instance is an
* {@link Aggregator}.
*
* @return boolean true when the operation is an Aggregator, false otherwise
*/
public boolean isAggregator()
{
  return Aggregator.class.isInstance( operation );
}
/** Returns the operation held by this Every, cast to an {@link Aggregator}. */
private Aggregator getAggregator()
{
  final Aggregator aggregator = (Aggregator) operation;

  return aggregator;
}
/** Returns the operation held by this Every, cast to a {@link Buffer}. */
private Buffer getReducer()
{
  final Buffer buffer = (Buffer) operation;

  return buffer;
}
/** Returns the operation held by this Every, cast to a {@link GroupAssertion}. */
private GroupAssertion getGroupAssertion()
{
  final GroupAssertion assertion = (GroupAssertion) operation;

  return assertion;
}
/**
* Method resolveIncomingOperationFields returns the outgoing values fields of the given incoming scope.
*
* @param incomingScope scope of the element this Every follows
* @return the out values Fields of the incoming scope
* @throws IllegalStateException if the incoming scope is an Each or a Tap
*/
@Override
public Fields resolveIncomingOperationFields( Scope incomingScope )
{
  // De Morgan form of the original check; isTap() is still only consulted when isEach() is false
  boolean followsValidElement = !incomingScope.isEach() && !incomingScope.isTap();

  if( followsValidElement )
    return incomingScope.getOutValuesFields();

  throw new IllegalStateException( "Every cannot follow a Tap or an Each" );
}
/**
* Method resolveFields returns the fields of the given scope this Every resolves against: the outgoing
* values fields when this Every holds a Buffer, the outgoing grouping fields otherwise.
*
* @param scope scope of the element this Every follows
* @return the out values Fields (Buffer) or the out grouping Fields (any other operation)
* @throws IllegalStateException if the scope is an Each or a Tap
*/
@Override
public Fields resolveFields( Scope scope )
{
  if( !scope.isEach() && !scope.isTap() )
    return isBuffer() ? scope.getOutValuesFields() : scope.getOutGroupingFields();

  throw new IllegalStateException( "Every cannot follow a Tap or an Each" );
}
/**
* Builds the outgoing EVERY {@link Scope} for this operator from the given incoming scopes. The argument,
* declared and output selector fields are each resolved and then verified, in that order, before the
* outgoing values and remainder fields are resolved.
*
* @param incomingScopes scopes of the elements feeding this Every
* @return a new Scope of kind EVERY
* @throws OperatorException if the arguments select a field declared by a directly preceding Every
* @see Operator#outgoingScopeFor
*/
public Scope outgoingScopeFor( Set<Scope> incomingScopes )
{
  Fields arguments = resolveArgumentSelector( incomingScopes );

  verifyArguments( arguments );

  // using a result from a previous Every as an argument to the current Every is currently not supported
  Scope previous = getFirst( incomingScopes );

  if( previous.isEvery() )
  {
    if( arguments.contains( previous.getDeclaredFields() ) )
      throw new OperatorException( this, "arguments may not select a declared field from a previous Every" );
  }

  Fields declared = resolveDeclared( incomingScopes, arguments );

  verifyDeclaredFields( declared );

  Fields outGrouping = resolveOutgoingGroupingSelector( incomingScopes, arguments, declared );

  verifyOutputSelector( outGrouping );

  Fields outValues = resolveOutgoingValues( incomingScopes );
  Fields remainder = resolveRemainderFields( incomingScopes, arguments );

  return new Scope( getName(), Scope.Kind.EVERY, remainder, arguments, declared, outGrouping, outValues );
}
/**
* Resolves the outgoing selector by delegating to resolveOutgoingSelector. An OperatorException raised
* during resolution is propagated unchanged; any other Exception is wrapped in a new OperatorException
* whose message names the "values" selector for a Buffer and the "grouping" selector otherwise.
*
* @param incomingScopes scopes of the elements feeding this Every
* @param argumentSelector resolved argument fields
* @param declared resolved declared fields
* @return the resolved outgoing Fields
*/
Fields resolveOutgoingGroupingSelector( Set<Scope> incomingScopes, Fields argumentSelector, Fields declared )
{
  try
  {
    return resolveOutgoingSelector( incomingScopes, argumentSelector, declared );
  }
  catch( OperatorException exception )
  {
    // propagate as-is, no re-wrapping
    throw exception;
  }
  catch( Exception exception )
  {
    String message = isBuffer() ? "could not resolve outgoing values selector in: " : "could not resolve outgoing grouping selector in: ";

    throw new OperatorException( this, message + this, exception );
  }
}
/**
* Returns the outgoing values fields of the first incoming scope, wrapping any failure in an
* OperatorException.
*
* @param incomingScopes scopes of the elements feeding this Every
* @return the out values Fields of the first incoming scope
*/
Fields resolveOutgoingValues( Set<Scope> incomingScopes )
{
  // an Every only changes the grouping stream; the value stream is passed along untouched
  try
  {
    Scope first = getFirst( incomingScopes );

    return first.getOutValuesFields();
  }
  catch( Exception exception )
  {
    throw new OperatorException( this, "could not resolve outgoing values selector in: " + this, exception );
  }
}
/**
* Method getHandler returns the {@link EveryHandler} for this instance: an EveryAssertionHandler when
* this Every is an assertion, an EveryAggregatorHandler when it holds an Aggregator, and an
* EveryBufferHandler in every other case.
*
* @param outgoingScope of type Scope
* @return EveryHandler
*/
public EveryHandler getHandler( Scope outgoingScope )
{
  if( isAssertion() )
    return new EveryAssertionHandler( outgoingScope );

  if( isAggregator() )
    return new EveryAggregatorHandler( outgoingScope );

  return new EveryBufferHandler( outgoingScope );
}
/**
* Class EveryHandler is a helper class that wraps Every instances. It holds the outgoing Scope, the
* collector that receives results, and the ConcreteCall handed to the wrapped operation; subclasses
* supply the start/operate/complete behaviour.
*/
public abstract class EveryHandler
{
  /** Field outgoingScope */
  public final Scope outgoingScope;
  /** Field outputCollector */
  public FlowCollector outputCollector;
  /** Field operationCall */
  ConcreteCall operationCall;

  /**
  * Creates a handler bound to the given scope, with a ConcreteCall built from the scope's arguments.
  *
  * @param outgoingScope outgoing Scope of the enclosing Every
  */
  public EveryHandler( Scope outgoingScope )
  {
    this.outgoingScope = outgoingScope;
    this.operationCall = new ConcreteCall( outgoingScope.getArguments() );
  }

  /** Returns the enclosing Every instance. */
  public Every getEvery()
  {
    return Every.this;
  }

  /** Delegates to the enclosing Every's operation prepare, passing this handler's operation call. */
  public void prepare( FlowProcess flowProcess )
  {
    Every.this.getOperation().prepare( flowProcess, operationCall );
  }

  /** Delegates to the enclosing Every's operation cleanup, passing this handler's operation call. */
  public void cleanup( FlowProcess flowProcess )
  {
    Every.this.getOperation().cleanup( flowProcess, operationCall );
  }

  public abstract void start( FlowProcess flowProcess, TupleEntry groupEntry );

  public abstract void operate( FlowProcess flowProcess, TupleEntry groupEntry, TupleEntry inputEntry, TupleEntryIterator tupleEntryIterator );

  public abstract void complete( FlowProcess flowProcess, TupleEntry groupEntry );

  /** Returns the string form of the enclosing Every. */
  @Override
  public String toString()
  {
    return String.valueOf( Every.this );
  }
}
/**
* Class EveryAggregatorHandler drives the {@link Aggregator} held by the enclosing Every through its
* start, aggregate and complete calls. Tuples emitted by the Aggregator during complete are passed through
* makeResult together with the current group entry and forwarded to the output collector.
*/
public class EveryAggregatorHandler extends EveryHandler
{
  EveryTupleCollector tupleCollector;

  /** Collector that remembers the entry to combine with each emitted result Tuple. */
  private abstract class EveryTupleCollector extends TupleEntryCollector
  {
    TupleEntry value;

    public EveryTupleCollector( Fields fields )
    {
      super( fields );
    }
  }

  public EveryAggregatorHandler( final Scope outgoingScope )
  {
    super( outgoingScope );

    tupleCollector = new EveryTupleCollector( outgoingScope.getDeclaredFields() )
    {
      protected void collect( Tuple tuple )
      {
        outputCollector.collect(
          makeResult(
            outgoingScope.getOutGroupingSelector(),
            value,
            outgoingScope.getRemainderFields(),
            outgoingScope.getDeclaredEntry(),
            tuple ) );
      }
    };
  }

  /** Clears arguments and output collector on the call, sets the group, then starts the Aggregator. */
  public void start( FlowProcess flowProcess, TupleEntry groupEntry )
  {
    operationCall.setArguments( null ); // reset
    operationCall.setOutputCollector( null ); // reset
    operationCall.setGroup( groupEntry );

    try
    {
      getAggregator().start( flowProcess, operationCall );
    }
    catch( Exception exception )
    {
      // CascadingExceptions pass through untouched, everything else is wrapped
      if( exception instanceof CascadingException )
        throw (CascadingException) exception;

      throw new OperatorException( Every.this, "operator Every failed starting aggregator", exception );
    }
  }

  /** Selects the arguments from the input entry and hands them to the Aggregator. */
  public void operate( FlowProcess flowProcess, TupleEntry groupEntry, TupleEntry inputEntry, TupleEntryIterator tupleEntryIterator )
  {
    try
    {
      TupleEntry selected = outgoingScope.getArgumentsEntry( inputEntry );

      operationCall.setArguments( selected );
      getAggregator().aggregate( flowProcess, operationCall );
    }
    catch( Throwable throwable )
    {
      // CascadingExceptions pass through untouched, everything else is wrapped
      if( throwable instanceof CascadingException )
        throw (CascadingException) throwable;

      throw new OperatorException( Every.this, "operator Every failed executing aggregator: " + operation, throwable );
    }
  }

  /** Installs the result collector for the given group entry and completes the Aggregator. */
  public void complete( FlowProcess flowProcess, TupleEntry groupEntry )
  {
    tupleCollector.value = groupEntry;
    operationCall.setArguments( null );
    operationCall.setOutputCollector( tupleCollector );

    try
    {
      getAggregator().complete( flowProcess, operationCall );
    }
    catch( Exception exception )
    {
      // CascadingExceptions pass through untouched, everything else is wrapped
      if( exception instanceof CascadingException )
        throw (CascadingException) exception;

      throw new OperatorException( Every.this, "operator Every failed completing aggregator", exception );
    }
  }
}
/**
* Class EveryBufferHandler drives the {@link Buffer} held by the enclosing Every. All work happens in
* {@link #operate}; {@link #start} and {@link #complete} are intentionally empty.
*/
public class EveryBufferHandler extends EveryHandler
{
/** Collector handed to the Buffer; results are combined with {@code value} and forwarded to outputCollector. */
EveryTupleCollector tupleCollector;
/** Collector that remembers the entry to combine with each emitted result Tuple. */
private abstract class EveryTupleCollector extends TupleEntryCollector
{
/** Entry passed to makeResult as the input entry for each collected Tuple. */
TupleEntry value;
public EveryTupleCollector( Fields fields )
{
super( fields );
}
}
/**
* Creates the handler and its collector, which is declared with the scope's declared fields.
*
* @param outgoingScope outgoing Scope of the enclosing Every
*/
public EveryBufferHandler( final Scope outgoingScope )
{
super( outgoingScope );
tupleCollector = new EveryTupleCollector( outgoingScope.getDeclaredFields() )
{
protected void collect( Tuple tuple )
{
outputCollector.collect( makeResult( outgoingScope.getOutGroupingSelector(), value, outgoingScope.getRemainderFields(), outgoingScope.getDeclaredEntry(), tuple ) );
}
};
}
/**
* Returns the entry currently held by the collector, i.e. the entry assigned in the last call to operate.
*
* @return TupleEntry, or null if operate has not been called yet
*/
public TupleEntry getLastValue()
{
return tupleCollector.value;
}
/** No-op: a Buffer is only invoked through operate. */
public void start( FlowProcess flowProcess, TupleEntry groupEntry )
{
}
/**
* Invokes the Buffer once for the current grouping, giving it an arguments iterator that wraps the
* given tupleEntryIterator.
*
* @param flowProcess current FlowProcess, passed through to the Buffer
* @param groupEntry entry holding the current grouping, set as the group on the operation call
* @param inputEntry not used by this handler
* @param tupleEntryIterator iterator over the entries of the current grouping
* @throws OperatorException wrapping any non-CascadingException Throwable raised by the Buffer
*/
public void operate( FlowProcess flowProcess, TupleEntry groupEntry, TupleEntry inputEntry, final TupleEntryIterator tupleEntryIterator )
{
// we want to null out any 'values' before and after the iterator begins/ends
// this allows buffers to emit tuples before next() and when hasNext() return false;
final TupleEntry tupleEntry = tupleEntryIterator.getTupleEntry();
// NOTE(review): presumably setOnEmpty builds a Tuple carrying only the group values with all other
// positions left null -- confirm against Tuples.setOnEmpty
final Tuple valueNulledTuple = Tuples.setOnEmpty( tupleEntry, groupEntry );
tupleEntry.setTuple( valueNulledTuple );
// the collector shares the iterator's entry instance, so it sees whatever Tuple that entry currently holds
tupleCollector.value = tupleEntry; // null out header entries
operationCall.setOutputCollector( tupleCollector );
operationCall.setGroup( groupEntry );
operationCall.setArgumentsIterator( new Iterator<TupleEntry>()
{
public boolean hasNext()
{
boolean hasNext = tupleEntryIterator.hasNext();
// once the underlying iterator is exhausted, restore the value-nulled tuple on the shared entry
if( !hasNext )
tupleEntry.setTuple( valueNulledTuple ); // null out footer entries
return hasNext;
}
public TupleEntry next()
{
// narrow each underlying entry to the argument fields before handing it to the Buffer
return outgoingScope.getArgumentsEntry( (TupleEntry) tupleEntryIterator.next() );
}
public void remove()
{
tupleEntryIterator.remove();
}
} );
try
{
getReducer().operate( flowProcess, operationCall );
}
catch( CascadingException exception )
{
// CascadingExceptions are propagated unchanged
throw exception;
}
catch( Throwable throwable )
{
throw new OperatorException( Every.this, "operator Every failed executing buffer: " + operation, throwable );
}
}
/** No-op: a Buffer is only invoked through operate. */
public void complete( FlowProcess flowProcess, TupleEntry groupEntry )
{
}
}
/**
* Class EveryAssertionHandler drives the {@link GroupAssertion} held by the enclosing Every through its
* start, aggregate and doAssert calls. Exceptions raised by the assertion are not caught or wrapped. After
* doAssert returns, the group Tuple is forwarded to the output collector.
*/
public class EveryAssertionHandler extends EveryHandler
{
  public EveryAssertionHandler( Scope outgoingScope )
  {
    super( outgoingScope );
  }

  /** Clears arguments and output collector on the call, sets the group, then starts the assertion. */
  public void start( FlowProcess flowProcess, TupleEntry groupEntry )
  {
    operationCall.setArguments( null );
    operationCall.setOutputCollector( null ); // reset
    operationCall.setGroup( groupEntry );

    GroupAssertion assertion = getGroupAssertion();

    assertion.start( flowProcess, operationCall );
  }

  /** Selects the arguments from the input entry and hands them to the assertion. */
  public void operate( FlowProcess flowProcess, TupleEntry groupEntry, TupleEntry inputEntry, TupleEntryIterator tupleEntryIterator )
  {
    TupleEntry selected = outgoingScope.getArgumentsEntry( inputEntry );

    operationCall.setArguments( selected );

    // exceptions from the assertion are deliberately left uncaught
    GroupAssertion assertion = getGroupAssertion();

    assertion.aggregate( flowProcess, operationCall );
  }

  /** Runs the assertion for the grouping and then emits the group Tuple. */
  public void complete( FlowProcess flowProcess, TupleEntry groupEntry )
  {
    operationCall.setArguments( null );

    GroupAssertion assertion = getGroupAssertion();

    assertion.doAssert( flowProcess, operationCall );

    Tuple groupTuple = groupEntry.getTuple();

    outputCollector.collect( groupTuple );
  }
}
}