/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.pipe;
import java.beans.ConstructorProperties;
import java.util.Set;
import cascading.CascadingException;
import cascading.flow.FlowCollector;
import cascading.flow.FlowElement;
import cascading.flow.FlowProcess;
import cascading.flow.Scope;
import cascading.operation.Assertion;
import cascading.operation.AssertionLevel;
import cascading.operation.ConcreteCall;
import cascading.operation.Debug;
import cascading.operation.DebugLevel;
import cascading.operation.Filter;
import cascading.operation.FilterCall;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.operation.ValueAssertion;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryCollector;
import org.apache.log4j.Logger;
/**
* The Each operator applies either a {@link Function} or a {@link Filter} to each entry in the {@link Tuple}
* stream. Any number of Each operators can follow an Each, {@link Group}, or {@link Every}
* operator.
*/
public class Each extends Operator
{
/** Field serialVersionUID */
private static final long serialVersionUID = 1L;
/** Field LOG */
private static final Logger LOG = Logger.getLogger( Each.class );
/** Field FUNCTION_SELECTOR */
private static final Fields FUNCTION_SELECTOR = Fields.RESULTS;
/** Field FILTER_SELECTOR */
private static final Fields FILTER_SELECTOR = Fields.RESULTS;
///////////////////
// TAKE FUNCTIONS
///////////////////
/**
* Pass all fields to the given function, only return fields declared by the function.
*
* @param name name for this branch of Pipes
* @param function Function to be applied to each input Tuple
*/
@ConstructorProperties({"name", "function"})
public Each( String name, Function function )
{
super( name, function, FUNCTION_SELECTOR );
}
/**
* Only pass argumentFields to the given function, only return fields declared by the function.
*
* @param name name for this branch of Pipes
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param function Function to be applied to each input Tuple
*/
@ConstructorProperties({"name", "argumentSelector", "function"})
public Each( String name, Fields argumentSelector, Function function )
{
super( name, argumentSelector, function, FUNCTION_SELECTOR );
}
/**
* Only pass argumentFields to the given function, only return fields selected by the outputSelector.
*
* @param name name for this branch of Pipes
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param function Function to be applied to each input Tuple
* @param outputSelector field selector that selects the output Tuple from the input and Function results Tuples
*/
@ConstructorProperties({"name", "argumentSelector", "function", "outputSelector"})
public Each( String name, Fields argumentSelector, Function function, Fields outputSelector )
{
super( name, argumentSelector, function, outputSelector );
}
/**
* Only return fields selected by the outputSelector.
*
* @param name name for this branch of Pipes
* @param function Function to be applied to each input Tuple
* @param outputSelector field selector that selects the output Tuple from the input and Function results Tuples
*/
@ConstructorProperties({"name", "function", "outputSelector"})
public Each( String name, Function function, Fields outputSelector )
{
super( name, function, outputSelector );
}
/**
* Pass all fields to the given function, only return fields declared by the function.
*
* @param previous previous Pipe to receive input Tuples from
* @param function Function to be applied to each input Tuple
*/
@ConstructorProperties({"previous", "function"})
public Each( Pipe previous, Function function )
{
super( previous, function, FUNCTION_SELECTOR );
}
/**
* Only pass argumentFields to the given function, only return fields declared by the function.
*
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param function Function to be applied to each input Tuple
*/
@ConstructorProperties({"previoud", "argumentSelector", "function"})
public Each( Pipe previous, Fields argumentSelector, Function function )
{
super( previous, argumentSelector, function, FUNCTION_SELECTOR );
}
/**
* Only pass argumentFields to the given function, only return fields selected by the outputSelector.
*
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param function Function to be applied to each input Tuple
* @param outputSelector field selector that selects the output Tuple from the input and Function results Tuples
*/
@ConstructorProperties({"previous", "argumentSelector", "function", "outputSelector"})
public Each( Pipe previous, Fields argumentSelector, Function function, Fields outputSelector )
{
super( previous, argumentSelector, function, outputSelector );
}
/**
* Only pass argumentFields to the given function, only return fields selected by the outputSelector.
*
* @param previous previous Pipe to receive input Tuples from
* @param function Function to be applied to each input Tuple
* @param outputSelector field selector that selects the output Tuple from the input and Function results Tuples
*/
@ConstructorProperties({"previous", "function", "outputSelector"})
public Each( Pipe previous, Function function, Fields outputSelector )
{
super( previous, function, outputSelector );
}
/////////////////
// TAKE FILTERS
/////////////////
/**
* Constructor Each creates a new Each instance.
*
* @param name name for this branch of Pipes
* @param filter Filter to be applied to each input Tuple
*/
@ConstructorProperties({"name", "filter"})
public Each( String name, Filter filter )
{
super( name, filter, FILTER_SELECTOR );
}
/**
* Constructor Each creates a new Each instance.
*
* @param name name for this branch of Pipes
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param filter Filter to be applied to each input Tuple
*/
@ConstructorProperties({"name", "argumentSelector", "filter"})
public Each( String name, Fields argumentSelector, Filter filter )
{
super( name, argumentSelector, filter, FILTER_SELECTOR );
}
/**
* Constructor Each creates a new Each instance.
*
* @param previous previous Pipe to receive input Tuples from
* @param filter Filter to be applied to each input Tuple
*/
@ConstructorProperties({"previous", "filter"})
public Each( Pipe previous, Filter filter )
{
super( previous, filter, FILTER_SELECTOR );
}
/**
* Constructor Each creates a new Each instance.
*
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param filter Filter to be applied to each input Tuple
*/
@ConstructorProperties({"previous", "argumentSelector", "filter"})
public Each( Pipe previous, Fields argumentSelector, Filter filter )
{
super( previous, argumentSelector, filter, FILTER_SELECTOR );
}
///////////////
// ASSERTIONS
///////////////
/**
* Constructor Each creates a new Each instance.
*
* @param name name for this branch of Pipes
* @param assertionLevel AssertionLevel to associate with the Assertion
* @param assertion Assertion to be applied to each input Tuple
*/
@ConstructorProperties({"name", "assertionLevel", "assertion"})
public Each( String name, AssertionLevel assertionLevel, Assertion assertion )
{
super( name, assertionLevel, assertion, FILTER_SELECTOR );
}
/**
* @param name name for this branch of Pipes
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param assertionLevel AssertionLevel to associate with the Assertion
* @param assertion Assertion to be applied to each input Tuple
*/
@ConstructorProperties({"name", "argumentSelector", "assertionLevel", "assertion"})
public Each( String name, Fields argumentSelector, AssertionLevel assertionLevel, Assertion assertion )
{
super( name, argumentSelector, assertionLevel, assertion, FILTER_SELECTOR );
}
/**
* @param previous previous Pipe to receive input Tuples from
* @param assertionLevel AssertionLevel to associate with the Assertion
* @param assertion Assertion to be applied to each input Tuple
*/
@ConstructorProperties({"previous", "assertionLevel", "assertion"})
public Each( Pipe previous, AssertionLevel assertionLevel, Assertion assertion )
{
super( previous, assertionLevel, assertion, FILTER_SELECTOR );
}
/**
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param assertionLevel AssertionLevel to associate with the Assertion
* @param assertion Assertion to be applied to each input Tuple
*/
@ConstructorProperties({"previous", "argumentSelector", "assertionLevel", "assertion"})
public Each( Pipe previous, Fields argumentSelector, AssertionLevel assertionLevel, Assertion assertion )
{
super( previous, argumentSelector, assertionLevel, assertion, FILTER_SELECTOR );
}
//////////
//DEBUG
//////////
/**
* @param name name for this branch of Pipes
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param debugLevel DebugLevel to associate with the Debug
* @param debug Debug to be applied to each input Tuple
*/
@ConstructorProperties({"name", "argumentSelector", "debugLevel", "debug"})
public Each( String name, Fields argumentSelector, DebugLevel debugLevel, Debug debug )
{
super( name, argumentSelector, debugLevel, debug, FILTER_SELECTOR );
}
/**
* @param previous previous Pipe to receive input Tuples from
* @param debugLevel DebugLevel to associate with the Debug
* @param debug Debug to be applied to each input Tuple
*/
@ConstructorProperties({"previous", "debuglevel", "debug"})
public Each( Pipe previous, DebugLevel debugLevel, Debug debug )
{
super( previous, debugLevel, debug, FILTER_SELECTOR );
}
/**
* @param previous previous Pipe to receive input Tuples from
* @param argumentSelector field selector that selects Function arguments from the input Tuple
* @param debugLevel DebugLevel to associate with the Debug
* @param debug Debug to be applied to each input Tuple
*/
@ConstructorProperties({"previous", "argumentSelector", "debugLevel", "debug"})
public Each( Pipe previous, Fields argumentSelector, DebugLevel debugLevel, Debug debug )
{
super( previous, argumentSelector, debugLevel, debug, FILTER_SELECTOR );
}
@Override
protected void verifyOperation()
{
// backwards compatibility with 1.0
if( plannerLevel == null && operation instanceof Debug )
plannerLevel = DebugLevel.DEFAULT;
super.verifyOperation();
if( !argumentSelector.isArgSelector() )
throw new IllegalArgumentException( "invalid argument selector: " + argumentSelector );
if( !operation.getFieldDeclaration().isDeclarator() )
throw new IllegalArgumentException( "invalid field declaration: " + operation.getFieldDeclaration() );
if( !outputSelector.isOutSelector() )
throw new IllegalArgumentException( "invalid output selector: " + outputSelector );
}
private Function getFunction()
{
return (Function) operation;
}
private Filter getFilter()
{
return (Filter) operation;
}
private ValueAssertion getValueAssertion()
{
return (ValueAssertion) operation;
}
private boolean isFunction()
{
return operation instanceof Function;
}
private boolean isFilter()
{
return operation instanceof Filter;
}
private void applyAssertion( FlowProcess flowProcess, FlowCollector flowCollector, TupleEntry input, ConcreteCall operationCall )
{
getValueAssertion().doAssert( flowProcess, operationCall );
flowCollector.collect( input.getTuple() );
}
private void applyFilter( FlowProcess flowProcess, FlowCollector flowCollector, TupleEntry input, FilterCall filterCall )
{
boolean isRemove = false;
isRemove = getFilter().isRemove( flowProcess, filterCall );
if( !isRemove )
flowCollector.collect( input.getTuple() );
}
private void applyFunction( FlowProcess flowProcess, FunctionCall functionCall )
{
getFunction().operate( flowProcess, functionCall ); // adds results to collector
}
// FIELDS
private Fields getFieldsFor( Scope incomingScope )
{
if( incomingScope.isEvery() )
return incomingScope.getOutGroupingFields();
else
return incomingScope.getOutValuesFields();
}
@Override
public Fields resolveIncomingOperationFields( Scope incomingScope )
{
return getFieldsFor( incomingScope );
}
@Override
public Fields resolveFields( Scope scope )
{
return getFieldsFor( scope );
}
/** @see Operator#outgoingScopeFor(Set) */
public Scope outgoingScopeFor( Set<Scope> incomingScopes )
{
Fields argumentFields = resolveArgumentSelector( incomingScopes );
verifyArguments( argumentFields );
Fields declaredFields = resolveDeclared( incomingScopes, argumentFields );
verifyDeclaredFields( declaredFields );
Fields outgoingValuesFields = resolveOutgoingValuesSelector( incomingScopes, argumentFields, declaredFields );
verifyOutputSelector( outgoingValuesFields );
Fields outgoingGroupingFields = Fields.asDeclaration( outgoingValuesFields );
Fields remainderFields = resolveRemainderFields( incomingScopes, argumentFields );
return new Scope( getName(), Scope.Kind.EACH, remainderFields, argumentFields, declaredFields, outgoingGroupingFields, outgoingValuesFields );
}
Fields resolveOutgoingValuesSelector( Set<Scope> incomingScopes, Fields argumentFields, Fields declaredFields )
{
try
{
return resolveOutgoingSelector( incomingScopes, argumentFields, declaredFields );
}
catch( Exception exception )
{
if( exception instanceof OperatorException )
throw (OperatorException) exception;
throw new OperatorException( this, "could not resolve outgoing values selector in: " + this, exception );
}
}
public EachHandler getHandler( FlowCollector flowCollector, Scope scope )
{
if( isFunction() )
return new EachFunctionHandler( flowCollector, scope );
else if( isFilter() )
return new EachFilterHandler( flowCollector, scope );
else
return new EachAssertionHandler( flowCollector, scope );
}
/** Class EachHandler is a helper class that wraps Each instances. */
public abstract class EachHandler
{
FlowCollector flowCollector;
final Scope scope;
protected ConcreteCall operationCall;
protected EachHandler( FlowCollector flowCollector, Scope scope )
{
this.flowCollector = flowCollector;
this.scope = scope;
operationCall = new ConcreteCall( scope.getArguments() );
}
public void operate( FlowProcess flowProcess, TupleEntry input )
{
try
{
if( LOG.isDebugEnabled() )
LOG.debug( operation + " incoming entry: " + input );
TupleEntry arguments = scope.getArgumentsEntry( input );
if( LOG.isDebugEnabled() )
LOG.debug( operation + " arg entry: " + arguments );
handle( flowProcess, input, arguments );
}
catch( CascadingException exception )
{
throw exception;
}
catch( Throwable exception )
{
throw new OperatorException( Each.this, "operator Each failed executing operation", exception );
}
}
abstract void handle( FlowProcess flowProcess, TupleEntry input, TupleEntry arguments );
public FlowElement getEach()
{
return Each.this;
}
public void prepare( FlowProcess flowProcess )
{
getOperation().prepare( flowProcess, operationCall );
}
public void cleanup( FlowProcess flowProcess )
{
getOperation().cleanup( flowProcess, operationCall );
}
}
public class EachFunctionHandler extends EachHandler
{
EachTupleCollector tupleCollector;
private abstract class EachTupleCollector extends TupleEntryCollector
{
Scope scope;
TupleEntry input;
private EachTupleCollector( Fields fields, Scope scope )
{
super( fields );
this.scope = scope;
}
}
public EachFunctionHandler( final FlowCollector flowCollector, Scope scope )
{
super( flowCollector, scope );
tupleCollector = new EachTupleCollector( scope.getDeclaredEntry().getFields(), scope )
{
protected void collect( Tuple tuple )
{
flowCollector.collect( makeResult( scope.getOutValuesSelector(), input, scope.getRemainderFields(), scope.getDeclaredEntry(), tuple ) );
}
};
operationCall.setOutputCollector( tupleCollector );
}
void handle( FlowProcess flowProcess, TupleEntry input, TupleEntry arguments )
{
tupleCollector.input = input;
operationCall.setArguments( arguments );
applyFunction( flowProcess, operationCall );
}
}
public class EachFilterHandler extends EachHandler
{
public EachFilterHandler( FlowCollector flowCollector, Scope scope )
{
super( flowCollector, scope );
}
void handle( FlowProcess flowProcess, TupleEntry input, TupleEntry arguments )
{
operationCall.setArguments( arguments );
applyFilter( flowProcess, flowCollector, input, operationCall );
}
}
public class EachAssertionHandler extends EachHandler
{
public EachAssertionHandler( FlowCollector flowCollector, Scope scope )
{
super( flowCollector, scope );
}
void handle( FlowProcess flowProcess, TupleEntry input, TupleEntry arguments )
{
operationCall.setArguments( arguments );
applyAssertion( flowProcess, flowCollector, input, operationCall );
}
}
}