/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.flow.stack;
import java.io.IOException;
import java.util.Set;
import cascading.flow.FlowElement;
import cascading.flow.FlowStep;
import cascading.flow.Scope;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.operation.Function;
import cascading.pipe.Each;
import cascading.pipe.Group;
import cascading.pipe.Pipe;
import cascading.tap.Tap;
import cascading.tap.TempHfs;
import cascading.tuple.Tuple;
import cascading.util.Util;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.log4j.Logger;
/**
 * FlowMapperStack builds and drives the chain of {@link MapperStackElement} instances that
 * process key/value pairs on the mapper side of a single {@link FlowStep}.
 * <p>
 * One element stack is built per outgoing {@link Scope} of the current source {@link Tap};
 * each stack begins after the shared {@link SourceMapperStackElement} and terminates in
 * either a {@link Group} or a sink {@link Tap}.
 */
public class FlowMapperStack
  {
  /** Field LOG */
  private static final Logger LOG = Logger.getLogger( FlowMapperStack.class );

  /** Field step - the current FlowStep, deserialized from the job configuration */
  private final FlowStep step;
  /** Field currentSource - the source Tap feeding this mapper, set by the MultiInputSplit */
  private final Tap currentSource;
  /** Field flowSession - the current Hadoop flow process context */
  private final HadoopFlowProcess flowProcess;
  /** Field stack - one element stack per outgoing scope of the current source */
  private Stack[] stacks;
  /** Field sourceElement - shared head element that converts raw key/value pairs to Tuples */
  private SourceMapperStackElement sourceElement;

  /** Class Stack is a simple holder for stack head and tails */
  private static class Stack
    {
    /** Field stackHead - first element, receives the sourced Tuple */
    MapperStackElement head;
    /** Field stackTail - last element, a Group or sink Tap element */
    MapperStackElement tail;
    }

  /**
   * Constructor FlowMapperStack deserializes the current FlowStep and source Tap from the
   * given job configuration, builds the element stacks, and opens each stack tail.
   *
   * @param flowProcess of type HadoopFlowProcess
   * @throws IOException when the stacks cannot be built or opened
   */
  public FlowMapperStack( HadoopFlowProcess flowProcess ) throws IOException
    {
    this.flowProcess = flowProcess;

    JobConf jobConf = flowProcess.getJobConf();

    step = (FlowStep) Util.deserializeBase64( jobConf.getRaw( "cascading.flow.step" ) );

    // is set by the MultiInputSplit
    currentSource = (Tap) Util.deserializeBase64( jobConf.getRaw( "cascading.step.source" ) );

    if( LOG.isDebugEnabled() )
      LOG.debug( "map current source: " + currentSource );

    buildStack();

    for( Stack stack : stacks )
      stack.tail.open();
    }

  /**
   * Method buildStack creates one element stack per scope leaving the current source. Each
   * stack is a chain of Each elements ending in either a Group or a sink Tap element.
   *
   * @throws IOException when an element cannot be created
   */
  private void buildStack() throws IOException
    {
    Set<Scope> incomingScopes = step.getNextScopes( currentSource );

    sourceElement = makeSourceElement( incomingScopes );

    stacks = new Stack[ incomingScopes.size() ];

    int i = 0;
    boolean allFilters = true; // remains true while no Function operations are seen

    for( Scope incomingScope : incomingScopes )
      {
      FlowElement operator = step.getNextFlowElement( incomingScope );

      stacks[ i ] = new Stack();
      stacks[ i ].tail = null;

      String trapName = null;
      Tap trap = null;

      // chain all consecutive Each elements onto the stack
      while( operator instanceof Each )
        {
        trapName = ( (Pipe) operator ).getName();
        trap = step.getMapperTrap( trapName );
        stacks[ i ].tail = new EachMapperStackElement( stacks[ i ].tail, flowProcess, incomingScope, trap, (Each) operator );

        if( ( (Each) operator ).getOperation() instanceof Function )
          allFilters = false;

        incomingScope = step.getNextScope( operator );
        operator = step.getNextFlowElement( incomingScope );
        }

      if( operator instanceof Group )
        {
        Scope outgoingScope = step.getNextScope( operator ); // is always Group

        // when all operations so far are filters, the incoming tuple may be shared across
        // stacks, so copy it unless this is the last stack
        boolean copyTuple = allFilters && i != stacks.length - 1;

        if( !copyTuple ) // only copy if the out values are the function results
          copyTuple = incomingScope.getOutValuesSelector() == null || incomingScope.getOutValuesSelector().equals( incomingScope.getDeclaredFields() );

        trapName = ( (Pipe) operator ).getName();
        trap = step.getMapperTrap( trapName );
        stacks[ i ].tail = new GroupMapperStackElement( stacks[ i ].tail, flowProcess, incomingScope, trap, (Group) operator, outgoingScope, copyTuple );
        }
      else if( operator instanceof Tap )
        {
        boolean useTapCollector = ( (Tap) operator ).isWriteDirect();

        stacks[ i ].tail = new SinkMapperStackElement( stacks[ i ].tail, flowProcess, incomingScope, trapName, trap, (Tap) operator, useTapCollector );
        }
      else
        throw new IllegalStateException( "operator should be group or tap, is instead: " + operator.getClass().getName() );

      stacks[ i ].head = (MapperStackElement) stacks[ i ].tail.resolveStack();

      i++;
      }
    }

  /**
   * Method makeSourceElement creates the shared source element, attaching a trap only when
   * the source is not an intermediate (TempHfs) tap and the next element is a Pipe.
   *
   * @param incomingScopes of type Set&lt;Scope&gt;
   * @return SourceMapperStackElement
   * @throws IOException when the element cannot be created
   */
  private SourceMapperStackElement makeSourceElement( Set<Scope> incomingScopes ) throws IOException
    {
    Scope scope = incomingScopes.iterator().next();
    FlowElement operator = step.getNextFlowElement( scope );

    // no need to bother with traps for intermediate sources
    // should prevent confusing info message below
    if( currentSource instanceof TempHfs || !( operator instanceof Pipe ) )
      return new SourceMapperStackElement( flowProcess, scope, currentSource );

    String trapName = ( (Pipe) operator ).getName();
    Tap trap = step.getMapperTrap( trapName );

    if( trap != null && incomingScopes.size() != 1 )
      LOG.info( "more than one possible trap for source tap, using trap named: " + trapName );

    return new SourceMapperStackElement( flowProcess, scope, trapName, trap, currentSource );
    }

  /**
   * Method rethrowCause unwraps the cause of the given StackException and rethrows it as an
   * Error, IOException, or RuntimeException.
   *
   * @param exception of type StackException
   * @throws IOException when the cause is an IOException
   */
  private static void rethrowCause( StackException exception ) throws IOException
    {
    Throwable cause = exception.getCause();

    if( cause instanceof Error )
      throw (Error) cause;

    if( cause instanceof IOException )
      throw (IOException) cause;

    throw (RuntimeException) cause;
    }

  /**
   * Method map sources a Tuple from the given key/value pair and pushes it through every
   * element stack. A null Tuple from the source element skips the pair entirely.
   *
   * @param key    of type Object
   * @param value  of type Object
   * @param output of type OutputCollector
   * @throws IOException when sourcing or collecting fails with an IOException cause
   */
  public void map( Object key, Object value, OutputCollector output ) throws IOException
    {
    Tuple tuple = null;

    try
      {
      tuple = sourceElement.source( key, value );
      }
    catch( StackException exception )
      {
      rethrowCause( exception );
      }

    if( LOG.isDebugEnabled() )
      {
      if( tuple == null )
        LOG.debug( "map skipping key and value" );

      if( LOG.isTraceEnabled() )
        {
        if( key instanceof Tuple )
          LOG.trace( "map key: " + ( (Tuple) key ).print() );
        else
          LOG.trace( "map key: [" + key + "]" );

        if( tuple != null )
          LOG.trace( "map value: " + tuple.print() );
        }
      }

    // skip the key/value pair if null is returned from the source
    if( tuple == null )
      return;

    for( Stack stack : stacks )
      {
      stack.tail.setLastOutput( output );

      try
        {
        stack.head.collect( tuple );
        }
      catch( StackException exception )
        {
        rethrowCause( exception );
        }
      }
    }

  /**
   * Method close closes every stack from its head. Note closing stops at the first failure;
   * remaining stacks are left open.
   *
   * @throws IOException when a stack element fails to close
   */
  public void close() throws IOException
    {
    for( Stack stack : stacks )
      stack.head.close();
    }
  }