/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.flow.stack;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import cascading.flow.FlowElement;
import cascading.flow.FlowStep;
import cascading.flow.Scope;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.pipe.Each;
import cascading.pipe.Every;
import cascading.pipe.Pipe;
import cascading.tap.Tap;
import cascading.tap.TempHfs;
import cascading.tuple.Tuple;
import cascading.util.Util;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.log4j.Logger;
/**
 * FlowReducerStack builds and drives the chain of {@link ReducerStackElement} instances
 * that implements the reduce side of a {@link FlowStep}: the step's Group, any subsequent
 * Every and Each operators, and finally the sink {@link Tap}. Each call to
 * {@link #reduce(Object, Iterator, OutputCollector)} pushes one grouping through the stack.
 */
public class FlowReducerStack
{
/** Field LOG */
private static final Logger LOG = Logger.getLogger( FlowReducerStack.class );
/** Field step */
private final FlowStep step;
/** Field jobConf */
private final JobConf jobConf;
/** Field flowProcess */
private final HadoopFlowProcess flowProcess;
/** Field stackHead */
private ReducerStackElement stackHead;
/** Field stackTail */
private ReducerStackElement stackTail;
public FlowReducerStack( HadoopFlowProcess flowProcess ) throws IOException
{
this.flowProcess = flowProcess;
this.jobConf = flowProcess.getJobConf();
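// the current FlowStep was serialized into the job configuration under "cascading.flow.step"; recover it here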
step = (FlowStep) Util.deserializeBase64( jobConf.getRaw( "cascading.flow.step" ) );
// early versions of Hadoop 0.19 instantiated this class with no intention of calling reduce()
if( jobConf.getNumReduceTasks() == 0 )
return;
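// a reducer stack is only meaningful if this step has a Group; otherwise the planner sets zero reduce tasks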
if( step.getGroup() == null )
throw new IllegalStateException( "this step's reducer should not have been created, num reducers should be zero, found: " + jobConf.getNumReduceTasks() + ", in step: " + step.getStepName() );
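// assemble the element chain, then open the stack from its tail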
buildStack();
stackTail.open();
}
private void buildStack() throws IOException
{
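// the Group heads the reduce side; its incoming and outgoing scopes seed the tail of the stack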
Set<Scope> previousScopes = step.getPreviousScopes( step.getGroup() );
Scope nextScope = step.getNextScope( step.getGroup() );
String trapName = ( (Pipe) step.getGroup() ).getName();
Tap trap = step.getReducerTrap( trapName );
stackTail = new GroupReducerStackElement( flowProcess, previousScopes, step.getGroup(), nextScope, nextScope.getOutGroupingFields(), trap );
FlowElement operator = step.getNextFlowElement( nextScope );
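// a non-buffer Every applies an Aggregator; chain each one and register its handler with a shared all-aggregators element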
if( operator instanceof Every && !( (Every) operator ).isBuffer() )
{
List<Every.EveryHandler> allAggregators = new ArrayList<Every.EveryHandler>();
Scope incomingScope = nextScope;
stackTail = new EveryAllAggregatorReducerStackElement( stackTail, flowProcess, incomingScope, step.getReducerTraps(), allAggregators );
while( operator instanceof Every && !( (Every) operator ).isBuffer() )
{
nextScope = step.getNextScope( operator );
Every.EveryHandler everyHandler = ( (Every) operator ).getHandler( nextScope );
allAggregators.add( everyHandler );
trapName = ( (Pipe) operator ).getName();
trap = step.getReducerTrap( trapName );
stackTail = new EveryAggregatorReducerStackElement( stackTail, flowProcess, incomingScope, trap, everyHandler );
incomingScope = nextScope;
operator = step.getNextFlowElement( nextScope );
}
}
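// a buffer Every consumes the grouping's value iterator directly, so each one becomes its own stack element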
else if( operator instanceof Every && ( (Every) operator ).isBuffer() )
{
Scope incomingScope = nextScope;
while( operator instanceof Every && ( (Every) operator ).isBuffer() )
{
nextScope = step.getNextScope( operator );
Every.EveryHandler everyHandler = ( (Every) operator ).getHandler( nextScope );
trapName = ( (Pipe) operator ).getName();
trap = step.getReducerTrap( trapName );
stackTail = new EveryBufferReducerStackElement( stackTail, flowProcess, incomingScope, trap, everyHandler );
incomingScope = nextScope;
operator = step.getNextFlowElement( nextScope );
}
}
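// any trailing Each operators (functions and filters) are appended in pipeline order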
while( operator instanceof Each )
{
trapName = ( (Pipe) operator ).getName();
trap = step.getReducerTrap( trapName );
stackTail = new EachReducerStackElement( stackTail, flowProcess, nextScope, trap, (Each) operator );
nextScope = step.getNextScope( operator );
operator = step.getNextFlowElement( nextScope );
}
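// whatever remains must be the sink Tap for this step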
boolean useTapCollector = ( (Tap) operator ).isWriteDirect();
// no need for traps around intermediate files
if( operator instanceof TempHfs )
stackTail = new SinkReducerStackElement( stackTail, flowProcess, nextScope, (Tap) operator, useTapCollector );
else
stackTail = new SinkReducerStackElement( stackTail, flowProcess, nextScope, trapName, trap, (Tap) operator, useTapCollector );
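// resolve the chain from the tail and keep a reference to its head, where collect() is driven from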
stackHead = (ReducerStackElement) stackTail.resolveStack();
}
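/** Pushes a single grouping, its key and value iterator, through the stack. */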
public void reduce( Object key, Iterator values, OutputCollector output ) throws IOException
{
if( LOG.isTraceEnabled() )
{
LOG.trace( "reduce fields: " + stackHead.getOutGroupingFields() );
LOG.trace( "reduce key: " + ( (Tuple) key ).print() );
}
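// the sink element at the tail needs the task's OutputCollector before the grouping is pushed through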
stackTail.setLastOutput( output );
try
{
stackHead.collect( (Tuple) key, values );
}
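// StackException merely wraps the original cause; rethrow it as its concrete type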
catch( StackException exception )
{
if( exception.getCause() instanceof Error )
throw (Error) exception.getCause();
if( exception.getCause() instanceof IOException )
throw (IOException) exception.getCause();
throw (RuntimeException) exception.getCause();
}
}
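/** Closes the stack, starting from its head element. */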
public void close() throws IOException
{
stackHead.close();
}
}