/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.flow.stack; import java.io.IOException; import java.util.Set; import cascading.flow.FlowElement; import cascading.flow.FlowStep; import cascading.flow.Scope; import cascading.flow.hadoop.HadoopFlowProcess; import cascading.operation.Function; import cascading.pipe.Each; import cascading.pipe.Group; import cascading.pipe.Pipe; import cascading.tap.Tap; import cascading.tap.TempHfs; import cascading.tuple.Tuple; import cascading.util.Util; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.log4j.Logger; /** * */ public class FlowMapperStack { /** Field LOG */ private static final Logger LOG = Logger.getLogger( FlowMapperStack.class ); /** Field step */ private final FlowStep step; /** Field currentSource */ private final Tap currentSource; /** Field flowSession */ private final HadoopFlowProcess flowProcess; /** Field stack */ private Stack stacks[]; /** Field sourceElement */ private SourceMapperStackElement sourceElement; /** Class Stack is a simple holder for stack head and tails */ private class Stack { /** Field stackHead */ MapperStackElement head; /** Field stackTail */ MapperStackElement tail; } public FlowMapperStack( HadoopFlowProcess flowProcess ) throws IOException { this.flowProcess = flowProcess; JobConf jobConf = flowProcess.getJobConf(); step = (FlowStep) Util.deserializeBase64( jobConf.getRaw( "cascading.flow.step" ) ); // is set by the MultiInputSplit currentSource = (Tap) Util.deserializeBase64( jobConf.getRaw( "cascading.step.source" ) ); if( LOG.isDebugEnabled() ) LOG.debug( "map current source: " + currentSource ); buildStack(); for( Stack stack : stacks ) stack.tail.open(); } private void buildStack() throws IOException { Set<Scope> incomingScopes = step.getNextScopes( currentSource ); sourceElement = makeSourceElement( incomingScopes ); stacks = new Stack[ incomingScopes.size() ]; int i = 0; boolean allFilters = true; for( Scope incomingScope : incomingScopes ) { FlowElement operator = step.getNextFlowElement( incomingScope ); stacks[ i ] = new Stack(); stacks[ i ].tail = null; String trapName = null; Tap trap = null; while( operator instanceof Each ) { trapName = ( (Pipe) operator ).getName(); trap = step.getMapperTrap( trapName ); stacks[ i ].tail = new EachMapperStackElement( stacks[ i ].tail, flowProcess, incomingScope, trap, (Each) operator ); if( ( (Each) operator ).getOperation() instanceof Function ) allFilters = false; incomingScope = step.getNextScope( operator ); operator = step.getNextFlowElement( incomingScope ); } boolean useTapCollector = false; if( operator instanceof Group ) { Scope outgoingScope = step.getNextScope( operator ); // is always Group boolean copyTuple = allFilters && i != stacks.length - 1; if( !copyTuple ) // only copy if the out values are the function results copyTuple = incomingScope.getOutValuesSelector() == null || incomingScope.getOutValuesSelector().equals( incomingScope.getDeclaredFields() ); trapName = ( (Pipe) operator ).getName(); trap = step.getMapperTrap( trapName ); stacks[ i ].tail = new GroupMapperStackElement( stacks[ i ].tail, flowProcess, incomingScope, trap, (Group) operator, outgoingScope, copyTuple ); } else if( operator instanceof Tap ) { useTapCollector = useTapCollector || ( (Tap) operator ).isWriteDirect(); stacks[ i ].tail = new SinkMapperStackElement( stacks[ i ].tail, flowProcess, incomingScope, trapName, trap, (Tap) operator, useTapCollector ); } else throw new IllegalStateException( "operator should be group or tap, is instead: " + operator.getClass().getName() ); stacks[ i ].head = (MapperStackElement) stacks[ i ].tail.resolveStack(); i++; } } private SourceMapperStackElement makeSourceElement( Set<Scope> incomingScopes ) throws IOException { Scope scope = incomingScopes.iterator().next(); FlowElement operator = step.getNextFlowElement( scope ); // no need to bother with traps for intermediate sources // should prevent confusing info message below if( currentSource instanceof TempHfs || !( operator instanceof Pipe ) ) return new SourceMapperStackElement( flowProcess, scope, currentSource ); String trapName = ( (Pipe) operator ).getName(); Tap trap = step.getMapperTrap( trapName ); if( trap != null && incomingScopes.size() != 1 ) LOG.info( "more than one possible trap for source tap, using trap named: " + trapName ); return new SourceMapperStackElement( flowProcess, scope, trapName, trap, currentSource ); } public void map( Object key, Object value, OutputCollector output ) throws IOException { Tuple tuple = null; try { tuple = sourceElement.source( key, value ); } catch( StackException exception ) { if( exception.getCause() instanceof Error ) throw (Error) exception.getCause(); if( exception.getCause() instanceof IOException ) throw (IOException) exception.getCause(); throw (RuntimeException) exception.getCause(); } if( LOG.isDebugEnabled() ) { if( tuple == null ) LOG.debug( "map skipping key and value" ); if( LOG.isTraceEnabled() ) { if( key instanceof Tuple ) LOG.trace( "map key: " + ( (Tuple) key ).print() ); else LOG.trace( "map key: [" + key + "]" ); if( tuple != null ) LOG.trace( "map value: " + tuple.print() ); } } // skip the key/value pair if null is returned from the source if( tuple == null ) return; for( int i = 0; i < stacks.length; i++ ) { stacks[ i ].tail.setLastOutput( output ); try { stacks[ i ].head.collect( tuple ); } catch( StackException exception ) { if( exception.getCause() instanceof Error ) throw (Error) exception.getCause(); if( exception.getCause() instanceof IOException ) throw (IOException) exception.getCause(); throw (RuntimeException) exception.getCause(); } } } public void close() throws IOException { for( int i = 0; i < stacks.length; i++ ) stacks[ i ].head.close(); } }