/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.flow;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import cascading.pipe.Group;
import cascading.pipe.Pipe;
import cascading.tap.Tap;
import cascading.tap.TempHfs;
import cascading.util.Util;
import org.apache.log4j.Logger;
import org.jgrapht.GraphPath;
import org.jgrapht.Graphs;
import org.jgrapht.ext.IntegerNameProvider;
import org.jgrapht.ext.VertexNameProvider;
import org.jgrapht.graph.SimpleDirectedGraph;
import org.jgrapht.traverse.TopologicalOrderIterator;
/**
 * Class StepGraph is an internal representation of {@link FlowStep} instances.
 * <p/>
 * Vertices are the FlowStep instances, edges are plain Integer ids. The graph is derived from the
 * tap graph of an {@link ElementGraph}: one step is created per sink tap, keyed by the String form
 * of that tap.
 */
public class StepGraph extends SimpleDirectedGraph<FlowStep, Integer>
{
  /** Field LOG */
  private static final Logger LOG = Logger.getLogger( StepGraph.class );

  /** Field MAX_SINK_NAME_LENGTH is the number of trailing sink path characters kept in a step name */
  private static final int MAX_SINK_NAME_LENGTH = 75;

  /** Constructor StepGraph creates a new, empty StepGraph instance. */
  StepGraph()
  {
    super( Integer.class );
  }

  /**
   * Constructor StepGraph creates a new StepGraph instance populated from the given element graph.
   * <p/>
   * The step graph is built first, the traps are verified to be unique afterwards.
   *
   * @param flowName     of type String
   * @param elementGraph of type ElementGraph
   * @param traps        of type Map<String, Tap>
   */
  StepGraph( String flowName, ElementGraph elementGraph, Map<String, Tap> traps )
  {
    this();

    makeStepGraph( flowName, elementGraph, traps );
    verifyTrapsAreUnique( traps );
  }

  /**
   * Method verifyTrapsAreUnique fails if any Tap value occurs more than once in the given map.
   *
   * @param traps of type Map<String, Tap>
   * @throws PlannerException if a trap is registered under more than one name
   */
  private void verifyTrapsAreUnique( Map<String, Tap> traps )
  {
    for( Tap trap : traps.values() )
    {
      if( Collections.frequency( traps.values(), trap ) != 1 )
        throw new PlannerException( "traps must be unique, cannot be reused on different branches: " + trap );
    }
  }

  /**
   * Method getCreateFlowStep returns the step registered under the given sink name, creating and
   * registering a new one if none exists yet.
   *
   * @param flowName of type String
   * @param steps    of type Map<String, FlowStep>
   * @param sinkName of type String
   * @param numJobs  of type int
   * @return FlowStep
   */
  private FlowStep getCreateFlowStep( String flowName, Map<String, FlowStep> steps, String sinkName, int numJobs )
  {
    // only non-null steps are ever stored, so a single lookup is sufficient
    FlowStep existing = steps.get( sinkName );

    if( existing != null )
      return existing;

    if( LOG.isDebugEnabled() )
      LOG.debug( "creating step: " + sinkName );

    // the new step is numbered by its 1-based position in the steps map
    FlowStep step = new FlowStep( makeStepName( steps, numJobs, sinkName ), steps.size() + 1 );

    step.setParentFlowName( flowName );
    steps.put( sinkName, step );

    return step;
  }

  /**
   * Method makeStepName creates a display name of the form "(n/total) sinkPath", where overly long
   * sink paths are reduced to their trailing characters and prefixed with "...".
   *
   * @param steps    of type Map<String, FlowStep>
   * @param numJobs  of type int
   * @param sinkPath of type String
   * @return String
   */
  private String makeStepName( Map<String, FlowStep> steps, int numJobs, String sinkPath )
  {
    // todo make the long form optional via a property
    if( sinkPath.length() > MAX_SINK_NAME_LENGTH )
      sinkPath = "..." + sinkPath.substring( sinkPath.length() - MAX_SINK_NAME_LENGTH );

    return String.format( "(%d/%d) %s", steps.size() + 1, numJobs, sinkPath );
  }

  /**
   * Creates the map reduce step graph.
   * <p/>
   * Taps are visited in topological order. For every source/sink tap pair a step is created (or
   * reused) for the sink, an edge is added from the step that writes the source (if any), and all
   * tap-free shortest paths between the two taps are copied into the step.
   *
   * @param flowName     of type String
   * @param elementGraph of type ElementGraph
   * @param traps        of type Map<String, Tap>
   */
  private void makeStepGraph( String flowName, ElementGraph elementGraph, Map<String, Tap> traps )
  {
    SimpleDirectedGraph<Tap, Integer> tapGraph = elementGraph.makeTapGraph();
    int numJobs = countNumJobs( tapGraph );
    Map<String, FlowStep> steps = new LinkedHashMap<String, FlowStep>();
    TopologicalOrderIterator<Tap, Integer> topoIterator = new TopologicalOrderIterator<Tap, Integer>( tapGraph );
    int edgeId = 0;

    while( topoIterator.hasNext() )
    {
      Tap source = topoIterator.next();

      if( LOG.isDebugEnabled() )
        LOG.debug( "handling source: " + source );

      List<Tap> sinks = Graphs.successorListOf( tapGraph, source );

      for( Tap sink : sinks )
      {
        if( LOG.isDebugEnabled() )
          LOG.debug( "handling path: " + source + " -> " + sink );

        FlowStep step = getCreateFlowStep( flowName, steps, sink.toString(), numJobs );

        addVertex( step );

        // if a step writes to this source, the current step depends on it
        FlowStep sourceStep = steps.get( source.toString() );

        if( sourceStep != null )
          addEdge( sourceStep, step, edgeId++ );

        // support multiple paths from source to sink
        // this allows for self joins on groups, even with different operation stacks between them
        // note we must ignore paths with intermediate taps
        List<GraphPath<FlowElement, Scope>> paths = elementGraph.getAllShortestPathsBetween( source, sink );

        for( GraphPath<FlowElement, Scope> path : paths )
        {
          if( !pathContainsTap( path ) )
            addPathToStep( step, elementGraph, source, sink, path, traps );
        }
      }
    }
  }

  /**
   * Method addPathToStep copies a single tap-free path between source and sink into the given step,
   * registering the source, the sink, the group and any traps found along the way.
   *
   * @param step         of type FlowStep
   * @param elementGraph of type ElementGraph
   * @param source       of type Tap
   * @param sink         of type Tap
   * @param path         of type GraphPath<FlowElement, Scope>
   * @param traps        of type Map<String, Tap>
   */
  private void addPathToStep( FlowStep step, ElementGraph elementGraph, Tap source, Tap sink, GraphPath<FlowElement, Scope> path, Map<String, Tap> traps )
  {
    List<Scope> scopes = path.getEdgeList();
    String sourceName = scopes.get( 0 ).getName(); // root node of the shortest path

    step.sources.put( source, sourceName );
    step.sink = sink;

    if( sink.isWriteDirect() )
      step.tempSink = new TempHfs( "tmp:/" + sink.getPath().toUri().getPath(), true );

    FlowElement lhs = source;

    step.graph.addVertex( lhs );

    // elements before the first Group are treated as map side, everything after as reduce side
    boolean onMapSide = true;

    for( Scope scope : scopes )
    {
      FlowElement rhs = elementGraph.getEdgeTarget( scope );

      step.graph.addVertex( rhs );
      step.graph.addEdge( lhs, rhs, scope );

      if( rhs instanceof Group )
      {
        step.setGroup( (Group) rhs );
        onMapSide = false;
      }
      else if( rhs instanceof Pipe ) // add relevant traps to step
      {
        String pipeName = ( (Pipe) rhs ).getName();

        // this is legacy, can probably now collapse into one collection safely
        if( traps.containsKey( pipeName ) )
        {
          Tap trap = traps.get( pipeName );

          if( onMapSide )
            step.getMapperTraps().put( pipeName, trap );
          else
            step.getReducerTraps().put( pipeName, trap );
        }
      }

      lhs = rhs;
    }
  }

  /**
   * Method countNumJobs counts the taps in the given graph having at least one incoming edge.
   *
   * @param tapGraph of type SimpleDirectedGraph<Tap, Integer>
   * @return int
   */
  private int countNumJobs( SimpleDirectedGraph<Tap, Integer> tapGraph )
  {
    int count = 0;

    for( Tap tap : tapGraph.vertexSet() )
    {
      if( tapGraph.inDegreeOf( tap ) != 0 )
        count++;
    }

    return count;
  }

  /**
   * Method pathContainsTap returns true if the given path holds more than two Tap instances.
   *
   * @param path of type GraphPath<FlowElement, Scope>
   * @return boolean
   */
  private boolean pathContainsTap( GraphPath<FlowElement, Scope> path )
  {
    // first and last are taps, if we find more than 2 there is an intermediate tap, return true
    int count = 0;

    for( FlowElement flowElement : Graphs.getPathVertexList( path ) )
    {
      if( flowElement instanceof Tap && ++count > 2 )
        return true;
    }

    return false;
  }

  /**
   * Method getTopologicalIterator returns a topological iterator over this graph that orders the
   * available steps by ascending submit priority.
   *
   * @return TopologicalOrderIterator<FlowStep, Integer>
   */
  public TopologicalOrderIterator<FlowStep, Integer> getTopologicalIterator()
  {
    Comparator<FlowStep> bySubmitPriority = new Comparator<FlowStep>()
    {
      @Override
      public int compare( FlowStep lhs, FlowStep rhs )
      {
        return Integer.valueOf( lhs.getSubmitPriority() ).compareTo( rhs.getSubmitPriority() );
      }
    };

    return new TopologicalOrderIterator<FlowStep, Integer>( this, new PriorityQueue<FlowStep>( 10, bySubmitPriority ) );
  }

  /**
   * Method writeDOT writes this step graph to a DOT file for easy visualization and debugging.
   *
   * @param filename of type String
   */
  public void writeDOT( String filename )
  {
    printElementGraph( filename );
  }

  /**
   * Method printElementGraph writes this step graph to the named file in DOT format.
   * <p/>
   * This is best effort, an IOException is logged and not propagated.
   *
   * @param filename of type String
   */
  protected void printElementGraph( String filename )
  {
    try
    {
      Writer writer = new FileWriter( filename );

      try
      {
        Util.writeDOT( writer, this, new IntegerNameProvider<FlowStep>(), new VertexNameProvider<FlowStep>()
        {
          @Override
          public String getVertexName( FlowStep object )
          {
            return makeVertexLabel( object );
          }
        }, null );
      }
      finally
      {
        // always release the file handle, even if rendering the graph fails
        writer.close();
      }
    }
    catch( IOException exception )
    {
      LOG.error( "failed writing DOT file: " + filename, exception );
    }
  }

  /**
   * Method makeVertexLabel creates the DOT label for the given step from its name, its sources, its
   * group and its sink. TempHfs sources and sinks are left out of the label.
   *
   * @param step of type FlowStep
   * @return String
   */
  private static String makeVertexLabel( FlowStep step )
  {
    StringBuilder sourceNames = new StringBuilder();

    for( Tap source : step.sources.keySet() )
    {
      if( source instanceof TempHfs )
        continue;

      sourceNames.append( "[" ).append( source.getPath() ).append( "]" );
    }

    // a step without a sink gets no sink segment instead of failing with a NullPointerException
    String sinkName = step.sink == null || step.sink instanceof TempHfs ? "" : "[" + step.sink.getPath() + "]";
    String groupName = step.getGroup() == null ? "" : step.getGroup().getName();

    StringBuilder label = new StringBuilder();

    label.append( "[" ).append( step.getName() ).append( "]" );

    if( sourceNames.length() != 0 )
      label.append( "\\nsrc:" ).append( sourceNames );

    if( groupName.length() != 0 )
      label.append( "\\ngrp:" ).append( groupName );

    if( sinkName.length() != 0 )
      label.append( "\\nsnk:" ).append( sinkName );

    // double quotes would terminate the DOT label early
    return label.toString().replace( '"', '\'' );
  }
}