/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.flow;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import cascading.operation.AssertionLevel;
import cascading.operation.DebugLevel;
import cascading.pipe.Each;
import cascading.pipe.Every;
import cascading.pipe.Group;
import cascading.pipe.Pipe;
import cascading.pipe.SubAssembly;
import cascading.tap.Tap;
import cascading.util.Util;
import org.apache.log4j.Logger;
import org.jgrapht.GraphPath;
import org.jgrapht.Graphs;
/**
 * Class FlowPlanner is the base class for all planner implementations.
 * <p/>
 * It keeps the planner properties together with the assertion and debug levels resolved from them, and offers
 * a set of verification methods that reject malformed assemblies by throwing a {@link PlannerException}.
 */
public class FlowPlanner
  {
  /** Field LOG */
  private static final Logger LOG = Logger.getLogger( FlowPlanner.class );

  /** Field properties */
  protected final Map<Object, Object> properties;
  /** Field assertionLevel */
  protected AssertionLevel assertionLevel;
  /** Field debugLevel */
  protected DebugLevel debugLevel;

  /**
   * Constructor FlowPlanner creates a new FlowPlanner instance, resolving the assertion and debug levels
   * from the given properties via {@link FlowConnector}.
   *
   * @param properties of type Map<Object, Object>
   */
  FlowPlanner( Map<Object, Object> properties )
    {
    this.properties = properties;
    this.assertionLevel = FlowConnector.getAssertionLevel( properties );
    this.debugLevel = FlowConnector.getDebugLevel( properties );
    }

  /**
   * Must be called to determine if all elements of the base pipe assembly are available.
   * <p/>
   * Runs, in order: the source/sink overlap check, the tap role checks (sources and sinks are required, traps
   * are optional), the head/tail name binding check, and the trap checks. The first failing check throws.
   *
   * @param pipes   of type Pipe[]
   * @param sources of type Map<String, Tap>
   * @param sinks   of type Map<String, Tap>
   * @param traps   of type Map<String, Tap>
   * @throws PlannerException if any verification step fails
   */
  protected void verifyAssembly( Pipe[] pipes, Map<String, Tap> sources, Map<String, Tap> sinks, Map<String, Tap> traps )
    {
    verifySourceNotSinks( sources, sinks );

    verifyTaps( sources, true, true );
    verifyTaps( sinks, false, true );
    verifyTaps( traps, false, false );

    verifyPipeAssemblyEndPoints( sources, sinks, pipes );
    verifyTraps( traps, pipes, sources, sinks );
    }

  /**
   * Creates a new ElementGraph instance from the given assembly and taps, using this planner's current
   * assertion and debug levels.
   *
   * @param pipes   of type Pipe[]
   * @param sources of type Map<String, Tap>
   * @param sinks   of type Map<String, Tap>
   * @param traps   of type Map<String, Tap>
   * @return ElementGraph
   */
  protected ElementGraph createElementGraph( Pipe[] pipes, Map<String, Tap> sources, Map<String, Tap> sinks, Map<String, Tap> traps )
    {
    return new ElementGraph( pipes, sources, sinks, traps, assertionLevel, debugLevel );
    }

  /**
   * Method verifySourceNotSinks fails if any sink Tap is also present among the source Taps.
   *
   * @param sources of type Map<String, Tap>
   * @param sinks   of type Map<String, Tap>
   * @throws PlannerException if a Tap is used as both a source and a sink
   */
  protected void verifySourceNotSinks( Map<String, Tap> sources, Map<String, Tap> sinks )
    {
    Collection<Tap> sourceTaps = sources.values();

    for( Tap sinkTap : sinks.values() )
      {
      if( sourceTaps.contains( sinkTap ) )
        throw new PlannerException( "tap may not be used as both source and sink in the same Flow: " + sinkTap );
      }
    }

  /**
   * Method verifyTaps verifies every given Tap supports the role it is being used in.
   *
   * @param taps          of type Map<String, Tap>
   * @param areSources    of type boolean, true if the taps must be sources, false if they must be sinks
   * @param mayNotBeEmpty of type boolean, true if an empty map must be rejected
   * @throws PlannerException if the map is empty but required, or a Tap cannot fill the requested role
   */
  protected void verifyTaps( Map<String, Tap> taps, boolean areSources, boolean mayNotBeEmpty )
    {
    if( mayNotBeEmpty && taps.isEmpty() )
      throw new PlannerException( ( areSources ? "source" : "sink" ) + " taps are required" );

    // iterate entries so each tap is looked up once instead of once per use
    for( Map.Entry<String, Tap> entry : taps.entrySet() )
      {
      String tapName = entry.getKey();
      Tap tap = entry.getValue();

      if( areSources && !tap.isSource() )
        throw new PlannerException( "tap named: '" + tapName + "', cannot be used as a source: " + tap );
      else if( !areSources && !tap.isSink() )
        throw new PlannerException( "tap named: '" + tapName + "', cannot be used as a sink: " + tap );
      }
    }

  /**
   * Method verifyPipeAssemblyEndPoints verifies
   * <p/>
   * there aren't dupe names in heads or tails (duplicates are currently only logged as warnings).
   * all the sink and source tap names match up with tail and head pipes
   * <p/>
   * Tails are verified before heads.
   *
   * @param sources of type Map<String, Tap>
   * @param sinks   of type Map<String, Tap>
   * @param pipes   of type Pipe[]
   * @throws PlannerException if a head or tail name has no tap, or a tap has no matching pipe
   */
  // todo: force dupe names to throw exceptions
  protected void verifyPipeAssemblyEndPoints( Map<String, Tap> sources, Map<String, Tap> sinks, Pipe[] pipes )
    {
    Set<String> tapNames = new HashSet<String>();

    tapNames.addAll( sources.keySet() );
    tapNames.addAll( sinks.keySet() );

    verifyTails( tapNames, sinks, pipes );
    verifyHeads( tapNames, sources, pipes );
    }

  /** Verifies every tail pipe name is bound to a sink tap, and every sink tap name is bound to some pipe. */
  private static void verifyTails( Set<String> tapNames, Map<String, Tap> sinks, Pipe[] pipes )
    {
    Set<Pipe> tails = new HashSet<Pipe>();
    Set<String> tailNames = new HashSet<String>();

    for( Pipe pipe : pipes )
      {
      if( pipe instanceof SubAssembly )
        {
        // a SubAssembly contributes each of its own tails rather than itself
        for( Pipe tail : ( (SubAssembly) pipe ).getTails() )
          registerTail( tail, tapNames, tailNames, tails );
        }
      else
        {
        registerTail( pipe, tapNames, tailNames, tails );
        }
      }

    // snapshot before pruning, otherwise the unbound sinks below cannot be computed
    Set<String> allTailNames = new HashSet<String>( tailNames );

    tailNames.removeAll( sinks.keySet() ); // what is left are tail names without a sink

    if( !tailNames.isEmpty() )
      {
      Set<String> unboundSinks = new HashSet<String>( sinks.keySet() );

      unboundSinks.removeAll( allTailNames );

      throw new PlannerException( "not all tail pipes bound to sink taps, remaining tail pipe names: ["
        + Util.join( Util.quote( tailNames, "'" ), ", " )
        + "], remaining sink tap names: ["
        + Util.join( Util.quote( unboundSinks, "'" ), ", " ) + "]" );
      }

    // unlike heads, pipes can input to another pipe and simultaneously be a sink
    // so there is no way to know all the intentional tails, so they aren't listed below in the exception
    Set<String> remainingSinks = new HashSet<String>( sinks.keySet() );

    remainingSinks.removeAll( Arrays.asList( Pipe.names( pipes ) ) );

    if( !remainingSinks.isEmpty() )
      throw new PlannerException( "not all sink taps bound to tail pipes, remaining sink tap names: ["
        + Util.join( Util.quote( remainingSinks, "'" ), ", " ) + "]" );
    }

  /** Checks a single tail has a matching tap name, warns on a duplicate name, then records the tail. */
  private static void registerTail( Pipe tail, Set<String> tapNames, Set<String> tailNames, Set<Pipe> tails )
    {
    String tailName = tail.getName();

    if( !tapNames.contains( tailName ) )
      throw new PlannerException( tail, "pipe name not found in either sink or source map: '" + tailName + "'" );

    // the same pipe seen twice is fine, a different pipe reusing the name is suspicious
    if( tailNames.contains( tailName ) && !tails.contains( tail ) )
      LOG.warn( "duplicate tail name found: '" + tailName + "'" );

    tailNames.add( tailName );
    tails.add( tail );
    }

  /** Verifies every head pipe name is bound to a source tap, and every source tap name is bound to a head. */
  private static void verifyHeads( Set<String> tapNames, Map<String, Tap> sources, Pipe[] pipes )
    {
    Set<Pipe> heads = new HashSet<Pipe>();
    Set<String> headNames = new HashSet<String>();

    for( Pipe pipe : pipes )
      {
      for( Pipe head : pipe.getHeads() )
        {
        String headName = head.getName();

        if( !tapNames.contains( headName ) )
          throw new PlannerException( head, "pipe name not found in either sink or source map: '" + headName + "'" );

        // the same pipe seen twice is fine, a different pipe reusing the name is suspicious
        if( headNames.contains( headName ) && !heads.contains( head ) )
          LOG.warn( "duplicate head name found, not an error but heads should have unique names: '" + headName + "'" );

        headNames.add( headName );
        heads.add( head );
        }
      }

    // snapshot before pruning, both messages below need the complete set of head names
    Set<String> allHeadNames = new HashSet<String>( headNames );

    headNames.removeAll( sources.keySet() ); // what is left are head names without a source

    Set<String> remainingSources = new HashSet<String>( sources.keySet() );

    remainingSources.removeAll( allHeadNames ); // source names no head pipe is using

    if( !headNames.isEmpty() )
      throw new PlannerException( "not all head pipes bound to source taps, remaining head pipe names: ["
        + Util.join( Util.quote( headNames, "'" ), ", " )
        + "], remaining source tap names: ["
        + Util.join( Util.quote( remainingSources, "'" ), ", " ) + "]" );

    // headNames is necessarily empty from here on, the trailing list is kept so the message format is unchanged
    if( !remainingSources.isEmpty() )
      throw new PlannerException( "not all source taps bound to head pipes, remaining source tap names: ["
        + Util.join( Util.quote( remainingSources, "'" ), ", " )
        + "], remaining head pipe names: ["
        + Util.join( Util.quote( headNames, "'" ), ", " ) + "]" );
    }

  /**
   * Method verifyTraps verifies no trap Tap doubles as a source or sink, and that every trap name matches a
   * pipe name returned by {@code Pipe.names( pipes )}.
   *
   * @param traps   of type Map<String, Tap>
   * @param pipes   of type Pipe[]
   * @param sources of type Map<String, Tap>
   * @param sinks   of type Map<String, Tap>
   * @throws PlannerException if a trap is also a source or sink, or a trap name is not found in the assembly
   */
  protected void verifyTraps( Map<String, Tap> traps, Pipe[] pipes, Map<String, Tap> sources, Map<String, Tap> sinks )
    {
    verifyTrapsNotSourcesSinks( traps, sources, sinks );

    Set<String> pipeNames = new HashSet<String>();

    Collections.addAll( pipeNames, Pipe.names( pipes ) );

    for( String trapName : traps.keySet() )
      {
      if( !pipeNames.contains( trapName ) )
        throw new PlannerException( "trap name not found in assembly: '" + trapName + "'" );
      }
    }

  /** Fails if any trap Tap is also used as a source Tap or as a sink Tap. */
  private void verifyTrapsNotSourcesSinks( Map<String, Tap> traps, Map<String, Tap> sources, Map<String, Tap> sinks )
    {
    Collection<Tap> sourceTaps = sources.values();
    Collection<Tap> sinkTaps = sinks.values();

    for( Tap trap : traps.values() )
      {
      if( sourceTaps.contains( trap ) )
        throw new PlannerException( "tap may not be used as both a trap and a source in the same Flow: " + trap );

      if( sinkTaps.contains( trap ) )
        throw new PlannerException( "tap may not be used as both a trap and a sink in the same Flow: " + trap );
      }
    }

  /**
   * Verifies that there are not only GroupAssertions following any given Group instance. This will adversely
   * affect the stream entering any subsequent Tap or Each instances.
   * <p/>
   * For every path starting at a Group, the run of Every instances directly following it is counted. An Every
   * whose planner level is not null is counted as an assertion. The path fails if that run is not empty and
   * consists of assertions only.
   *
   * @param elementGraph of type ElementGraph
   * @throws PlannerException if a Group is followed by assertions only
   */
  protected void failOnLoneGroupAssertion( ElementGraph elementGraph )
    {
    List<Group> groups = elementGraph.findAllGroups();

    // walk Every instances after Group
    for( Group group : groups )
      {
      for( GraphPath<FlowElement, Scope> path : elementGraph.getAllShortestPathsFrom( group ) )
        {
        List<FlowElement> flowElements = Graphs.getPathVertexList( path ); // last element is tail

        int everies = 0;
        int assertions = 0;

        for( FlowElement flowElement : flowElements )
          {
          if( flowElement instanceof Group )
            continue;

          // the run of Every instances ends at the first element of any other type
          if( !( flowElement instanceof Every ) )
            break;

          everies++;

          Every every = (Every) flowElement;

          if( every.getPlannerLevel() != null )
            assertions++;
          }

        if( everies != 0 && everies == assertions )
          throw new PlannerException( "group assertions must be accompanied by aggregator operations" );
        }
      }
    }

  /**
   * Method failOnMissingGroup fails if an Each is found while walking backwards from any Every, before a
   * Group is reached on that path.
   *
   * @param elementGraph of type ElementGraph
   * @throws PlannerException if an Every is preceded by an Each before any Group
   */
  protected void failOnMissingGroup( ElementGraph elementGraph )
    {
    List<Every> everies = elementGraph.findAllEveries();

    // walk backwards from each Every towards its Group
    for( Every every : everies )
      {
      for( GraphPath<FlowElement, Scope> path : elementGraph.getAllShortestPathsTo( every ) )
        {
        List<FlowElement> flowElements = Graphs.getPathVertexList( path ); // last element is every

        Collections.reverse( flowElements ); // first element is every

        for( FlowElement flowElement : flowElements )
          {
          if( flowElement instanceof Each )
            throw new PlannerException( (Pipe) flowElement, "Every may only be preceded by another Every or a Group pipe, found: " + flowElement );

          if( flowElement instanceof Every )
            continue;

          // a Group legitimately terminates the run of Every instances, stop inspecting this path
          if( flowElement instanceof Group )
            break;
          }
        }
      }
    }

  /**
   * Method failOnMisusedBuffer fails if an Every holding a Buffer shares its Group with any other Every.
   * <p/>
   * Each path is walked backwards from an Every until a Group is reached. An Each found on the way is
   * rejected as well.
   *
   * @param elementGraph of type ElementGraph
   * @throws PlannerException if a Buffer is combined with another Every, or an Each precedes an Every
   */
  protected void failOnMisusedBuffer( ElementGraph elementGraph )
    {
    List<Every> everies = elementGraph.findAllEveries();

    // walk backwards from each Every towards its Group
    for( Every every : everies )
      {
      for( GraphPath<FlowElement, Scope> path : elementGraph.getAllShortestPathsTo( every ) )
        {
        List<FlowElement> flowElements = Graphs.getPathVertexList( path ); // last element is every

        Collections.reverse( flowElements ); // first element is every

        Every last = null; // the Every visited just before the current one, which is downstream of it
        boolean foundBuffer = false;
        int foundEveries = -1; // becomes zero on the first Every seen

        for( FlowElement flowElement : flowElements )
          {
          if( flowElement instanceof Each )
            throw new PlannerException( (Pipe) flowElement, "Every may only be preceded by another Every or a GroupBy or CoGroup pipe, found: " + flowElement );

          if( flowElement instanceof Every )
            {
            foundEveries++;

            boolean isBuffer = ( (Every) flowElement ).isBuffer();

            // from the second Every on, neither it nor any Every already seen may be a Buffer
            if( foundEveries != 0 && ( isBuffer || foundBuffer ) )
              throw new PlannerException( (Pipe) flowElement, "Only one Every with a Buffer may follow a GroupBy or CoGroup pipe, no other Every instances are allowed immediately before or after, found: " + flowElement + " before: " + last );

            if( !foundBuffer )
              foundBuffer = isBuffer;

            last = (Every) flowElement;
            }

          if( flowElement instanceof Group )
            break;
          }
        }
      }
    }

  /**
   * Method failOnGroupEverySplit fails if any Every is found among the children reported for a selected Group.
   *
   * @param elementGraph of type ElementGraph
   * @throws PlannerException if an Every splits after a Group
   */
  protected void failOnGroupEverySplit( ElementGraph elementGraph )
    {
    List<Group> groups = new ArrayList<Group>();

    // NOTE(review): the 1 and 2 arguments presumably restrict the search to Groups that split into two or
    // more outgoing branches -- confirm against ElementGraph.findAllOfType
    elementGraph.findAllOfType( 1, 2, Group.class, groups );

    for( Group group : groups )
      {
      // NOTE(review): presumably returns the nearest downstream elements that are not plain Pipe
      // instances -- confirm against ElementGraph.getAllChildrenNotExactlyType
      Set<FlowElement> children = elementGraph.getAllChildrenNotExactlyType( group, Pipe.class );

      for( FlowElement flowElement : children )
        {
        if( flowElement instanceof Every )
          throw new PlannerException( (Every) flowElement, "Every instances may not split after a GroupBy or CoGroup pipe, found: " + flowElement + " after: " + group );
        }
      }
    }
  }