/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.cascade; import java.io.IOException; import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; import cascading.ClusterTestCase; import cascading.flow.Flow; import cascading.flow.FlowConnector; import cascading.flow.FlowSkipStrategy; import cascading.flow.FlowStepJob; import cascading.flow.LockingFlowListener; import cascading.flow.ProcessFlow; import cascading.operation.Identity; import cascading.operation.regex.RegexSplitter; import cascading.operation.text.FieldJoiner; import cascading.pipe.Each; import cascading.pipe.Pipe; import cascading.scheme.SequenceFile; import cascading.scheme.TextLine; import cascading.tap.Hfs; import cascading.tap.MultiSourceTap; import cascading.tap.Tap; import cascading.tuple.Fields; import riffle.process.scheduler.ProcessChain; public class CascadeTest extends ClusterTestCase { String inputFile = "build/test/data/ips.20.txt"; String outputPath = "build/test/output/cascade/"; public CascadeTest() { super( "cascade tests", true ); } private Flow firstFlow( String path ) { Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFile ); Pipe pipe = new Pipe( "first" ); pipe = new Each( pipe, new Fields( "line" ), new Identity( new Fields( "ip" ) ), new Fields( "ip" ) ); Tap sink = new Hfs( new SequenceFile( new Fields( "ip" ) ), outputPath + path + "/first", true ); return new FlowConnector( getProperties() ).connect( source, sink, pipe ); } private Flow secondFlow( Tap source, String path ) { Pipe pipe = new Pipe( "second" ); pipe = new Each( pipe, new RegexSplitter( new Fields( "first", "second", "third", "fourth" ), "\\." ) ); Tap sink = new Hfs( new SequenceFile( new Fields( "first", "second", "third", "fourth" ) ), outputPath + path + "/second", true ); return new FlowConnector( getProperties() ).connect( source, sink, pipe ); } private Flow thirdFlow( Tap source, String path ) { Pipe pipe = new Pipe( "third" ); pipe = new Each( pipe, new FieldJoiner( new Fields( "mangled" ), "-" ) ); Tap sink = new Hfs( new SequenceFile( new Fields( "mangled" ) ), outputPath + path + "/third", true ); return new FlowConnector( getProperties() ).connect( source, sink, pipe ); } private Flow fourthFlow( Tap source, String path ) { Pipe pipe = new Pipe( "fourth" ); pipe = new Each( pipe, new Identity() ); Tap sink = new Hfs( new TextLine(), outputPath + path + "/fourth", true ); return new FlowConnector( getProperties() ).connect( source, sink, pipe ); } private Flow previousMultiTapFlow( String path, String ordinal ) { Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFile ); Pipe pipe = new Pipe( ordinal ); pipe = new Each( pipe, new Fields( "line" ), new Identity( new Fields( "ip" ) ), new Fields( "ip" ) ); Tap sink = new Hfs( new SequenceFile( new Fields( "ip" ) ), outputPath + path + "/" + ordinal, true ); return new FlowConnector( getProperties() ).connect( source, sink, pipe ); } private Flow multiTapFlow( Tap[] sources, String path ) { Pipe pipe = new Pipe( "multitap" ); pipe = new Each( pipe, new Identity() ); Tap source = new MultiSourceTap( sources ); Tap sink = new Hfs( new TextLine(), outputPath + path + "/multitap", true ); return new FlowConnector( getProperties() ).connect( source, sink, pipe ); } public void testSimpleCascade() throws IOException { copyFromLocal( inputFile ); String path = "simple"; Flow first = firstFlow( path ); Flow second = secondFlow( first.getSink(), path ); Flow third = thirdFlow( second.getSink(), path ); Flow fourth = fourthFlow( third.getSink(), path ); Cascade cascade = new CascadeConnector().connect( fourth, second, third, first ); cascade.start(); cascade.complete(); validateLength( fourth, 20 ); } public void testMultiTapCascade() throws IOException { copyFromLocal( inputFile ); String path = "multitap"; Flow first = previousMultiTapFlow( path, "first" ); Flow second = previousMultiTapFlow( path, "second" ); Flow multitap = multiTapFlow( Tap.taps( first.getSink(), second.getSink() ), path ); Cascade cascade = new CascadeConnector().connect( multitap, first, second ); cascade.start(); cascade.complete(); validateLength( multitap, 40 ); } public void testSkippedCascade() throws IOException { copyFromLocal( inputFile ); String path = "skipped"; Flow first = firstFlow( path ); Flow second = secondFlow( first.getSink(), path ); Flow third = thirdFlow( second.getSink(), path ); Flow fourth = fourthFlow( third.getSink(), path ); Cascade cascade = new CascadeConnector().connect( first, second, third, fourth ); cascade.setFlowSkipStrategy( new FlowSkipStrategy() { public boolean skipFlow( Flow flow ) throws IOException { return true; } } ); cascade.start(); cascade.complete(); assertFalse( "file exists", fourth.getSink().pathExists( fourth.getJobConf() ) ); } public void testSimpleCascadeStop() throws IOException, InterruptedException { copyFromLocal( inputFile ); String path = "stopped"; Flow first = firstFlow( path ); Flow second = secondFlow( first.getSink(), path ); Flow third = thirdFlow( second.getSink(), path ); Flow fourth = fourthFlow( third.getSink(), path ); LockingFlowListener listener = new LockingFlowListener(); first.addListener( listener ); Cascade cascade = new CascadeConnector().connect( first, second, third, fourth ); System.out.println( "calling start" ); cascade.start(); assertTrue( "did not start", listener.started.tryAcquire( 60, TimeUnit.SECONDS ) ); while( true ) { System.out.println( "testing if running" ); Thread.sleep( 1000 ); Map<String, Callable<Throwable>> map = LockingFlowListener.getJobsMap( first ); if( map == null || map.values().size() == 0 ) continue; if( ( (FlowStepJob) map.values().iterator().next() ).wasStarted() ) break; } System.out.println( "calling stop" ); cascade.stop(); assertTrue( "did not stop", listener.stopped.tryAcquire( 60, TimeUnit.SECONDS ) ); assertTrue( "did not complete", listener.completed.tryAcquire( 60, TimeUnit.SECONDS ) ); } public void testCascadeID() throws IOException { String path = "simple"; Flow first = firstFlow( path ); Flow second = secondFlow( first.getSink(), path ); Flow third = thirdFlow( second.getSink(), path ); Flow fourth = fourthFlow( third.getSink(), path ); Cascade cascade = new CascadeConnector().connect( first, second, third, fourth ); String id = cascade.getID(); assertNotNull( "id is null", id ); assertEquals( first.getProperty( "cascading.cascade.id" ), id ); assertEquals( second.getProperty( "cascading.cascade.id" ), id ); assertEquals( third.getProperty( "cascading.cascade.id" ), id ); assertEquals( fourth.getProperty( "cascading.cascade.id" ), id ); } public void testSimplePerpetual() throws IOException { copyFromLocal( inputFile ); String path = "perpetual"; Flow first = firstFlow( path ); Flow second = secondFlow( first.getSink(), path ); Flow third = thirdFlow( second.getSink(), path ); Flow fourth = fourthFlow( third.getSink(), path ); ProcessChain chain = new ProcessChain( true, fourth, second, first, third ); chain.start(); chain.complete(); validateLength( fourth, 20 ); } public void testSimplePerpetualCascade() throws IOException { copyFromLocal( inputFile ); String path = "perpetualcascade"; Flow first = firstFlow( path ); Flow second = secondFlow( first.getSink(), path ); Flow third = thirdFlow( second.getSink(), path ); Flow fourth = fourthFlow( third.getSink(), path ); ProcessFlow firstProcess = new ProcessFlow( "first", first ); ProcessFlow secondProcess = new ProcessFlow( "second", second ); ProcessFlow thirdProcess = new ProcessFlow( "third", third ); ProcessFlow fourthProcess = new ProcessFlow( "fourth", fourth ); Cascade cascade = new CascadeConnector().connect( fourthProcess, secondProcess, firstProcess, thirdProcess ); cascade.start(); cascade.complete(); validateLength( fourth, 20 ); } }