/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.flow; import java.io.File; import cascading.ClusterTestCase; import cascading.operation.regex.RegexParser; import cascading.pipe.Each; import cascading.pipe.Pipe; import cascading.scheme.TextLine; import cascading.tap.Hfs; import cascading.tap.SinkMode; import cascading.tap.Tap; import cascading.tuple.Fields; public class FlowSkipTest extends ClusterTestCase { String inputFileApache = "build/test/data/apache.10.txt"; String outputPath = "build/test/output/flowskip/"; public FlowSkipTest() { super( "flow skip", false ); // leave cluster testing disabled } public void testSkipStrategiesReplace() throws Exception { if( !new File( inputFileApache ).exists() ) fail( "data file not found" ); copyFromLocal( inputFileApache ); Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache ); // !!! enable replace Tap sink = new Hfs( new TextLine(), outputPath + "/replace", SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); Flow flow = new FlowConnector( getProperties() ).connect( source, sink, pipe ); sink.deletePath( flow.getJobConf() ); assertTrue( "default skip", !flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", !new FlowSkipIfSinkExists().skipFlow( flow ) ); flow.complete(); assertTrue( "default skip", !flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", !new FlowSkipIfSinkExists().skipFlow( flow ) ); FlowSkipStrategy old = flow.getFlowSkipStrategy(); FlowSkipStrategy replaced = flow.setFlowSkipStrategy( new FlowSkipIfSinkExists() ); assertTrue( "not same instance", old == replaced ); validateLength( flow.openSource(), 10 ); // validate source, this once, as a sanity check validateLength( flow, 10, null ); } public void testSkipStrategiesKeep() throws Exception { if( !new File( inputFileApache ).exists() ) fail( "data file not found" ); copyFromLocal( inputFileApache ); Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache ); // !!! enable replace Tap sink = new Hfs( new TextLine(), outputPath + "/keep", SinkMode.KEEP ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); Flow flow = new FlowConnector( getProperties() ).connect( source, sink, pipe ); sink.deletePath( flow.getJobConf() ); assertTrue( "default skip", !flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", !new FlowSkipIfSinkExists().skipFlow( flow ) ); flow.complete(); assertTrue( "default skip", flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", new FlowSkipIfSinkExists().skipFlow( flow ) ); validateLength( flow.openSource(), 10 ); // validate source, this once, as a sanity check validateLength( flow, 10, null ); } }