/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading;
import java.io.File;
import java.io.IOException;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.operation.Debug;
import cascading.operation.Identity;
import cascading.operation.regex.RegexFilter;
import cascading.operation.regex.RegexSplitter;
import cascading.pipe.Each;
import cascading.pipe.Pipe;
import cascading.scheme.TextLine;
import cascading.tap.Hfs;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntryIterator;
import org.apache.hadoop.mapred.JobConf;
/**
 * Miscellaneous regression tests: writing a planned flow out as a DOT file, planner
 * rejection of sink fields the pipe assembly does not produce, and calling
 * {@code hasNext()} again on an exhausted tuple entry iterator.
 */
public class RegressionMiscTest extends CascadingTestCase
{
  /** Input file read by {@link #testTupleEntryNextTwice()}, which expects exactly 10 entries in it. */
  String inputFileNums10 = "build/test/data/nums.10.txt";

  /** Directory under which the tests in this class place their output. */
  String outputPath = "build/test/output/regressionmisc/";

  public RegressionMiscTest()
  {
    super( "regression misc" );
  }

  /**
   * Sanity check to make sure writeDOT still works.
   * <p>
   * Plans (but never runs) a small flow, writes it as a DOT file under {@link #outputPath},
   * and verifies the file is present afterwards.
   *
   * @throws Exception if planning or writing the DOT file fails
   */
  public void testWriteDot() throws Exception
  {
    Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "/input" );
    Tap sink = new Hfs( new TextLine(), outputPath + "/unknown", true );

    Pipe pipe = new Pipe( "test" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( Fields.UNKNOWN ) );
    pipe = new Each( pipe, new Debug() );
    pipe = new Each( pipe, new Fields( 2 ), new Identity( new Fields( "label" ) ) );
    pipe = new Each( pipe, new Debug() );
    pipe = new Each( pipe, new Fields( "label" ), new RegexFilter( "[A-Z]*" ) );
    pipe = new Each( pipe, new Debug() );

    Flow flow = new FlowConnector().connect( source, sink, pipe );

    new File( outputPath ).mkdirs();

    String dotPath = outputPath + "/writedot.dot";
    File dotFile = new File( dotPath );

    // remove any file left over from a previous run so the existence check below is meaningful
    dotFile.delete();

    flow.writeDOT( dotPath );

    // without this the test would only prove that writeDOT did not throw
    assertTrue( "DOT file was not written: " + dotFile, dotFile.exists() );
  }

  /**
   * Verifies sink fields are consulted during planning.
   * <p>
   * The pipe assembly declares "first", "second" and "third", while the sink selects
   * "first", "second" and "fifth"; connecting the flow is expected to throw.
   *
   * @throws IOException
   */
  public void testSinkDeclaredFieldsFails() throws IOException
  {
    Tap source = new Hfs( new TextLine( new Fields( "line" ) ), "/input" );

    Pipe pipe = new Pipe( "test" );

    pipe = new Each( pipe, new RegexSplitter( new Fields( "first", "second", "third" ), "\\s" ), Fields.ALL );

    Tap sink = new Hfs( new TextLine( new Fields( "line" ), new Fields( "first", "second", "fifth" ) ), "output", true );

    try
    {
      // only the planning step matters here; the resulting flow is not needed
      new FlowConnector().connect( source, sink, pipe );
      fail( "did not fail on bad sink field names" );
    }
    catch( Exception expected )
    {
      // expected: the sink asks for a field ("fifth") that the assembly never declares.
      // NOTE(review): this relies on fail() raising an Error rather than an Exception
      // (as JUnit's AssertionFailedError does), so it is not swallowed here - confirm.
    }
  }

  /**
   * Verifies that a tuple entry iterator opened on a tap yields the expected number of
   * entries and still reports {@code hasNext() == false} when asked again after the
   * iteration loop has already seen it return false.
   *
   * @throws IOException if the tap cannot be opened for reading
   */
  public void testTupleEntryNextTwice() throws IOException
  {
    Tap tap = new Hfs( new TextLine(), inputFileNums10 );

    TupleEntryIterator iterator = tap.openForRead( new JobConf() );

    try
    {
      int count = 0;

      while( iterator.hasNext() )
      {
        iterator.next();
        count++;
      }

      // second hasNext() call after exhaustion must not flip back to true
      assertFalse( iterator.hasNext() );
      assertEquals( 10, count );
    }
    finally
    {
      // release the underlying reader even when an assertion above fails
      iterator.close();
    }
  }
}