/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.scheme;
import java.io.IOException;
import java.util.Properties;
import cascading.CascadingTestCase;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.pipe.Pipe;
import cascading.tap.Hfs;
import cascading.tap.SinkMode;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntryIterator;
/**
 * Tests for the {@link TextDelimited} scheme: quoted-field parsing with a plain (",") and a
 * special ("|") delimiter, with and without declared field names/types, and reading a file
 * through a scheme constructed with the extra boolean (header) flag.
 * <p/>
 * Every test builds a trivial source -> pipe -> sink flow over fixture files expected under
 * {@code build/test/data} and writes its results under {@code build/test/output/delim}.
 */
public class TextDelimitedTest extends CascadingTestCase
{
  /** Fixture file using "," as the field delimiter. */
  String testData = "build/test/data/delimited.txt";
  /** Fixture file using "|" as the field delimiter. */
  String testSpecialCharData = "build/test/data/delimited-spec-char.txt";
  /** Root directory under which every test writes its sink output. */
  String outputPath = "build/test/output/delim";

  public TextDelimitedTest()
  {
    super( "delimited text tests" );
  }

  /** Comma delimited input, with declared field names and types. */
  public void testQuotedText() throws IOException
  {
    runQuotedText( "normchar", testData, ",", false );
  }

  /** Comma delimited input, with {@link Fields#ALL} and no declared types. */
  public void testQuotedTextAll() throws IOException
  {
    runQuotedText( "normchar", testData, ",", true );
  }

  /** Pipe delimited input, with declared field names and types. */
  public void testQuotedTextSpecChar() throws IOException
  {
    runQuotedText( "specchar", testSpecialCharData, "|", false );
  }

  /** Pipe delimited input, with {@link Fields#ALL} and no declared types. */
  public void testQuotedTextSpecCharAll() throws IOException
  {
    runQuotedText( "specchar", testSpecialCharData, "|", true );
  }

  /**
   * Copies {@code inputData} to {@code outputPath + "/quoted/" + path} using the same
   * {@link TextDelimited} scheme on both ends, then verifies that both the parsed source and
   * the re-read sink yield exactly the expected tuples, in order.
   *
   * @param path      sub-directory name (below {@code quoted/}) for the sink output
   * @param inputData path of the delimited fixture file to read
   * @param delimiter field delimiter used by the fixture file
   * @param useAll    when true the scheme is created with {@link Fields#ALL} and no types, and
   *                  every expected non-null value is compared in its String form
   * @throws IOException if the source or sink cannot be opened
   */
  public void runQuotedText( String path, String inputData, String delimiter, boolean useAll ) throws IOException
  {
    Object[][] results = new Object[][]{
      {"foo", "bar", "baz", "bin", 1L},
      {"foo", "bar", "baz", "bin", 2L},
      {"foo", "bar" + delimiter + "bar", "baz", "bin", 3L},
      {"foo", "bar\"" + delimiter + "bar", "baz", "bin", 4L},
      {"foo", "bar\"\"" + delimiter + "bar", "baz", "bin", 5L},
      {null, null, "baz", null, 6L},
      {null, null, null, null, 7L},
      {"foo", null, null, null, 8L},
      {null, null, null, null, 9L},
      {"f", null, null, null, 10L}, // this one is quoted, single char
      {"f", null, null, ",bin", 11L}
    };

    if( useAll )
    {
      // no types are handed to the scheme in this mode, so expect every value as a String
      for( Object[] result : results )
      {
        for( int j = 0; j < result.length; j++ )
          result[ j ] = result[ j ] != null ? result[ j ].toString() : null;
      }
    }

    Tuple[] tuples = new Tuple[results.length];

    for( int i = 0; i < results.length; i++ )
      tuples[ i ] = new Tuple( results[ i ] );

    Properties properties = new Properties();

    Class[] types = new Class[]{String.class, String.class, String.class, String.class, long.class};
    Fields fields = new Fields( "first", "second", "third", "fourth", "fifth" );

    if( useAll )
    {
      types = null;
      fields = Fields.ALL;
    }

    TextDelimited scheme = new TextDelimited( fields, delimiter, "\"", types );

    Hfs input = new Hfs( scheme, inputData );
    Hfs output = new Hfs( scheme, outputPath + "/quoted/" + path, SinkMode.REPLACE );

    Pipe pipe = new Pipe( "pipe" );

    Flow flow = new FlowConnector( properties ).connect( input, output, pipe );

    flow.complete();

    validateLength( flow, results.length, 5 );

    // validate input parsing compares to expected, and results compare to expected
    assertTuplesMatch( "source", tuples, flow.openSource() );
    assertTuplesMatch( "sink", tuples, flow.openSink() );
  }

  /**
   * Asserts that {@code iterator} yields exactly the {@code expected} tuples, in order, and
   * always closes the iterator afterwards.
   * <p/>
   * The tuple count is checked explicitly so that a short read fails the test instead of
   * passing silently, and an over-long read fails with an assertion message rather than an
   * ArrayIndexOutOfBoundsException.
   *
   * @param label    name of the stream being checked, used in failure messages
   * @param expected tuples expected from the iterator, in order
   * @param iterator iterator to drain and close
   * @throws IOException declared in case closing the iterator can fail with one
   */
  private void assertTuplesMatch( String label, Tuple[] expected, TupleEntryIterator iterator ) throws IOException
  {
    int count = 0;

    try
    {
      while( iterator.hasNext() )
      {
        Tuple actual = iterator.next().getTuple();

        assertTrue( label + " returned more than " + expected.length + " tuples", count < expected.length );
        assertEquals( label + " tuple at index " + count, expected[ count ], actual );

        count++;
      }
    }
    finally
    {
      // release the underlying reader even when an assertion above fails
      iterator.close();
    }

    assertEquals( label + " tuple count", expected.length, count );
  }

  /**
   * Reads the comma delimited fixture through a scheme constructed with the extra boolean
   * flag set to true and writes it back out through a scheme without that flag.
   * <p/>
   * NOTE(review): the flag presumably makes the scheme skip the first line as a header; this
   * matches the expected length of 10, one less than the 11 rows expected by the quoted text
   * tests for the same file -- confirm against TextDelimited.
   */
  public void testHeader() throws IOException
  {
    Properties properties = new Properties();

    Class[] types = new Class[]{String.class, String.class, String.class, String.class, long.class};
    Fields fields = new Fields( "first", "second", "third", "fourth", "fifth" );

    Hfs input = new Hfs( new TextDelimited( fields, true, ",", "\"", types ), testData );
    Hfs output = new Hfs( new TextDelimited( fields, ",", "\"", types ), outputPath + "/header", SinkMode.REPLACE );

    Pipe pipe = new Pipe( "pipe" );

    Flow flow = new FlowConnector( properties ).connect( input, output, pipe );

    flow.complete();

    validateLength( flow, 10, 5 );
  }
}