/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.textfileinput;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.pentaho.di.TestFailedException;
import org.pentaho.di.TestUtilities;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.compress.CompressionPluginType;
import org.pentaho.di.core.plugins.PluginRegistry;
import org.pentaho.di.core.plugins.StepPluginType;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaInteger;
import org.pentaho.di.core.row.value.ValueMetaString;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.trans.RowProducer;
import org.pentaho.di.trans.RowStepCollector;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransHopMeta;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.reporting.libraries.base.util.CSVTokenizer;
import junit.framework.TestCase;
/**
* This class was a "copy and modification" of Kettle's CsvInput1Test. I added comments as I was learning the
* architecture of the class.
*
* @author sflatley
*/
public class TextFileInputTests extends TestCase {
/**
 * Class-level fixture: adds the compression plugin type to the plugin registry and then initializes the registry.
 * <p>
 * NOTE(review): this class extends {@code junit.framework.TestCase}; JUnit 3 style runners do not honor
 * {@code @BeforeClass}, so confirm that this hook is actually invoked.
 *
 * @throws Exception
 *           propagated from the plugin registry calls
 */
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  CompressionPluginType compressionType = CompressionPluginType.getInstance();
  PluginRegistry.addPluginType( compressionType );
  PluginRegistry.init( true );
}
/**
 * Class-level teardown hook; currently a no-op.
 *
 * @throws Exception
 *           declared by the signature, never thrown by the current (empty) body
 */
@AfterClass
public static void tearDownAfterClass() throws Exception {
}
/**
 * Writes the input data for the requested test case to a new temporary file. The file is registered for deletion
 * when the JVM exits.
 *
 * @param testNum
 *          selects the content: 1 delegates to writeInput1(), 2 delegates to writeInput2(); any other value leaves
 *          the file empty
 * @return absolute path of the created file
 * @throws IOException
 *           if the temporary file cannot be created, written or closed
 */
public String writeInputFile( int testNum ) throws IOException {
  File tempFile = File.createTempFile( "PDI_tmp", ".tmp" );
  tempFile.deleteOnExit();
  String endOfLineCharacters = TestUtilities.getEndOfLineCharacters();
  FileWriter fout = new FileWriter( tempFile );
  try {
    switch ( testNum ) {
      case 1:
        writeInput1( endOfLineCharacters, fout );
        break;
      case 2:
        writeInput2( endOfLineCharacters, fout );
        break;
      default:
        break;
    }
  } finally {
    // release the file handle even when one of the writes fails
    fout.close();
  }
  return tempFile.getAbsolutePath();
}
/**
 * Writes the two data rows of test case 2. Every value is surrounded by tab characters and the two values of a row
 * are separated by a semicolon.
 *
 * @param endOfLineCharacters
 *          line terminator appended to every row
 * @param fout
 *          writer that receives the rows; it is not closed here
 * @throws IOException
 *           if writing fails
 */
private void writeInput2( String endOfLineCharacters, FileWriter fout ) throws IOException {
  String[] rows = { "\t123456\t;\t1234567\t", "\t654321\t;\t7654321\t" };
  for ( String row : rows ) {
    fout.write( row + endOfLineCharacters );
  }
}
/**
 * Writes the content of test case 1: a header line followed by three semicolon-separated data rows.
 *
 * @param endOfLineCharacters
 *          line terminator appended to every line
 * @param fout
 *          writer that receives the lines; it is not closed here
 * @throws IOException
 *           if writing fails
 */
private void writeInput1( String endOfLineCharacters, FileWriter fout ) throws IOException {
  String[] lines = { "A;B;C;D;E", "1;b1;c1;d1;e1", "2;b2;c2;d2;e2", "3;b3;c3;d3;e3" };
  for ( String line : lines ) {
    fout.write( line + endOfLineCharacters );
  }
}
/**
 * Creates the expected ("golden") rows for test case 1. Each Object array mirrors one data row written by
 * writeInputFile( 1 ), followed by a sixth "filename" value.
 * <p>
 * The sixth value is the fixed literal "fileName" rather than the real temporary file path; testTextFileInput1()
 * passes 5 as the last argument of TestUtilities.checkRows, presumably so that this column is skipped during the
 * comparison (confirm against TestUtilities).
 *
 * @return list of metadata/data couples describing what the result should look like
 */
public List<RowMetaAndData> createResultData1() {
  ValueMetaInterface[] valuesMeta =
    { new ValueMetaInteger( "a" ), new ValueMetaString( "b" ),
      new ValueMetaString( "c" ), new ValueMetaString( "d" ),
      new ValueMetaString( "e" ), new ValueMetaString( "filename" ), };
  RowMetaInterface rm = createResultRowMetaInterface( valuesMeta );

  // Long.valueOf replaces the deprecated new Long(..) boxing constructor
  Object[] r1 = new Object[] { Long.valueOf( 1L ), "b1", "c1", "d1", "e1", "fileName" };
  Object[] r2 = new Object[] { Long.valueOf( 2L ), "b2", "c2", "d2", "e2", "fileName" };
  Object[] r3 = new Object[] { Long.valueOf( 3L ), "b3", "c3", "d3", "e3", "fileName" };

  List<RowMetaAndData> list = new ArrayList<RowMetaAndData>();
  list.add( new RowMetaAndData( rm, r1 ) );
  list.add( new RowMetaAndData( rm, r2 ) );
  list.add( new RowMetaAndData( rm, r3 ) );
  return list;
}
/**
 * Creates the expected ("golden") rows for test case 2: two rows of two Integer fields ("a" and "b", both declared
 * with length 15 and precision -1) holding the numbers written by writeInputFile( 2 ).
 *
 * @return list of metadata/data couples describing what the result should look like
 */
public List<RowMetaAndData> createResultData2() {
  RowMetaInterface rowMeta = createResultRowMetaInterface( new ValueMetaInterface[] {
    new ValueMetaInteger( "a", 15, -1 ), new ValueMetaInteger( "b", 15, -1 ) } );

  Object[][] expectedRows = {
    { Long.valueOf( 123456L ), Long.valueOf( 1234567L ) },
    { Long.valueOf( 654321L ), Long.valueOf( 7654321L ) } };

  List<RowMetaAndData> expected = new ArrayList<RowMetaAndData>();
  for ( Object[] row : expectedRows ) {
    expected.add( new RowMetaAndData( rowMeta, row ) );
  }
  return expected;
}
/**
 * Creates the row metadata of the rows fed into the transformation: a single String value named "filename".
 *
 * @return row metadata containing only the "filename" String value
 */
public RowMetaInterface createRowMetaInterface() {
  RowMetaInterface filenameRowMeta = new RowMeta();
  filenameRowMeta.addValueMeta( new ValueMetaString( "filename" ) );
  return filenameRowMeta;
}
/**
 * Creates the input rows for the transformation: exactly one row whose only value is the given file name, described
 * by the metadata from createRowMetaInterface().
 *
 * @param fileName
 *          value stored in the single "filename" column
 * @return a list holding that one row
 */
public List<RowMetaAndData> createData( String fileName ) {
  RowMetaAndData fileNameRow = new RowMetaAndData( createRowMetaInterface(), new Object[] { fileName } );
  List<RowMetaAndData> rows = new ArrayList<RowMetaAndData>();
  rows.add( fileNameRow );
  return rows;
}
/**
 * Builds a row metadata object that contains the given value metadata entries in array order.
 *
 * @param valuesMeta
 *          value metadata to add, in the order the fields should appear in the row
 * @return a new row metadata object holding all given entries
 */
public RowMetaInterface createResultRowMetaInterface( ValueMetaInterface[] valuesMeta ) {
  RowMetaInterface resultRowMeta = new RowMeta();
  for ( ValueMetaInterface valueMeta : valuesMeta ) {
    resultRowMeta.addValueMeta( valueMeta );
  }
  return resultRowMeta;
}
/**
 * Builds the Text File Input step used by test case 1. The step is configured for a semicolon-separated "CSV" file
 * with one header line and five fields: "a" as Integer and "b" through "e" as String. The include-filename flag is
 * set and the filename field is named "filename".
 *
 * @param name
 *          name given to the created step
 * @param fileName
 *          path of the file the step should read
 * @param registry
 *          plugin registry used to look up the plugin id of the step
 * @return the configured step
 */
private StepMeta createTextFileInputStep( String name, String fileName, PluginRegistry registry ) {
  // Create a Text File Input step, honoring the name requested by the caller
  TextFileInputMeta textFileInputMeta = new TextFileInputMeta();
  String textFileInputPid = registry.getPluginId( StepPluginType.class, textFileInputMeta );
  StepMeta textFileInputStep = new StepMeta( textFileInputPid, name, textFileInputMeta );

  // initialize and populate the fields
  // it is important that the setPosition(int)
  // is invoked with the correct position as
  // we are testing the reading of a delimited file.
  String[] fieldNames = { "a", "b", "c", "d", "e" };
  TextFileInputField[] fields = new TextFileInputField[fieldNames.length];
  for ( int idx = 0; idx < fields.length; idx++ ) {
    TextFileInputField field = new TextFileInputField();
    field.setName( fieldNames[idx] );
    // only the first column is numeric, the remaining ones are plain strings
    field.setType( idx == 0 ? ValueMetaInterface.TYPE_INTEGER : ValueMetaInterface.TYPE_STRING );
    field.setFormat( "" );
    field.setLength( -1 );
    field.setPrecision( -1 );
    field.setCurrencySymbol( "" );
    field.setDecimalSymbol( "" );
    field.setGroupSymbol( "" );
    field.setTrimType( ValueMetaInterface.TRIM_TYPE_NONE );
    // positions are 1-based
    field.setPosition( idx + 1 );
    fields[idx] = field;
  }

  // call this so that we allocate the arrays
  // for files, fields and filters.
  // we are testing one file and one set
  // of fields. No filters
  textFileInputMeta.allocate( 1, 1, 0 );

  // set meta properties- these were determined by running Spoon
  // and setting up the transformation we are setting up here.
  // i.e. - the dialog told me what I had to set to avoid
  // NPEs during the transformation.
  String[] filesRequired = new String[] { "N" };
  String[] includeSubfolders = new String[] { "N" };
  textFileInputMeta.setFilenameField( "filename" );
  textFileInputMeta.setEnclosure( "\"" );
  textFileInputMeta.setFileName( new String[] { fileName } );
  textFileInputMeta.setFileFormat( TestUtilities.getFileFormat() );
  textFileInputMeta.setFileType( "CSV" );
  textFileInputMeta.setSeparator( ";" );
  textFileInputMeta.setFileRequired( filesRequired );
  textFileInputMeta.setIncludeSubFolders( includeSubfolders );
  textFileInputMeta.setInputFields( fields );
  textFileInputMeta.setHeader( true );
  textFileInputMeta.setNrHeaderLines( 1 );
  textFileInputMeta.setFileCompression( "None" );
  textFileInputMeta.setNoEmptyLines( true );
  textFileInputMeta.setRowLimit( 0 );
  textFileInputMeta.setAddResultFile( true );
  // Locale.US (language "en", country "US"); new Locale( "en_US" ) would create a locale whose language is "en_us"
  textFileInputMeta.setDateFormatLocale( Locale.US );
  textFileInputMeta.setIncludeFilename( true );
  return textFileInputStep;
}
/**
 * Builds the Text File Input step used by test case 2. The step is configured for a semicolon-separated "CSV" file
 * without header lines and with two Integer fields, "a" and "b", both of length 15. The include-filename flag is
 * switched off.
 *
 * @param name
 *          name given to the created step
 * @param fileName
 *          path of the file the step should read
 * @param registry
 *          plugin registry used to look up the plugin id of the step
 * @return the configured step
 */
private StepMeta createTextFileInputStep2( String name, String fileName, PluginRegistry registry ) {
  // Create a Text File Input step
  TextFileInputMeta textFileInputMeta = new TextFileInputMeta();
  String textFileInputPid = registry.getPluginId( StepPluginType.class, textFileInputMeta );
  StepMeta textFileInputStep = new StepMeta( textFileInputPid, name, textFileInputMeta );

  // initialize and populate the fields
  // it is important that the setPosition(int)
  // is invoked with the correct position as
  // we are testing the reading of a delimited file.
  String[] fieldNames = { "a", "b" };
  TextFileInputField[] fields = new TextFileInputField[fieldNames.length];
  for ( int idx = 0; idx < fields.length; idx++ ) {
    TextFileInputField field = new TextFileInputField();
    field.setName( fieldNames[idx] );
    field.setType( ValueMetaInterface.TYPE_INTEGER );
    field.setFormat( "" );
    field.setLength( 15 );
    field.setPrecision( -1 );
    field.setCurrencySymbol( "" );
    field.setDecimalSymbol( "" );
    field.setGroupSymbol( "" );
    field.setTrimType( ValueMetaInterface.TRIM_TYPE_NONE );
    // positions are 1-based
    field.setPosition( idx + 1 );
    fields[idx] = field;
  }

  // call this so that we allocate the arrays
  // for files, fields and filters.
  // we are testing one file and one set
  // of fields. No filters
  textFileInputMeta.allocate( 1, 1, 0 );

  // set meta properties- these were determined by running Spoon
  // and setting up the transformation we are setting up here.
  // i.e. - the dialog told me what I had to set to avoid
  // NPEs during the transformation.
  String[] filesRequired = new String[] { "N" };
  String[] includeSubfolders = new String[] { "N" };
  // NOTE(review): "$[09]" is presumably Kettle's hex notation for the TAB character that encloses the values
  // written for test case 2 - confirm
  textFileInputMeta.setEnclosure( "$[09]" );
  textFileInputMeta.setAddResultFile( false );
  textFileInputMeta.setFileName( new String[] { fileName } );
  textFileInputMeta.setFileFormat( TestUtilities.getFileFormat() );
  textFileInputMeta.setFileType( "CSV" );
  textFileInputMeta.setSeparator( ";" );
  textFileInputMeta.setFileRequired( filesRequired );
  textFileInputMeta.setIncludeSubFolders( includeSubfolders );
  textFileInputMeta.setInputFields( fields );
  textFileInputMeta.setHeader( false );
  textFileInputMeta.setNrHeaderLines( 0 );
  textFileInputMeta.setFileCompression( "None" );
  textFileInputMeta.setNoEmptyLines( true );
  textFileInputMeta.setRowLimit( 0 );
  // Locale.US (language "en", country "US"); new Locale( "en_US" ) would create a locale whose language is "en_us"
  textFileInputMeta.setDateFormatLocale( Locale.US );
  textFileInputMeta.setIncludeFilename( false );
  return textFileInputStep;
}
/**
 * Runs an "injector -> text file input -> dummy" transformation over the file of test case 1 and compares the rows
 * collected at the dummy step with the golden rows of createResultData1().
 *
 * @throws Exception
 *           if the environment, the input file or the transformation cannot be set up or executed
 */
public void testTextFileInput1() throws Exception {
  KettleEnvironment.init();

  // set up an empty transformation
  TransMeta transMeta = new TransMeta();
  transMeta.setName( "testTextFileInput1" );
  PluginRegistry pluginRegistry = PluginRegistry.getInstance();

  // the file the step under test is going to read
  String inputFile = writeInputFile( 1 );

  // injector step: feeds the file name row into the transformation
  String injectorName = "injector step";
  StepMeta injector = TestUtilities.createInjectorStep( injectorName, pluginRegistry );
  transMeta.addStep( injector );

  // step under test, connected to the injector
  StepMeta textFileInput = createTextFileInputStep( "text file input step", inputFile, pluginRegistry );
  transMeta.addStep( textFileInput );
  transMeta.addTransHop( new TransHopMeta( injector, textFileInput ) );

  // dummy step: the place where the produced rows are collected
  String dummyName = "dummy step";
  StepMeta dummy = TestUtilities.createDummyStep( dummyName, pluginRegistry );
  transMeta.addStep( dummy );
  transMeta.addTransHop( new TransHopMeta( textFileInput, dummy ) );

  // prepare the execution and hook the collector onto the dummy step
  Trans trans = new Trans( transMeta );
  trans.prepareExecution( null );
  StepInterface dummyInterface = trans.getStepInterface( dummyName, 0 );
  RowStepCollector collector = new RowStepCollector();
  dummyInterface.addRowListener( collector );

  RowProducer producer = trans.addRowProducer( injectorName, 0 );
  trans.startThreads();

  // push the file name rows through the injector
  for ( RowMetaAndData inputRow : createData( inputFile ) ) {
    producer.putRow( inputRow.getRowMeta(), inputRow.getData() );
  }
  producer.finished();
  trans.waitUntilFinished();

  // compare what arrived at the dummy step with the golden rows
  List<RowMetaAndData> actualRows = collector.getRowsWritten();
  List<RowMetaAndData> expectedRows = createResultData1();
  try {
    // NOTE(review): 5 is presumably the index of the filename column, which differs per run - confirm
    TestUtilities.checkRows( expectedRows, actualRows, 5 );
  } catch ( TestFailedException tfe ) {
    fail( tfe.getMessage() );
  }
}
/**
 * Runs a "text file input -> dummy" transformation over the file of test case 2 and compares the rows collected at
 * the dummy step with the golden rows of createResultData2().
 *
 * @throws Exception
 *           if the environment, the input file or the transformation cannot be set up or executed
 */
public void testTextFileInput2() throws Exception {
  KettleEnvironment.init();

  // set up an empty transformation
  TransMeta transMeta = new TransMeta();
  transMeta.setName( "testTextFileInput2" );
  PluginRegistry pluginRegistry = PluginRegistry.getInstance();

  // the file the step under test is going to read
  String inputFile = writeInputFile( 2 );

  // step under test
  StepMeta textFileInput = createTextFileInputStep2( "text file input step", inputFile, pluginRegistry );
  transMeta.addStep( textFileInput );

  // dummy step: the place where the produced rows are collected
  String dummyName = "dummy step";
  StepMeta dummy = TestUtilities.createDummyStep( dummyName, pluginRegistry );
  transMeta.addStep( dummy );
  transMeta.addTransHop( new TransHopMeta( textFileInput, dummy ) );

  // prepare the execution and hook the collector onto the dummy step
  Trans trans = new Trans( transMeta );
  trans.prepareExecution( null );
  StepInterface dummyInterface = trans.getStepInterface( dummyName, 0 );
  RowStepCollector collector = new RowStepCollector();
  dummyInterface.addRowListener( collector );

  trans.startThreads();
  trans.waitUntilFinished();

  // compare what arrived at the dummy step with the golden rows
  List<RowMetaAndData> actualRows = collector.getRowsWritten();
  List<RowMetaAndData> expectedRows = createResultData2();
  try {
    TestUtilities.checkRows( expectedRows, actualRows, 0 );
  } catch ( TestFailedException tfe ) {
    fail( tfe.getMessage() );
  }
}
/**
 * Verify that lines are properly identified when parsing a mixed format file: the first line returned by
 * TextFileInput.getLine for "testfiles/example.csv" must contain 4 comma-separated tokens.
 *
 * @throws Exception
 *           if the file cannot be opened or read
 */
public void testGetLine_FILE_FORMAT_MIXED() throws Exception {
  String fileLocation = "testfiles/example.csv";
  InputStream inputStream = KettleVFS.getInputStream( fileLocation );
  try {
    InputStreamReader reader = new InputStreamReader( inputStream );
    // Grab the first line and verify it only has 4 tokens instead of 24 (the total tokens in the file)
    StringBuilder stringBuilder = new StringBuilder( 1000 );
    String line = TextFileInput.getLine( null, reader, TextFileInputMeta.FILE_FORMAT_MIXED, stringBuilder );
    CSVTokenizer csvt = new CSVTokenizer( line, ",", "\"" );
    assertEquals( 4, csvt.countTokens() );
  } finally {
    // release the underlying stream even when reading or the assertion fails
    inputStream.close();
  }
}
}