/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.fileinput.text;
import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.junit.BeforeClass;
import org.junit.Test;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.exception.KettleFileException;
import org.pentaho.di.core.fileinput.FileInputList;
import org.pentaho.di.core.playlist.FilePlayListAll;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.value.ValueMetaString;
import org.pentaho.di.core.variables.Variables;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.trans.TransTestingUtil;
import org.pentaho.di.trans.step.errorhandling.FileErrorHandler;
import org.pentaho.di.trans.steps.StepMockUtil;
import org.pentaho.di.trans.steps.fileinput.BaseFileInputField;
import org.pentaho.di.utils.TestUtils;
/**
 * Tests for line reading via {@link TextFileInputUtils#getLine} and for row parsing performed by
 * {@link TextFileInput} on small in-memory (RAM) VFS files.
 */
public class TextFileInputTest {

  /** Charset name used both to encode test strings and to decode them again. */
  private static final String UTF_8 = "UTF-8";

  /** First line that every {@code getLine} test expects back, without its line terminator. */
  private static final String EXPECTED_FIRST_LINE = "col1\tcol2\tcol3";

  /**
   * Initializes the Kettle environment once before any test of this class runs.
   *
   * @throws Exception if the environment cannot be initialized
   */
  @BeforeClass
  public static void initKettle() throws Exception {
    KettleEnvironment.init();
  }

  /**
   * Wraps the given text into a reader backed by an in-memory byte stream.
   * The text is encoded and decoded with the same charset (UTF-8), so the result does not depend
   * on the platform default charset.
   *
   * @param data text the reader should deliver
   * @return reader over the UTF-8 bytes of {@code data}
   * @throws UnsupportedEncodingException if UTF-8 is not supported
   */
  private static InputStreamReader getInputStreamReader( String data ) throws UnsupportedEncodingException {
    return new InputStreamReader( new ByteArrayInputStream( data.getBytes( UTF_8 ) ), UTF_8 );
  }

  /**
   * Reads the first line of {@code input} through {@link TextFileInputUtils#getLine} using the
   * given file format type and a fresh 1000-character line buffer.
   *
   * @param input      raw text to read from
   * @param formatType one of the {@code TextFileInputMeta.FILE_FORMAT_*} constants
   * @return whatever {@code getLine} returns for the first call on that input
   */
  private static String readFirstLine( String input, int formatType )
    throws KettleFileException, UnsupportedEncodingException {
    return TextFileInputUtils.getLine( null, getInputStreamReader( input ), formatType, new StringBuilder( 1000 ) );
  }

  /** CRLF-terminated input read with the DOS format yields the first line without terminator. */
  @Test
  public void testGetLineDOS() throws KettleFileException, UnsupportedEncodingException {
    String input = "col1\tcol2\tcol3\r\ndata1\tdata2\tdata3\r\n";
    assertEquals( EXPECTED_FIRST_LINE, readFirstLine( input, TextFileInputMeta.FILE_FORMAT_DOS ) );
  }

  /** LF-terminated input read with the Unix format yields the first line without terminator. */
  @Test
  public void testGetLineUnix() throws KettleFileException, UnsupportedEncodingException {
    String input = "col1\tcol2\tcol3\ndata1\tdata2\tdata3\n";
    assertEquals( EXPECTED_FIRST_LINE, readFirstLine( input, TextFileInputMeta.FILE_FORMAT_UNIX ) );
  }

  /**
   * CR-terminated input yields the first line without terminator.
   * NOTE(review): this test passes FILE_FORMAT_UNIX although the data uses CR-only terminators;
   * kept as in the original - confirm this is intentional.
   */
  @Test
  public void testGetLineOSX() throws KettleFileException, UnsupportedEncodingException {
    String input = "col1\tcol2\tcol3\rdata1\tdata2\tdata3\r";
    assertEquals( EXPECTED_FIRST_LINE, readFirstLine( input, TextFileInputMeta.FILE_FORMAT_UNIX ) );
  }

  /** Input mixing CRLF and CR terminators, read with the mixed format, yields the first line. */
  @Test
  public void testGetLineMixed() throws KettleFileException, UnsupportedEncodingException {
    String input = "col1\tcol2\tcol3\r\ndata1\tdata2\tdata3\r";
    assertEquals( EXPECTED_FIRST_LINE, readFirstLine( input, TextFileInputMeta.FILE_FORMAT_MIXED ) );
  }

  /**
   * Reads DOS-, Unix- and CR-terminated inputs with the Unix format type; each must yield the first
   * line, and the whole test must finish within the 100 ms timeout.
   */
  @Test( timeout = 100 )
  public void test_PDI695() throws KettleFileException, UnsupportedEncodingException {
    String inputDOS = "col1\tcol2\tcol3\r\ndata1\tdata2\tdata3\r\n";
    String inputUnix = "col1\tcol2\tcol3\ndata1\tdata2\tdata3\n";
    String inputOSX = "col1\tcol2\tcol3\rdata1\tdata2\tdata3\r";

    assertEquals( EXPECTED_FIRST_LINE, readFirstLine( inputDOS, TextFileInputMeta.FILE_FORMAT_UNIX ) );
    assertEquals( EXPECTED_FIRST_LINE, readFirstLine( inputUnix, TextFileInputMeta.FILE_FORMAT_UNIX ) );
    assertEquals( EXPECTED_FIRST_LINE, readFirstLine( inputOSX, TextFileInputMeta.FILE_FORMAT_UNIX ) );
  }

  /**
   * With line wrapping enabled and one wrap per row, each pair of physical lines
   * ({@code "rNc1"} followed by {@code ";rNc2"}) must be returned as a single two-column row.
   */
  @Test
  public void readWrappedInputWithoutHeaders() throws Exception {
    final String virtualFile = createVirtualFile( "pdi-2607.txt", "r1c1\n", ";r1c2\n", "r2c1\n", ";r2c2" );
    try {
      TextFileInputMeta meta = createMetaObject( field( "col1" ), field( "col2" ) );
      meta.content.lineWrapped = true;
      meta.content.nrWraps = 1;

      TextFileInputData data = createDataObject( virtualFile, ";", "col1", "col2" );

      TextFileInput input = StepMockUtil.getStep( TextFileInput.class, TextFileInputMeta.class, "test" );
      List<Object[]> output = TransTestingUtil.execute( input, meta, data, 2, false );
      TransTestingUtil.assertResult( new Object[] { "r1c1", "r1c2" }, output.get( 0 ) );
      TransTestingUtil.assertResult( new Object[] { "r2c1", "r2c2" }, output.get( 1 ) );
    } finally {
      // Remove the RAM file even when an assertion above fails.
      deleteVfsFile( virtualFile );
    }
  }

  /**
   * A field flagged as repeated must take the previous row's value when its own value is missing:
   * the second row {@code "2,,2"} is expected to come out as {@code 2, 1, 2}.
   */
  @Test
  public void readInputWithMissedValues() throws Exception {
    final String virtualFile = createVirtualFile( "pdi-14172.txt", "1,1,1\n", "2,,2\n" );
    try {
      BaseFileInputField repeatedField = field( "col2" );
      repeatedField.setRepeated( true );

      TextFileInputMeta meta = createMetaObject( field( "col1" ), repeatedField, field( "col3" ) );
      TextFileInputData data = createDataObject( virtualFile, ",", "col1", "col2", "col3" );

      TextFileInput input = StepMockUtil.getStep( TextFileInput.class, TextFileInputMeta.class, "test" );
      List<Object[]> output = TransTestingUtil.execute( input, meta, data, 2, false );
      TransTestingUtil.assertResult( new Object[] { "1", "1", "1" }, output.get( 0 ) );
      TransTestingUtil.assertResult( new Object[] { "2", "1", "2" }, output.get( 1 ) );
    } finally {
      // Remove the RAM file even when an assertion above fails.
      deleteVfsFile( virtualFile );
    }
  }

  /**
   * Reads the row {@code "-,-"} where only the second field declares {@code "-"} as its null
   * string. The expected row passed to the assertion contains just {@code "-"} for the first column.
   * NOTE(review): presumably assertResult treats the columns beyond the expected array as null -
   * verify against TransTestingUtil.
   */
  @Test
  public void readInputWithNonEmptyNullif() throws Exception {
    final String virtualFile = createVirtualFile( "pdi-14358.txt", "-,-\n" );
    try {
      BaseFileInputField nullableField = field( "col2" );
      nullableField.setNullString( "-" );

      TextFileInputMeta meta = createMetaObject( field( "col1" ), nullableField );
      TextFileInputData data = createDataObject( virtualFile, ",", "col1", "col2" );

      TextFileInput input = StepMockUtil.getStep( TextFileInput.class, TextFileInputMeta.class, "test" );
      List<Object[]> output = TransTestingUtil.execute( input, meta, data, 1, false );
      TransTestingUtil.assertResult( new Object[] { "-" }, output.get( 0 ) );
    } finally {
      // Remove the RAM file even when an assertion above fails.
      deleteVfsFile( virtualFile );
    }
  }

  /**
   * A field with an if-null value configured must emit that value when the input is empty:
   * the row {@code "1,"} is expected to come out as {@code 1, DEFAULT}.
   */
  @Test
  public void readInputWithDefaultValues() throws Exception {
    final String virtualFile = createVirtualFile( "pdi-14832.txt", "1,\n" );
    try {
      BaseFileInputField defaultedField = field( "col2" );
      defaultedField.setIfNullValue( "DEFAULT" );

      TextFileInputMeta meta = createMetaObject( field( "col1" ), defaultedField );
      TextFileInputData data = createDataObject( virtualFile, ",", "col1", "col2" );

      TextFileInput input = StepMockUtil.getStep( TextFileInput.class, TextFileInputMeta.class, "test" );
      List<Object[]> output = TransTestingUtil.execute( input, meta, data, 1, false );
      TransTestingUtil.assertResult( new Object[] { "1", "DEFAULT" }, output.get( 0 ) );
    } finally {
      // Remove the RAM file even when an assertion above fails.
      deleteVfsFile( virtualFile );
    }
  }

  /**
   * Builds step metadata for an uncompressed CSV file without header or footer lines.
   *
   * @param fields input field definitions, stored as-is in the metadata
   * @return freshly configured metadata object
   */
  private TextFileInputMeta createMetaObject( BaseFileInputField... fields ) {
    TextFileInputMeta meta = new TextFileInputMeta();
    meta.content.fileCompression = "None";
    meta.content.fileType = "CSV";
    meta.content.header = false;
    meta.content.nrHeaderLines = -1;
    meta.content.footer = false;
    meta.content.nrFooterLines = -1;

    meta.inputFiles.inputFields = fields;
    return meta;
  }

  /**
   * Builds step data that reads a single file with the Unix file format, an empty filter list,
   * a mocked error handler and a play-list that accepts everything.
   *
   * @param file         VFS path of the file to read
   * @param separator    field separator
   * @param outputFields names of the String columns of the output row; may be {@code null}
   * @return freshly configured data object
   * @throws Exception if the file object cannot be resolved
   */
  private TextFileInputData createDataObject( String file,
                                              String separator,
                                              String... outputFields ) throws Exception {
    TextFileInputData data = new TextFileInputData();
    data.files = new FileInputList();
    data.files.addFile( KettleVFS.getFileObject( file ) );

    data.separator = separator;

    data.outputRowMeta = new RowMeta();
    if ( outputFields != null ) {
      for ( String field : outputFields ) {
        data.outputRowMeta.addValueMeta( new ValueMetaString( field ) );
      }
    }

    data.dataErrorLineHandler = mock( FileErrorHandler.class );
    data.fileFormatType = TextFileInputMeta.FILE_FORMAT_UNIX;
    data.filterProcessor = new TextFileFilterProcessor( new TextFileFilter[ 0 ], new Variables() );
    data.filePlayList = new FilePlayListAll();
    return data;
  }

  /**
   * Creates a RAM file and fills it with the concatenation of {@code rows}; no separators or
   * line terminators are added, so callers include them in the rows themselves.
   *
   * @param filename name passed to {@link TestUtils#createRamFile}
   * @param rows     pieces of content written in order; may be {@code null} for an empty file
   * @return path of the created file as returned by {@code TestUtils.createRamFile}
   * @throws Exception if the file cannot be created or written
   */
  private static String createVirtualFile( String filename, String... rows ) throws Exception {
    String virtualFile = TestUtils.createRamFile( filename );

    StringBuilder content = new StringBuilder();
    if ( rows != null ) {
      for ( String row : rows ) {
        content.append( row );
      }
    }

    // Platform default charset is kept on purpose: createMetaObject() configures no encoding.
    // NOTE(review): presumably the step then decodes with the platform default as well - confirm.
    byte[] bytes = content.toString().getBytes();
    try ( OutputStream os = KettleVFS.getFileObject( virtualFile ).getContent().getOutputStream() ) {
      IOUtils.write( bytes, os );
    }

    return virtualFile;
  }

  /**
   * Deletes the file at the given VFS path.
   *
   * @param path path previously returned by {@link #createVirtualFile}
   * @throws Exception if the file object cannot be resolved or deleted
   */
  private static void deleteVfsFile( String path ) throws Exception {
    TestUtils.getFileObject( path ).delete();
  }

  /**
   * Creates an input field with the given name, passing {@code -1} for both remaining
   * constructor arguments.
   *
   * @param name field name
   * @return new field definition
   */
  private static BaseFileInputField field( String name ) {
    return new BaseFileInputField( name, -1, -1 );
  }
}