/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.textfileinput; import static org.junit.Assert.assertEquals; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.util.Date; import java.util.List; import org.apache.commons.io.IOUtils; import org.junit.BeforeClass; import org.junit.Test; import org.mockito.Mockito; import org.pentaho.di.core.KettleEnvironment; import org.pentaho.di.core.exception.KettleFileException; import org.pentaho.di.core.exception.KettleValueException; import org.pentaho.di.core.fileinput.FileInputList; import org.pentaho.di.core.logging.LogChannelInterface; import org.pentaho.di.core.playlist.FilePlayListAll; import org.pentaho.di.core.row.RowMeta; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.row.value.ValueMetaString; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.trans.TransTestingUtil; import org.pentaho.di.trans.step.errorhandling.FileErrorHandler; import org.pentaho.di.trans.steps.StepMockUtil; import org.pentaho.di.utils.TestUtils; /** * @deprecated replaced by implementation in the ...steps.fileinput.text package */ public class TextFileInputTest { @BeforeClass public static void initKettle() throws Exception { KettleEnvironment.init(); } private static InputStreamReader getInputStreamReader( String data ) throws UnsupportedEncodingException { return new InputStreamReader( new ByteArrayInputStream( data.getBytes( ( "UTF-8" ) ) ) ); } @Test public void testGetLineDOS() throws KettleFileException, UnsupportedEncodingException { String input = "col1\tcol2\tcol3\r\ndata1\tdata2\tdata3\r\n"; String expected = "col1\tcol2\tcol3"; String output = TextFileInput.getLine( null, getInputStreamReader( input ), TextFileInputMeta.FILE_FORMAT_DOS, new StringBuilder( 1000 ) ); assertEquals( expected, output ); } @Test public void testGetLineUnix() throws KettleFileException, UnsupportedEncodingException { String input = "col1\tcol2\tcol3\ndata1\tdata2\tdata3\n"; String expected = "col1\tcol2\tcol3"; String output = TextFileInput.getLine( null, getInputStreamReader( input ), TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder( 1000 ) ); assertEquals( expected, output ); } @Test public void testGetLineOSX() throws KettleFileException, UnsupportedEncodingException { String input = "col1\tcol2\tcol3\rdata1\tdata2\tdata3\r"; String expected = "col1\tcol2\tcol3"; String output = TextFileInput.getLine( null, getInputStreamReader( input ), TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder( 1000 ) ); assertEquals( expected, output ); } @Test public void testGetLineMixed() throws KettleFileException, UnsupportedEncodingException { String input = "col1\tcol2\tcol3\r\ndata1\tdata2\tdata3\r"; String expected = "col1\tcol2\tcol3"; String output = TextFileInput.getLine( null, getInputStreamReader( input ), TextFileInputMeta.FILE_FORMAT_MIXED, new StringBuilder( 1000 ) ); assertEquals( expected, output ); } @Test( timeout = 100 ) public void test_PDI695() throws KettleFileException, UnsupportedEncodingException { String inputDOS = "col1\tcol2\tcol3\r\ndata1\tdata2\tdata3\r\n"; String inputUnix = "col1\tcol2\tcol3\ndata1\tdata2\tdata3\n"; String inputOSX = "col1\tcol2\tcol3\rdata1\tdata2\tdata3\r"; String expected = "col1\tcol2\tcol3"; assertEquals( expected, TextFileInput.getLine( null, getInputStreamReader( inputDOS ), TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder( 1000 ) ) ); assertEquals( expected, TextFileInput.getLine( null, getInputStreamReader( inputUnix ), TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder( 1000 ) ) ); assertEquals( expected, TextFileInput.getLine( null, getInputStreamReader( inputOSX ), TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder( 1000 ) ) ); } @Test public void readWrappedInputWithoutHeaders() throws Exception { final String content = new StringBuilder() .append( "r1c1" ).append( '\n' ).append( ";r1c2\n" ) .append( "r2c1" ).append( '\n' ).append( ";r2c2" ) .toString(); final String virtualFile = createVirtualFile( "pdi-2607.txt", content ); TextFileInputMeta meta = new TextFileInputMeta(); meta.setLineWrapped( true ); meta.setNrWraps( 1 ); meta.setInputFields( new TextFileInputField[] { field( "col1" ), field( "col2" ) } ); meta.setFileCompression( "None" ); meta.setFileType( "CSV" ); meta.setHeader( false ); meta.setNrHeaderLines( -1 ); meta.setFooter( false ); meta.setNrFooterLines( -1 ); TextFileInputData data = new TextFileInputData(); data.setFiles( new FileInputList() ); data.getFiles().addFile( KettleVFS.getFileObject( virtualFile ) ); data.outputRowMeta = new RowMeta(); data.outputRowMeta.addValueMeta( new ValueMetaString( "col1" ) ); data.outputRowMeta.addValueMeta( new ValueMetaString( "col2" ) ); data.dataErrorLineHandler = Mockito.mock( FileErrorHandler.class ); data.fileFormatType = TextFileInputMeta.FILE_FORMAT_UNIX; data.separator = ";"; data.filterProcessor = new TextFileFilterProcessor( new TextFileFilter[ 0 ] ); data.filePlayList = new FilePlayListAll(); TextFileInput input = StepMockUtil.getStep( TextFileInput.class, TextFileInputMeta.class, "test" ); List<Object[]> output = TransTestingUtil.execute( input, meta, data, 2, false ); TransTestingUtil.assertResult( new Object[] { "r1c1", "r1c2" }, output.get( 0 ) ); TransTestingUtil.assertResult( new Object[] { "r2c1", "r2c2" }, output.get( 1 ) ); deleteVfsFile( virtualFile ); } @Test public void readInputWithMissedValues() throws Exception { final String virtualFile = createVirtualFile( "pdi-14172.txt", "1,1,1\n", "2,,2\n" ); TextFileInputMeta meta = new TextFileInputMeta(); TextFileInputField field2 = field( "col2" ); field2.setRepeated( true ); meta.setInputFields( new TextFileInputField[] { field( "col1" ), field2, field( "col3" ) } ); meta.setFileCompression( "None" ); meta.setFileType( "CSV" ); meta.setHeader( false ); meta.setNrHeaderLines( -1 ); meta.setFooter( false ); meta.setNrFooterLines( -1 ); TextFileInputData data = new TextFileInputData(); data.setFiles( new FileInputList() ); data.getFiles().addFile( KettleVFS.getFileObject( virtualFile ) ); data.outputRowMeta = new RowMeta(); data.outputRowMeta.addValueMeta( new ValueMetaString( "col1" ) ); data.outputRowMeta.addValueMeta( new ValueMetaString( "col2" ) ); data.outputRowMeta.addValueMeta( new ValueMetaString( "col3" ) ); data.dataErrorLineHandler = Mockito.mock( FileErrorHandler.class ); data.fileFormatType = TextFileInputMeta.FILE_FORMAT_UNIX; data.separator = ","; data.filterProcessor = new TextFileFilterProcessor( new TextFileFilter[ 0 ] ); data.filePlayList = new FilePlayListAll(); TextFileInput input = StepMockUtil.getStep( TextFileInput.class, TextFileInputMeta.class, "test" ); List<Object[]> output = TransTestingUtil.execute( input, meta, data, 2, false ); TransTestingUtil.assertResult( new Object[] { "1", "1", "1" }, output.get( 0 ) ); TransTestingUtil.assertResult( new Object[] { "2", "1", "2" }, output.get( 1 ) ); deleteVfsFile( virtualFile ); } @Test public void readInputWithDefaultValues() throws Exception { final String virtualFile = createVirtualFile( "pdi-14832.txt", "1,\n" ); TextFileInputMeta meta = new TextFileInputMeta(); TextFileInputField field2 = field( "col2" ); field2.setIfNullValue( "DEFAULT" ); meta.setInputFields( new TextFileInputField[] { field( "col1" ), field2 } ); meta.setFileCompression( "None" ); meta.setFileType( "CSV" ); meta.setHeader( false ); meta.setNrHeaderLines( -1 ); meta.setFooter( false ); meta.setNrFooterLines( -1 ); TextFileInputData data = new TextFileInputData(); data.setFiles( new FileInputList() ); data.getFiles().addFile( KettleVFS.getFileObject( virtualFile ) ); data.outputRowMeta = new RowMeta(); data.outputRowMeta.addValueMeta( new ValueMetaString( "col1" ) ); data.outputRowMeta.addValueMeta( new ValueMetaString( "col2" ) ); data.dataErrorLineHandler = Mockito.mock( FileErrorHandler.class ); data.fileFormatType = TextFileInputMeta.FILE_FORMAT_UNIX; data.separator = ","; data.filterProcessor = new TextFileFilterProcessor( new TextFileFilter[ 0 ] ); data.filePlayList = new FilePlayListAll(); TextFileInput input = StepMockUtil.getStep( TextFileInput.class, TextFileInputMeta.class, "test" ); List<Object[]> output = TransTestingUtil.execute( input, meta, data, 1, false ); TransTestingUtil.assertResult( new Object[] { "1", "DEFAULT" }, output.get( 0 ) ); deleteVfsFile( virtualFile ); } private static String createVirtualFile( String filename, String... rows ) throws Exception { String virtualFile = TestUtils.createRamFile( filename ); StringBuilder content = new StringBuilder(); if ( rows != null ) { for ( String row : rows ) { content.append( row ); } } ByteArrayOutputStream bos = new ByteArrayOutputStream(); bos.write( content.toString().getBytes() ); OutputStream os = KettleVFS.getFileObject( virtualFile ).getContent().getOutputStream(); try { IOUtils.copy( new ByteArrayInputStream( bos.toByteArray() ), os ); } finally { os.close(); } return virtualFile; } private static void deleteVfsFile( String path ) throws Exception { TestUtils.getFileObject( path ).delete(); } private static TextFileInputField field( String name ) { return new TextFileInputField( name, -1, -1 ); } /** * PDI-14390 Text file input throws NPE if skipping error rows and passing through incoming fieds * * @throws Exception */ @Test public void convertLineToRowTest() throws Exception { LogChannelInterface log = Mockito.mock( LogChannelInterface.class ); TextFileLine textFileLine = Mockito.mock( TextFileLine.class ); textFileLine.line = "testData1;testData2;testData3"; InputFileMetaInterface info = Mockito.mock( InputFileMetaInterface.class ); TextFileInputField[] textFileInputFields = { new TextFileInputField(), new TextFileInputField(), new TextFileInputField() }; Mockito.doReturn( textFileInputFields ).when( info ).getInputFields(); Mockito.doReturn( "CSV" ).when( info ).getFileType(); Mockito.doReturn( "/" ).when( info ).getEscapeCharacter(); Mockito.doReturn( true ).when( info ).isErrorIgnored(); Mockito.doReturn( true ).when( info ).isErrorLineSkipped(); RowMetaInterface outputRowMeta = Mockito.mock( RowMetaInterface.class ); Mockito.doReturn( 15 ).when( outputRowMeta ).size(); ValueMetaInterface valueMetaWithError = Mockito.mock( ValueMetaInterface.class ); Mockito.doThrow( new KettleValueException( "Error converting" ) ).when( valueMetaWithError ).convertDataFromString( Mockito.anyString(), Mockito.any( ValueMetaInterface.class ), Mockito.anyString(), Mockito.anyString(), Mockito.anyInt() ); Mockito.doReturn( valueMetaWithError ).when( outputRowMeta ).getValueMeta( Mockito.anyInt() ); //it should run without NPE TextFileInput.convertLineToRow( log, textFileLine, info, new Object[3], 1, outputRowMeta, Mockito.mock( RowMetaInterface.class ), null, 1L, ";", null, "/", Mockito.mock( FileErrorHandler.class ), false, false, false, false, false, false, false, false, null, null, false, new Date(), null, null, null, 1L ); } }