/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.csvinput; import org.junit.Assert; import org.mockito.Matchers; import org.mockito.Mockito; import org.junit.BeforeClass; import org.junit.Test; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleStepException; import org.pentaho.di.core.logging.LoggingObjectInterface; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.trans.step.RowAdapter; import org.pentaho.di.trans.steps.mock.StepMockHelper; import org.pentaho.di.trans.steps.textfileinput.TextFileInputField; /** * Tests for unicode support in CsvInput step * * @author Pavel Sakun * @see CsvInput */ public class CsvInputUnicodeTest extends CsvInputUnitTestBase { private static final String UTF8 = "UTF-8"; private static final String UTF16LE = "UTF-16LE"; private static final String UTF16LEBOM = "x-UTF-16LE-BOM"; private static final String UTF16BE = "UTF-16BE"; private static final String ONE_CHAR_DELIM = "\t"; private static final String MULTI_CHAR_DELIM = "|||"; private static final String TEXT = "Header1%1$sHeader2\nValue%1$sValue\nValue%1$sValue\n"; private static final String TEXT_WITH_ENCLOSURES = "Header1%1$sHeader2\n\"Value\"%1$s\"Value\"\n\"Value\"%1$s\"Value\"\n"; private static final String TEST_DATA = String.format( TEXT, ONE_CHAR_DELIM ); private static final String TEST_DATA1 = String.format( TEXT, MULTI_CHAR_DELIM ); private static final String TEST_DATA2 = String.format( TEXT_WITH_ENCLOSURES, ONE_CHAR_DELIM ); private static final String TEST_DATA3 = String.format( TEXT_WITH_ENCLOSURES, MULTI_CHAR_DELIM ); private static StepMockHelper<?, ?> stepMockHelper; @BeforeClass public static void setUp() throws KettleException { stepMockHelper = new StepMockHelper<CsvInputMeta, CsvInputData>( "CsvInputTest", CsvInputMeta.class, CsvInputData.class ); Mockito.when( stepMockHelper.logChannelInterfaceFactory.create( Matchers.any(), Matchers.any( LoggingObjectInterface.class ) ) ) .thenReturn( stepMockHelper.logChannelInterface ); Mockito.when( stepMockHelper.trans.isRunning() ).thenReturn( true ); } @Test public void testUTF16LE() throws Exception { doTest( UTF16LE, UTF16LE, TEST_DATA, ONE_CHAR_DELIM ); } @Test public void testUTF16BE() throws Exception { doTest( UTF16BE, UTF16BE, TEST_DATA, ONE_CHAR_DELIM ); } @Test public void testUTF16BE_multiDelim() throws Exception { doTest( UTF16BE, UTF16BE, TEST_DATA1, MULTI_CHAR_DELIM ); } @Test public void testUTF16LEBOM() throws Exception { doTest( UTF16LEBOM, UTF16LE, TEST_DATA, ONE_CHAR_DELIM ); } @Test public void testUTF8() throws Exception { doTest( UTF8, UTF8, TEST_DATA, ONE_CHAR_DELIM ); } @Test public void testUTF8_multiDelim() throws Exception { doTest( UTF8, UTF8, TEST_DATA1, MULTI_CHAR_DELIM ); } @Test public void testUTF16LEDataWithEnclosures() throws Exception { doTest( UTF16LE, UTF16LE, TEST_DATA2, ONE_CHAR_DELIM ); } @Test public void testUTF16BEDataWithEnclosures() throws Exception { doTest( UTF16BE, UTF16BE, TEST_DATA2, ONE_CHAR_DELIM ); } @Test public void testUTF16LEBOMDataWithEnclosures() throws Exception { doTest( UTF16LEBOM, UTF16LE, TEST_DATA2, ONE_CHAR_DELIM ); } @Test public void testUTF16BE_multiDelim_DataWithEnclosures() throws Exception { doTest( UTF16BE, UTF16BE, TEST_DATA3, MULTI_CHAR_DELIM ); } @Test public void testUTF16LE_multiDelim_DataWithEnclosures() throws Exception { doTest( UTF16LE, UTF16LE, TEST_DATA3, MULTI_CHAR_DELIM ); } @Test public void testUTF8_multiDelim_DataWithEnclosures() throws Exception { doTest( UTF8, UTF8, TEST_DATA3, MULTI_CHAR_DELIM ); } private void doTest( final String fileEncoding, final String stepEncoding, final String testData, final String delimiter ) throws Exception { String testFilePath = createTestFile( fileEncoding, testData ).getAbsolutePath(); CsvInputMeta meta = createStepMeta( testFilePath, stepEncoding, delimiter ); CsvInputData data = new CsvInputData(); CsvInput csvInput = new CsvInput( stepMockHelper.stepMeta, stepMockHelper.stepDataInterface, 0, stepMockHelper.transMeta, stepMockHelper.trans ); csvInput.init( meta, data ); csvInput.addRowListener( new RowAdapter() { @Override public void rowWrittenEvent( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException { for ( int i = 0; i < rowMeta.size(); i++ ) { Assert.assertEquals( "Value", row[ i ] ); } } } ); boolean haveRowsToRead; do { haveRowsToRead = !csvInput.processRow( meta, data ); } while ( !haveRowsToRead ); csvInput.dispose( meta, data ); Assert.assertEquals( 2, csvInput.getLinesWritten() ); } private CsvInputMeta createStepMeta( final String testFilePath, final String encoding, final String delimiter ) { final CsvInputMeta meta = new CsvInputMeta(); meta.setFilename( testFilePath ); meta.setDelimiter( delimiter ); meta.setEncoding( encoding ); meta.setEnclosure( "\"" ); meta.setBufferSize( "50000" ); meta.setInputFields( getInputFileFields() ); meta.setHeaderPresent( true ); return meta; } private TextFileInputField[] getInputFileFields() { return createInputFileFields( "Header1", "Header2" ); } }