/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.fileinput.text; import org.junit.Test; import org.pentaho.di.core.variables.Variables; import org.pentaho.di.trans.steps.fileinput.BaseFileInputField; public class TextFileInputContentParsingTest extends BaseTextParsingTest { @Test public void testDefaultOptions() throws Exception { initByFile( "default.csv" ); setFields( new BaseFileInputField(), new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "first", "1", "1.1" }, { "second", "2", "2.2" }, { "third", "3", "3.3" } } ); } @Test public void testSeparator() throws Exception { meta.content.separator = ","; initByFile( "separator.csv" ); setFields( new BaseFileInputField(), new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "first", "1", "1.1" }, { "second", "2", "2.2" }, { "third;third", "3", "3.3" } } ); } @Test public void testEscape() throws Exception { meta.content.escapeCharacter = "\\"; initByFile( "escape.csv" ); setFields( new BaseFileInputField(), new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "first", "1", "1.1" }, { "second", "2", "2.2" }, { "third;third", "3", "3.3" } } ); } @Test public void testHeader() throws Exception { meta.content.header = false; initByFile( "default.csv" ); setFields( new BaseFileInputField(), new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "Field 1", "Field 2", "Field 3" }, { "first", "1", "1.1" }, { "second", "2", "2.2" }, { "third", "3", "3.3" } } ); } @Test public void testGzipCompression() throws Exception { meta.content.fileCompression = "GZip"; initByFile( "default.csv.gz" ); setFields( new BaseFileInputField(), new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "first", "1", "1.1" }, { "second", "2", "2.2" }, { "third", "3", "3.3" } } ); } @Test public void testVfsGzipCompression() throws Exception { meta.content.fileCompression = "None"; String url = "gz:" + this.getClass().getResource( inPrefix + "default.csv.gz" ); initByURL( url ); setFields( new BaseFileInputField(), new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "first", "1", "1.1" }, { "second", "2", "2.2" }, { "third", "3", "3.3" } } ); } @Test public void testVfsBzip2Compression() throws Exception { meta.content.fileCompression = "None"; String url = "bz2:" + this.getClass().getResource( inPrefix + "default.csv.bz2" ); initByURL( url ); setFields( new BaseFileInputField(), new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "first", "1", "1.1" }, { "second", "2", "2.2" }, { "third", "3", "3.3" } } ); } @Test public void testFixedWidth() throws Exception { meta.content.fileType = "Fixed"; initByFile( "fixed.csv" ); setFields( new BaseFileInputField( "f1", 0, 7 ), new BaseFileInputField( "f2", 8, 7 ), new BaseFileInputField( "f3", 16, 7 ) ); process(); check( new Object[][] { { "first ", "1 ", "1.1" }, { "second ", "2 ", "2.2" }, { "third ", "3 ", "3.3" } } ); } @Test public void testFixedWidthBytes() throws Exception { meta.content.header = false; meta.content.fileType = "Fixed"; meta.content.fileFormat = "Unix"; meta.content.encoding = "Shift_JIS"; meta.content.length = "Bytes"; initByFile( "test-fixed-length-bytes.txt" ); setFields( new BaseFileInputField( "f1", 0, 5 ), new BaseFileInputField( "f2", 5, 3 ), new BaseFileInputField( "f3", 8, 1 ), new BaseFileInputField( "f4", 9, 3 ) ); process(); check( new Object[][] { { "1.000", "個 ", "T", "1.0" }, { "2.000", "M ", "Z", "1.0" } } ); } @Test public void testFixedWidthCharacters() throws Exception { meta.content.header = false; meta.content.fileType = "Fixed"; meta.content.fileFormat = "DOS"; meta.content.encoding = "ISO-8859-1"; meta.content.length = "Characters"; initByFile( "test-fixed-length-characters.txt" ); setFields( new BaseFileInputField( "f1", 0, 3 ), new BaseFileInputField( "f2", 3, 2 ), new BaseFileInputField( "f3", 5, 2 ), new BaseFileInputField( "f4", 7, 4 ) ); process(); check( new Object[][] { { "ABC", "DE", "FG", "HIJK" }, { "LmN", "oP", "qR", "sTuV" } } ); } @Test public void testFilterEmptyBacklog5381() throws Exception { meta.content.header = false; meta.content.fileType = "Fixed"; meta.content.noEmptyLines = true; meta.content.fileFormat = "mixed"; initByFile( "filterempty-BACKLOG-5381.csv" ); setFields( new BaseFileInputField( "f", 0, 100 ) ); process(); check( new Object[][] { { "FirstLine => FirstLine " }, { "ThirdLine => SecondLine" }, { "SixthLine => ThirdLine" }, { "NinthLine => FourthLine" }, { "" } } ); } @Test public void testFilterVariables() throws Exception { initByFile( "default.csv" ); Variables vars = new Variables(); vars.setVariable( "VAR_TEST", "second" ); data.filterProcessor = new TextFileFilterProcessor( new TextFileFilter[] { new TextFileFilter( 0, "${VAR_TEST}", false, false ) }, vars ); setFields( new BaseFileInputField(), new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "first", "1", "1.1" }, { "third", "3", "3.3" } } ); } @Test public void testBOM_UTF8() throws Exception { meta.content.encoding = "UTF-32LE"; meta.content.header = false; initByFile( "test-BOM-UTF-8.txt" ); setFields( new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "data", "1" } } ); } @Test public void testBOM_UTF16BE() throws Exception { meta.content.encoding = "UTF-32LE"; meta.content.header = false; initByFile( "test-BOM-UTF-16BE.txt" ); setFields( new BaseFileInputField(), new BaseFileInputField() ); process(); check( new Object[][] { { "data", "1" } } ); } }