/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.api.java.io;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;

import org.apache.log4j.Level;
import org.junit.BeforeClass;
import org.junit.Test;

import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.util.LogUtils;

/**
 * Tests for {@link TextInputFormat}: reading a two-line file record by record, and the handling of a trailing
 * carriage return for different combinations of line breaker and record delimiter.
 */
public class TextInputFormatTest {

	/** First line written to every test file. */
	private static final String FIRST_LINE = "First line";

	/** Second line written to every test file. */
	private static final String SECOND_LINE = "Second line";

	/** Charset used to write the test files, so the bytes on disk do not depend on the platform default. */
	private static final String CHARSET = "UTF-8";

	/**
	 * Sets up a console logger at WARN level once for the whole test class.
	 */
	@BeforeClass
	public static void initialize() {
		LogUtils.initializeDefaultConsoleLogger(Level.WARN);
	}

	/**
	 * Writes two lines to a temporary file and checks that the input format returns exactly those two records
	 * and then reports the end of the input.
	 *
	 * @throws Exception if the temporary file cannot be written or the input format fails; the exception is
	 *         propagated unchanged so that JUnit reports the real cause
	 */
	@Test
	public void testSimpleRead() throws Exception {
		final File tempFile = createWritableTempFile();

		// println terminates each line with the platform line separator
		final PrintStream ps = new PrintStream(tempFile, CHARSET);
		try {
			ps.println(FIRST_LINE);
			ps.println(SECOND_LINE);
		} finally {
			ps.close();
		}

		final TextInputFormat inputFormat = new TextInputFormat(new Path(tempFile.toURI().toString()));
		inputFormat.configure(new Configuration());

		final FileInputSplit[] splits = inputFormat.createInputSplits(1);
		assertTrue("expected at least one input split", splits.length >= 1);

		inputFormat.open(splits[0]);
		try {
			assertFalse(inputFormat.reachedEnd());
			String result = inputFormat.nextRecord("");
			assertNotNull("Expecting first record here", result);
			assertEquals(FIRST_LINE, result);

			assertFalse(inputFormat.reachedEnd());
			result = inputFormat.nextRecord(result);
			assertNotNull("Expecting second record here", result);
			assertEquals(SECOND_LINE, result);

			// the end may be signalled either up front or by a null record
			assertTrue(inputFormat.reachedEnd() || null == inputFormat.nextRecord(result));
		} finally {
			// release the file handle even when an assertion above fails
			inputFormat.close();
		}
	}

	/**
	 * Checks record splitting for several line breaker / delimiter combinations. In particular, when lines end
	 * with "\r\n" and "\n" is the delimiter, the records are expected to come back without the trailing "\r".
	 *
	 * @throws Exception if a temporary file cannot be written or the input format fails
	 */
	@Test
	public void testRemovingTrailingCR() throws Exception {
		testRemovingTrailingCR("\n", "\n");
		testRemovingTrailingCR("\r\n", "\n");
		testRemovingTrailingCR("|", "|");
		testRemovingTrailingCR("|", "\n");
	}

	/**
	 * Writes both test lines, each terminated by {@code lineBreaker}, and reads them back using {@code delimiter}.
	 * If the delimiter occurs in the content, the two lines must be returned as separate records followed by
	 * {@code null}; otherwise the first record must be the complete file content.
	 *
	 * @param lineBreaker the string written after each line
	 * @param delimiter the record delimiter set on the input format
	 * @throws Exception if the temporary file cannot be written or the input format fails
	 */
	private void testRemovingTrailingCR(String lineBreaker, String delimiter) throws Exception {
		final String content = FIRST_LINE + lineBreaker + SECOND_LINE + lineBreaker;
		final File tempFile = createWritableTempFile();

		final OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), CHARSET);
		try {
			wrt.write(content);
		} finally {
			wrt.close();
		}

		final String fileUri = tempFile.toURI().toString();
		final TextInputFormat inputFormat = new TextInputFormat(new Path(fileUri));
		inputFormat.setFilePath(fileUri);
		inputFormat.configure(new Configuration());
		// the delimiter is set after configure(), as in the original test
		inputFormat.setDelimiter(delimiter);

		final FileInputSplit[] splits = inputFormat.createInputSplits(1);
		assertTrue("expected at least one input split", splits.length >= 1);

		// a "\n" delimiter also splits "\r\n"-terminated lines; otherwise the delimiter must equal the breaker
		final boolean newlineDelimited = delimiter.equals("\n")
				&& (lineBreaker.equals("\n") || lineBreaker.equals("\r\n"));
		final boolean expectTwoRecords = newlineDelimited || lineBreaker.equals(delimiter);

		inputFormat.open(splits[0]);
		try {
			String result = inputFormat.nextRecord("");
			assertNotNull("Expecting first record here", result);

			if (expectTwoRecords) {
				assertEquals(FIRST_LINE, result);

				result = inputFormat.nextRecord(result);
				assertNotNull("Expecting second record here", result);
				assertEquals(SECOND_LINE, result);

				result = inputFormat.nextRecord(result);
				assertNull("The input file is over", result);
			} else {
				// the delimiter never occurs, so the whole content is a single record
				assertEquals(content, result);
			}
		} finally {
			// release the file handle even when an assertion above fails
			inputFormat.close();
		}
	}

	/**
	 * Creates an empty temporary file that is marked writable and scheduled for deletion on JVM exit.
	 *
	 * @return the newly created temporary file
	 * @throws IOException if the file cannot be created
	 */
	private static File createWritableTempFile() throws IOException {
		final File tempFile = File.createTempFile("TextInputFormatTest", "tmp");
		tempFile.deleteOnExit();
		tempFile.setWritable(true);
		return tempFile;
	}
}