/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.api.java.record.io;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import org.apache.log4j.Level;
import org.junit.BeforeClass;
import org.junit.Test;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.types.Record;
import eu.stratosphere.types.StringValue;
import eu.stratosphere.util.LogUtils;
/**
 * Tests for {@link TextInputFormat}: reading several consecutive records from one split and
 * handling of different line-break / delimiter combinations.
 */
public class TextInputFormatTest {

	private static final String FIRST = "First line";

	private static final String SECOND = "Second line";

	@BeforeClass
	public static void initialize() {
		LogUtils.initializeDefaultConsoleLogger(Level.WARN);
	}

	/**
	 * Regression test for a suspected off-by-one error that made the TextInputFormat fail when
	 * reading more than one record.
	 *
	 * At the time the bug was reported, the easiest workaround was
	 * setParameter(TextInputFormat.CHARSET_NAME, "ASCII").
	 */
	@Test
	public void testPositionBug() {
		try {
			File tempFile = createTempFile(FIRST + '\n' + SECOND + '\n');

			// null delimiter: keep whatever delimiter the format uses by default
			TextInputFormat inputFormat = openFirstSplit(tempFile, null);
			try {
				Record r = new Record();
				assertNextLine(inputFormat, r, "Expecting first record here", FIRST);
				assertNextLine(inputFormat, r, "Expecting second record here", SECOND);
				assertNull("The input file is over", inputFormat.nextRecord(r));
			}
			finally {
				inputFormat.close();
			}
		}
		catch (Exception e) {
			// Only unexpected exceptions are translated here; assertion failures propagate
			// untouched so that their own message reaches the test report.
			System.err.println("test failed with exception: " + e.getMessage());
			e.printStackTrace(System.err);
			fail("Test erroneous: " + e);
		}
	}

	/**
	 * This tests cases when line ends with \r\n and \n is used as delimiter, the last \r should be removed
	 */
	@Test
	public void testRemovingTrailingCR() {
		testRemovingTrailingCR("\n", "\n");
		testRemovingTrailingCR("\r\n", "\n");
		testRemovingTrailingCR("|", "|");
		testRemovingTrailingCR("|", "\n");
	}

	/**
	 * Writes two lines terminated by {@code lineBreaker}, reads them back with {@code delimiter}
	 * and checks the resulting records.
	 *
	 * @param lineBreaker the character sequence written after each line of the input file
	 * @param delimiter the record delimiter set on the input format
	 */
	private void testRemovingTrailingCR(String lineBreaker, String delimiter) {
		final String content = FIRST + lineBreaker + SECOND + lineBreaker;

		// The file splits into the two lines when the delimiter matches the line break exactly,
		// or when the delimiter is \n and the line break is \n or \r\n (trailing \r is dropped).
		final boolean newlineDelimited = delimiter.equals("\n")
				&& (lineBreaker.equals("\n") || lineBreaker.equals("\r\n"));
		final boolean expectTwoRecords = newlineDelimited || lineBreaker.equals(delimiter);

		try {
			File tempFile = createTempFile(content);

			TextInputFormat inputFormat = openFirstSplit(tempFile, delimiter);
			try {
				Record r = new Record();
				if (expectTwoRecords) {
					assertNextLine(inputFormat, r, "Expecting first record here", FIRST);
					assertNextLine(inputFormat, r, "Expecting second record here", SECOND);
					assertNull("The input file is over", inputFormat.nextRecord(r));
				}
				else {
					// the delimiter never occurs in the file, so everything is one record
					assertNextLine(inputFormat, r, "Expecting first record here", content);
				}
			}
			finally {
				inputFormat.close();
			}
		}
		catch (Exception e) {
			// Only unexpected exceptions are translated here; assertion failures propagate
			// untouched so that their own message reaches the test report.
			System.err.println("test failed with exception: " + e.getMessage());
			e.printStackTrace(System.err);
			fail("Test erroneous: " + e);
		}
	}

	/**
	 * Creates a temporary file (deleted on JVM exit) holding the given text.
	 *
	 * The text used by these tests is ASCII-only; it is written as UTF-8 so the bytes on disk do
	 * not depend on the platform default charset.
	 *
	 * @param content the text to write
	 * @return the newly created file
	 * @throws IOException if the file cannot be created or written
	 */
	private static File createTempFile(String content) throws IOException {
		File tempFile = File.createTempFile("TextInputFormatTest", "tmp");
		tempFile.deleteOnExit();
		tempFile.setWritable(true);

		OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile), "UTF-8");
		try {
			writer.write(content);
		}
		finally {
			// close even when the write fails, so the file handle is not leaked
			writer.close();
		}
		return tempFile;
	}

	/**
	 * Configures a TextInputFormat for the given file and opens its first input split.
	 *
	 * @param file the input file
	 * @param delimiter the record delimiter to set after configuring, or {@code null} to leave
	 *        the format's delimiter untouched
	 * @return the opened input format; the caller is responsible for closing it
	 * @throws Exception if configuring, splitting or opening fails
	 */
	private static TextInputFormat openFirstSplit(File file, String delimiter) throws Exception {
		TextInputFormat inputFormat = new TextInputFormat();
		inputFormat.setFilePath(file.toURI().toString());

		Configuration parameters = new Configuration();
		inputFormat.configure(parameters);
		if (delimiter != null) {
			inputFormat.setDelimiter(delimiter);
		}

		FileInputSplit[] splits = inputFormat.createInputSplits(1);
		assertTrue("expected at least one input split", splits.length >= 1);
		inputFormat.open(splits[0]);
		return inputFormat;
	}

	/**
	 * Asserts that the format yields another record and that its first field equals the expected text.
	 *
	 * @param inputFormat the opened input format to read from
	 * @param reuse the record instance passed to {@code nextRecord}
	 * @param message the assertion message used when no record is returned
	 * @param expected the expected string value of field 0
	 * @throws Exception if reading the record fails
	 */
	private static void assertNextLine(TextInputFormat inputFormat, Record reuse, String message, String expected)
			throws Exception {
		assertNotNull(message, inputFormat.nextRecord(reuse));
		assertEquals(expected, reuse.getField(0, StringValue.class).getValue());
	}
}