/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.api.common.io;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.types.Record;
import eu.stratosphere.types.StringValue;
import eu.stratosphere.util.LogUtils;
public class DelimitedInputFormatTest {
protected Configuration config;
protected File tempFile;
private final DelimitedInputFormat<Record> format = new MyTextInputFormat();
// --------------------------------------------------------------------------------------------
@BeforeClass
public static void initialize() {
LogUtils.initializeDefaultConsoleLogger(Level.WARN);
}
@Before
public void setup() {
this.format.setFilePath(new Path("file:///some/file/that/will/not/be/read"));
this.config = new Configuration();
}
@After
public void setdown() throws Exception {
if (this.format != null) {
this.format.close();
}
if (this.tempFile != null) {
this.tempFile.delete();
}
}
// --------------------------------------------------------------------------------------------
// --------------------------------------------------------------------------------------------
@Test
public void testConfigure() {
this.config.setString("delimited-format.delimiter", "\n");
format.configure(this.config);
assertEquals("\n", new String(format.getDelimiter()));
this.config.setString("delimited-format.delimiter", "&-&");
format.configure(this.config);
assertEquals("&-&", new String(format.getDelimiter()));
}
@Test
public void testSerialization() throws Exception {
final byte[] DELIMITER = new byte[] {1, 2, 3, 4};
final int NUM_LINE_SAMPLES = 7;
final int LINE_LENGTH_LIMIT = 12345;
final int BUFFER_SIZE = 178;
DelimitedInputFormat<Record> format = new MyTextInputFormat();
format.setDelimiter(DELIMITER);
format.setNumLineSamples(NUM_LINE_SAMPLES);
format.setLineLengthLimit(LINE_LENGTH_LIMIT);
format.setBufferSize(BUFFER_SIZE);
ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
ObjectOutputStream oos = new ObjectOutputStream(baos);
oos.writeObject(format);
oos.flush();
oos.close();
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()));
@SuppressWarnings("unchecked")
DelimitedInputFormat<Record> deserialized = (DelimitedInputFormat<Record>) ois.readObject();
assertEquals(NUM_LINE_SAMPLES, deserialized.getNumLineSamples());
assertEquals(LINE_LENGTH_LIMIT, deserialized.getLineLengthLimit());
assertEquals(BUFFER_SIZE, deserialized.getBufferSize());
assertArrayEquals(DELIMITER, deserialized.getDelimiter());
}
@Test
public void testOpen() throws IOException {
final String myString = "my mocked line 1\nmy mocked line 2\n";
final FileInputSplit split = createTempFile(myString);
int bufferSize = 5;
format.setBufferSize(bufferSize);
format.open(split);
assertEquals(0, format.splitStart);
assertEquals(myString.length() - bufferSize, format.splitLength);
assertEquals(bufferSize, format.getBufferSize());
}
@Test
public void testRead() throws IOException {
final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2";
final FileInputSplit split = createTempFile(myString);
final Configuration parameters = new Configuration();
format.setDelimiter("$$$");
format.configure(parameters);
format.open(split);
Record theRecord = new Record();
assertNotNull(format.nextRecord(theRecord));
assertEquals("my key", theRecord.getField(0, StringValue.class).getValue());
assertEquals("my val", theRecord.getField(1, StringValue.class).getValue());
assertNotNull(format.nextRecord(theRecord));
assertEquals("my key2\n$$ctd.$$", theRecord.getField(0, StringValue.class).getValue());
assertEquals("my value2", theRecord.getField(1, StringValue.class).getValue());
assertNull(format.nextRecord(theRecord));
assertTrue(format.reachedEnd());
}
@Test
public void testRead2() throws IOException {
// 2. test case
final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2";
final FileInputSplit split = createTempFile(myString);
final Configuration parameters = new Configuration();
// default delimiter = '\n'
format.configure(parameters);
format.open(split);
Record theRecord = new Record();
assertNotNull(format.nextRecord(theRecord));
assertEquals("my key", theRecord.getField(0, StringValue.class).getValue());
assertEquals("my val$$$my key2", theRecord.getField(1, StringValue.class).getValue());
assertNotNull(format.nextRecord(theRecord));
assertEquals("$$ctd.$$", theRecord.getField(0, StringValue.class).getValue());
assertEquals("my value2", theRecord.getField(1, StringValue.class).getValue());
assertNull(format.nextRecord(theRecord));
assertTrue(format.reachedEnd());
}
private FileInputSplit createTempFile(String contents) throws IOException {
this.tempFile = File.createTempFile("test_contents", "tmp");
this.tempFile.deleteOnExit();
OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(this.tempFile));
wrt.write(contents);
wrt.close();
return new FileInputSplit(0, new Path(this.tempFile.toURI().toString()), 0, this.tempFile.length(), new String[] {"localhost"});
}
protected static final class MyTextInputFormat extends eu.stratosphere.api.common.io.DelimitedInputFormat<Record> {
private static final long serialVersionUID = 1L;
private final StringValue str1 = new StringValue();
private final StringValue str2 = new StringValue();
@Override
public Record readRecord(Record reuse, byte[] bytes, int offset, int numBytes) {
String theRecord = new String(bytes, offset, numBytes);
str1.setValue(theRecord.substring(0, theRecord.indexOf('|')));
str2.setValue(theRecord.substring(theRecord.indexOf('|') + 1));
reuse.setField(0, str1);
reuse.setField(1, str2);
return reuse;
}
}
}