/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.api.common.io; import java.io.DataOutputStream; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.List; import org.apache.log4j.Level; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; import eu.stratosphere.api.common.io.statistics.BaseStatistics; import eu.stratosphere.configuration.Configuration; import eu.stratosphere.core.fs.FileInputSplit; import eu.stratosphere.core.fs.Path; import eu.stratosphere.core.io.IOReadableWritable; import eu.stratosphere.types.IntValue; import eu.stratosphere.types.Record; import eu.stratosphere.types.StringValue; import eu.stratosphere.util.LogUtils; /** * Tests {@link SerializedInputFormat} and {@link SerializedOutputFormat}. */ @RunWith(Parameterized.class) public class SequentialFormatTest { public class InputSplitSorter implements Comparator<FileInputSplit> { @Override public int compare(FileInputSplit o1, FileInputSplit o2) { int pathOrder = o1.getPath().getName().compareTo(o2.getPath().getName()); return pathOrder == 0 ? Long.signum(o1.getStart() - o2.getStart()) : pathOrder; } } private int numberOfTuples; private long blockSize; private int degreeOfParallelism; private BlockInfo info = new SerializedInputFormat<IOReadableWritable>().createBlockInfo(); private int[] rawDataSizes; private File tempFile; @BeforeClass public static void initialize() { LogUtils.initializeDefaultConsoleLogger(Level.WARN); } /** * Initializes SequentialFormatTest. */ public SequentialFormatTest(int numberOfTuples, long blockSize, int degreeOfParallelism) { this.numberOfTuples = numberOfTuples; this.blockSize = blockSize; this.degreeOfParallelism = degreeOfParallelism; this.rawDataSizes = new int[degreeOfParallelism]; } /** * Count how many bytes would be written if all records were directly serialized */ @Before public void calcRawDataSize() throws IOException { int recordIndex = 0; for (int fileIndex = 0; fileIndex < this.degreeOfParallelism; fileIndex++) { ByteCounter byteCounter = new ByteCounter(); DataOutputStream out = new DataOutputStream(byteCounter); for (int fileCount = 0; fileCount < this.getNumberOfTuplesPerFile(fileIndex); fileCount++, recordIndex++) { this.getRecord(recordIndex).write(out); } this.rawDataSizes[fileIndex] = byteCounter.getLength(); } } /** * Checks if the expected input splits were created */ @Test public void checkInputSplits() throws IOException { FileInputSplit[] inputSplits = this.createInputFormat().createInputSplits(0); Arrays.sort(inputSplits, new InputSplitSorter()); int splitIndex = 0; for (int fileIndex = 0; fileIndex < this.degreeOfParallelism; fileIndex++) { List<FileInputSplit> sameFileSplits = new ArrayList<FileInputSplit>(); Path lastPath = inputSplits[splitIndex].getPath(); for (; splitIndex < inputSplits.length; splitIndex++) { if (!inputSplits[splitIndex].getPath().equals(lastPath)) { break; } sameFileSplits.add(inputSplits[splitIndex]); } Assert.assertEquals(this.getExpectedBlockCount(fileIndex), sameFileSplits.size()); long lastBlockLength = this.rawDataSizes[fileIndex] % (this.blockSize - this.info.getInfoSize()) + this.info.getInfoSize(); for (int index = 0; index < sameFileSplits.size(); index++) { Assert.assertEquals(this.blockSize * index, sameFileSplits.get(index).getStart()); if (index < sameFileSplits.size() - 1) { Assert.assertEquals(this.blockSize, sameFileSplits.get(index).getLength()); } } Assert.assertEquals(lastBlockLength, sameFileSplits.get(sameFileSplits.size() - 1).getLength()); } } /** * Tests if the expected sequence and amount of data can be read */ @Test public void checkRead() throws IOException { SerializedInputFormat<Record> input = this.createInputFormat(); FileInputSplit[] inputSplits = input.createInputSplits(0); Arrays.sort(inputSplits, new InputSplitSorter()); int readCount = 0; for (FileInputSplit inputSplit : inputSplits) { input.open(inputSplit); Record record = new Record(); while (!input.reachedEnd()) { if (input.nextRecord(record) != null) { this.checkEquals(this.getRecord(readCount), record); readCount++; } } } Assert.assertEquals(this.numberOfTuples, readCount); } /** * Tests the statistics of the given format. */ @Test public void checkStatistics() { SerializedInputFormat<Record> input = this.createInputFormat(); BaseStatistics statistics = input.getStatistics(null); Assert.assertEquals(this.numberOfTuples, statistics.getNumberOfRecords()); } @After public void cleanup() { this.deleteRecursively(this.tempFile); } private void deleteRecursively(File file) { if (file.isDirectory()) { for (File subFile : file.listFiles()) { this.deleteRecursively(subFile); } } else { file.delete(); } } /** * Write out the tuples in a temporary file and return it. */ @Before public void writeTuples() throws IOException { this.tempFile = File.createTempFile("SerializedInputFormat", null); this.tempFile.deleteOnExit(); Configuration configuration = new Configuration(); configuration.setLong(BinaryOutputFormat.BLOCK_SIZE_PARAMETER_KEY, this.blockSize); if (this.degreeOfParallelism == 1) { SerializedOutputFormat output = FormatUtil.openOutput(SerializedOutputFormat.class, this.tempFile.toURI().toString(), configuration); for (int index = 0; index < this.numberOfTuples; index++) { output.writeRecord(this.getRecord(index)); } output.close(); } else { this.tempFile.delete(); this.tempFile.mkdir(); int recordIndex = 0; for (int fileIndex = 0; fileIndex < this.degreeOfParallelism; fileIndex++) { SerializedOutputFormat output = FormatUtil.openOutput(SerializedOutputFormat.class, this.tempFile.toURI() + "/" + (fileIndex + 1), configuration); for (int fileCount = 0; fileCount < this.getNumberOfTuplesPerFile(fileIndex); fileCount++, recordIndex++) { output.writeRecord(this.getRecord(recordIndex)); } output.close(); } } } private int getNumberOfTuplesPerFile(int fileIndex) { return this.numberOfTuples / this.degreeOfParallelism; } /** * Tests if the length of the file matches the expected value. */ @Test public void checkLength() { File[] files = this.tempFile.isDirectory() ? this.tempFile.listFiles() : new File[] { this.tempFile }; Arrays.sort(files); for (int fileIndex = 0; fileIndex < this.degreeOfParallelism; fileIndex++) { long lastBlockLength = this.rawDataSizes[fileIndex] % (this.blockSize - this.info.getInfoSize()); long expectedLength = (this.getExpectedBlockCount(fileIndex) - 1) * this.blockSize + this.info.getInfoSize() + lastBlockLength; Assert.assertEquals(expectedLength, files[fileIndex].length()); } } protected SerializedInputFormat<Record> createInputFormat() { Configuration configuration = new Configuration(); configuration.setLong(BinaryInputFormat.BLOCK_SIZE_PARAMETER_KEY, this.blockSize); final SerializedInputFormat<Record> inputFormat = new SerializedInputFormat<Record>(); inputFormat.setFilePath(this.tempFile.toURI().toString()); inputFormat.configure(configuration); return inputFormat; } /** * Returns the record to write at the given position */ protected Record getRecord(int index) { return new Record(new IntValue(index), new StringValue(String.valueOf(index))); } /** * Checks if both records are equal */ private void checkEquals(Record expected, Record actual) { Assert.assertEquals(expected.getNumFields(), actual.getNumFields()); Assert.assertEquals(expected.getField(0, IntValue.class), actual.getField(0, IntValue.class)); Assert.assertEquals(expected.getField(1, StringValue.class), actual.getField(1, StringValue.class)); } private int getExpectedBlockCount(int fileIndex) { int expectedBlockCount = (int) Math.ceil((double) this.rawDataSizes[fileIndex] / (this.blockSize - this.info.getInfoSize())); return expectedBlockCount; } @Parameters public static List<Object[]> getParameters() { ArrayList<Object[]> params = new ArrayList<Object[]>(); for (int dop = 1; dop <= 2; dop++) { // numberOfTuples, blockSize, dop params.add(new Object[] { 100, BinaryOutputFormat.NATIVE_BLOCK_SIZE, dop }); params.add(new Object[] { 100, 1000, dop }); params.add(new Object[] { 100, 1 << 20, dop }); params.add(new Object[] { 10000, 1000, dop }); params.add(new Object[] { 10000, 1 << 20, dop }); } return params; } /** * Counts the bytes that would be written. * */ private static final class ByteCounter extends OutputStream { int length = 0; /** * Returns the length. * * @return the length */ public int getLength() { return this.length; } @Override public void write(int b) throws IOException { this.length++; } } }