/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.api.common.io;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;

import org.apache.log4j.Level;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import eu.stratosphere.api.common.io.FileInputFormat.FileBaseStatistics;
import eu.stratosphere.api.common.io.statistics.BaseStatistics;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.testutils.TestFileUtils;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.util.LogUtils;

/**
 * Tests for the statistics gathering and the input split creation of {@link FileInputFormat},
 * exercised through the minimal {@link DummyFileInputFormat} subclass defined at the bottom of this file.
 *
 * Unexpected exceptions are propagated to JUnit (test methods declare {@code throws Exception}) so that
 * the full stack trace and cause appear in the test report.
 */
public class FileInputFormatTest {

	private static final String WRONG_SIZE_MESSAGE = "The file size from the statistics is wrong.";

	/**
	 * Initializes the default console logger at ERROR level before any test runs.
	 */
	@BeforeClass
	public static void initialize() {
		LogUtils.initializeDefaultConsoleLogger(Level.ERROR);
	}

	/**
	 * Requesting statistics for a path that does not exist is expected to yield {@code null}.
	 */
	@Test
	public void testGetStatisticsNonExistingFile() throws Exception {
		final DummyFileInputFormat format = createFormat("file:///some/none/existing/directory/");

		BaseStatistics stats = format.getStatistics(null);
		Assert.assertNull("The file statistics should be null.", stats);
	}

	/**
	 * Without cached statistics, the reported total input size must equal the size of the single temp file.
	 */
	@Test
	public void testGetStatisticsOneFileNoCachedVersion() throws Exception {
		final long SIZE = 1024 * 500;
		String tempFile = TestFileUtils.createTempFile(SIZE);

		final DummyFileInputFormat format = createFormat(tempFile);

		BaseStatistics stats = format.getStatistics(null);
		Assert.assertEquals(WRONG_SIZE_MESSAGE, SIZE, stats.getTotalInputSize());
	}

	/**
	 * Without cached statistics, the reported total input size of a directory must equal the sum of
	 * the sizes of the files created in it.
	 */
	@Test
	public void testGetStatisticsMultipleFilesNoCachedVersion() throws Exception {
		final long SIZE1 = 2077;
		final long SIZE2 = 31909;
		final long SIZE3 = 10;
		final long TOTAL = SIZE1 + SIZE2 + SIZE3;

		String tempDir = TestFileUtils.createTempFileDir(SIZE1, SIZE2, SIZE3);

		final DummyFileInputFormat format = createFormat(tempDir);

		BaseStatistics stats = format.getStatistics(null);
		Assert.assertEquals(WRONG_SIZE_MESSAGE, TOTAL, stats.getTotalInputSize());
	}

	/**
	 * Checks how cached statistics are treated for a single file: the very same object is returned when it
	 * is passed back in, cached values with the current modification time are reused, and cached values
	 * with an older modification time are replaced by freshly gathered ones.
	 */
	@Test
	public void testGetStatisticsOneFileWithCachedVersion() throws Exception {
		final long SIZE = 50873;
		final long FAKE_SIZE = 10065;

		String tempFile = TestFileUtils.createTempFile(SIZE);

		assertCachedStatisticsHandling(tempFile, SIZE, FAKE_SIZE);
	}

	/**
	 * Same checks as {@link #testGetStatisticsOneFileWithCachedVersion()}, but for a directory containing
	 * several files; the accurate size is the sum of the individual file sizes.
	 */
	@Test
	public void testGetStatisticsMultipleFilesWithCachedVersion() throws Exception {
		final long SIZE1 = 2077;
		final long SIZE2 = 31909;
		final long SIZE3 = 10;
		final long TOTAL = SIZE1 + SIZE2 + SIZE3;
		final long FAKE_SIZE = 10065;

		String tempDir = TestFileUtils.createTempFileDir(SIZE1, SIZE2, SIZE3);

		assertCachedStatisticsHandling(tempDir, TOTAL, FAKE_SIZE);
	}

	// ---- Tests for .deflate ---------

	/**
	 * Create directory with files with .deflate extension and see if it creates a split
	 * for each file. Each split has to start from the beginning.
	 */
	@Test
	public void testFileInputSplit() throws Exception {
		String tempFile = TestFileUtils.createTempFileDirExtension(".deflate", "some", "stupid", "meaningless", "files");

		final DummyFileInputFormat format = createFormat(tempFile);
		FileInputSplit[] splits = format.createInputSplits(2);
		Assert.assertEquals(4, splits.length);
		for (FileInputSplit split : splits) {
			// unsplittable deflate files have this size as a flag for "read whole file"
			Assert.assertEquals(-1L, split.getLength());
			// always read from the beginning.
			Assert.assertEquals(0L, split.getStart());
		}

		// test if this also works for "mixed" directories
		TestFileUtils.createTempFileInDirectory(tempFile.replace("file:", ""),
				"this creates a test file with a random extension (at least not .deflate)");

		final DummyFileInputFormat formatMixed = createFormat(tempFile);
		FileInputSplit[] splitsMixed = formatMixed.createInputSplits(2);
		Assert.assertEquals(5, splitsMixed.length);
		for (FileInputSplit split : splitsMixed) {
			if (split.getPath().getName().endsWith(".deflate")) {
				// unsplittable deflate files have this size as a flag for "read whole file"
				Assert.assertEquals(-1L, split.getLength());
				// always read from the beginning.
				Assert.assertEquals(0L, split.getStart());
			} else {
				Assert.assertEquals(0L, split.getStart());
				Assert.assertTrue("split size not correct", split.getLength() > 0);
			}
		}
	}

	/**
	 * Files whose names start with an underscore ({@code _luigi}, {@code _SUCCESS}) must not produce
	 * input splits; only the two regular files in the directory are expected to show up.
	 */
	@Test
	public void testIgnoredUnderscoreFiles() throws Exception {
		final String contents = "CONTENTS";

		// create some accepted, some ignored files

		File tempDir = new File(System.getProperty("java.io.tmpdir"));
		File f = null;
		do {
			f = new File(tempDir, TestFileUtils.randomFileName(""));
		} while (f.exists());

		// fail right here if the directory cannot be created, instead of with an obscure error further down
		Assert.assertTrue("Could not create temp directory " + f.getAbsolutePath(), f.mkdirs());
		f.deleteOnExit();

		File child1 = new File(f, "dataFile1.txt");
		File child2 = new File(f, "another_file.bin");
		File luigiFile = new File(f, "_luigi");
		File success = new File(f, "_SUCCESS");

		File[] files = { child1, child2, luigiFile, success };

		for (File child : files) {
			child.deleteOnExit();

			BufferedWriter out = new BufferedWriter(new FileWriter(child));
			try {
				out.write(contents);
			} finally {
				out.close();
			}
		}

		// test that only the valid files are accepted

		final DummyFileInputFormat format = createFormat(f.toURI().toString());
		FileInputSplit[] splits = format.createInputSplits(1);

		Assert.assertEquals(2, splits.length);

		final URI uri1 = splits[0].getPath().toUri();
		final URI uri2 = splits[1].getPath().toUri();

		final URI childUri1 = child1.toURI();
		final URI childUri2 = child2.toURI();

		// the order of the splits is not fixed by this test, so accept both permutations
		Assert.assertTrue(
				"Splits do not match the two accepted files: " + uri1 + ", " + uri2,
				(uri1.equals(childUri1) && uri2.equals(childUri2)) ||
				(uri1.equals(childUri2) && uri2.equals(childUri1)));
	}

	// ------------------------------------------------------------------------

	/**
	 * Creates a {@link DummyFileInputFormat} reading from the given path and configures it with an
	 * empty {@link Configuration}.
	 *
	 * @param filePath the path (file or directory) handed to {@code setFilePath}
	 * @return the configured input format
	 */
	private static DummyFileInputFormat createFormat(String filePath) {
		final DummyFileInputFormat format = new DummyFileInputFormat();
		format.setFilePath(filePath);
		format.configure(new Configuration());
		return format;
	}

	/**
	 * Shared body of the "with cached version" tests. A fresh input format is created for every step.
	 *
	 * @param path     the file or directory to gather statistics for
	 * @param realSize the total input size expected from freshly gathered statistics
	 * @param fakeSize a deliberately wrong size put into hand-made cached statistics
	 */
	private static void assertCachedStatisticsHandling(String path, long realSize, long fakeSize) throws Exception {
		// no cached statistics: accurate statistics are gathered
		FileBaseStatistics stats = createFormat(path).getStatistics(null);
		Assert.assertEquals(WRONG_SIZE_MESSAGE, realSize, stats.getTotalInputSize());

		// passing the gathered statistics back in must return the very same object
		FileBaseStatistics newStats = createFormat(path).getStatistics(stats);
		Assert.assertSame("Statistics object was changed", stats, newStats);

		// insert fake stats with the correct modification time. the call should return the fake stats
		FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize,
				BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics latest = createFormat(path).getStatistics(fakeStats);
		Assert.assertEquals(WRONG_SIZE_MESSAGE, fakeSize, latest.getTotalInputSize());

		// insert fake stats with the expired modification time. the call should return new accurate stats
		FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, fakeSize,
				BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics reGathered = createFormat(path).getStatistics(outDatedFakeStats);
		Assert.assertEquals(WRONG_SIZE_MESSAGE, realSize, reGathered.getTotalInputSize());
	}

	/**
	 * Minimal {@link FileInputFormat} that never produces records: {@link #reachedEnd()} always returns
	 * {@code true} and {@link #nextRecord(IntValue)} always returns {@code null}.
	 *
	 * Declared static because it uses no state of the enclosing test and should not carry a hidden
	 * reference to the test instance.
	 */
	private static final class DummyFileInputFormat extends FileInputFormat<IntValue> {
		private static final long serialVersionUID = 1L;

		@Override
		public boolean reachedEnd() throws IOException {
			return true;
		}

		@Override
		public IntValue nextRecord(IntValue record) throws IOException {
			return null;
		}
	}
}