/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.api.common.io; import java.io.File; import java.io.IOException; import org.apache.flink.api.common.io.statistics.BaseStatistics; import org.apache.flink.configuration.Configuration; import org.apache.flink.core.fs.FileInputSplit; import org.apache.flink.core.fs.Path; import org.apache.flink.testutils.TestFileUtils; import org.apache.flink.types.IntValue; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; public class EnumerateNestedFilesTest { protected Configuration config; final String tempPath = System.getProperty("java.io.tmpdir"); private DummyFileInputFormat format; @Before public void setup() { this.config = new Configuration(); format = new DummyFileInputFormat(); } @After public void setdown() throws Exception { if (this.format != null) { this.format.close(); } } /** * Test without nested directory and recursive.file.enumeration = true */ @Test public void testNoNestedDirectoryTrue() { try { String filePath = TestFileUtils.createTempFile("foo"); this.format.setFilePath(new Path(filePath)); this.config.setBoolean("recursive.file.enumeration", true); format.configure(this.config); FileInputSplit[] splits = format.createInputSplits(1); Assert.assertEquals(1, splits.length); } catch (Exception ex) { ex.printStackTrace(); Assert.fail(ex.getMessage()); } } /** * Test with one nested directory and recursive.file.enumeration = true */ @Test public void testOneNestedDirectoryTrue() { try { String firstLevelDir = TestFileUtils.randomFileName(); String secondLevelDir = TestFileUtils.randomFileName(); File nestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir); nestedDir.mkdirs(); nestedDir.deleteOnExit(); File insideNestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir + System.getProperty("file.separator") + secondLevelDir); insideNestedDir.mkdirs(); insideNestedDir.deleteOnExit(); // create a file in the first-level and two files in the nested dir TestFileUtils.createTempFileInDirectory(nestedDir.getAbsolutePath(), "paella"); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "kalamari"); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "fideua"); this.format.setFilePath(new Path(nestedDir.toURI().toString())); this.config.setBoolean("recursive.file.enumeration", true); format.configure(this.config); FileInputSplit[] splits = format.createInputSplits(1); Assert.assertEquals(3, splits.length); } catch (Exception ex) { ex.printStackTrace(); Assert.fail(ex.getMessage()); } } /** * Test with one nested directory and recursive.file.enumeration = false */ @Test public void testOneNestedDirectoryFalse() { try { String firstLevelDir = TestFileUtils.randomFileName(); String secondLevelDir = TestFileUtils.randomFileName(); File nestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir); nestedDir.mkdirs(); nestedDir.deleteOnExit(); File insideNestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir + System.getProperty("file.separator") + secondLevelDir); insideNestedDir.mkdirs(); insideNestedDir.deleteOnExit(); // create a file in the first-level and two files in the nested dir TestFileUtils.createTempFileInDirectory(nestedDir.getAbsolutePath(), "paella"); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "kalamari"); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "fideua"); this.format.setFilePath(new Path(nestedDir.toURI().toString())); this.config.setBoolean("recursive.file.enumeration", false); format.configure(this.config); FileInputSplit[] splits = format.createInputSplits(1); Assert.assertEquals(1, splits.length); } catch (Exception ex) { ex.printStackTrace(); Assert.fail(ex.getMessage()); } } /** * Test with two nested directories and recursive.file.enumeration = true */ @Test public void testTwoNestedDirectoriesTrue() { try { String firstLevelDir = TestFileUtils.randomFileName(); String secondLevelDir = TestFileUtils.randomFileName(); String thirdLevelDir = TestFileUtils.randomFileName(); File nestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir); nestedDir.mkdirs(); nestedDir.deleteOnExit(); File insideNestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir + System.getProperty("file.separator") + secondLevelDir); insideNestedDir.mkdirs(); insideNestedDir.deleteOnExit(); File nestedNestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir + System.getProperty("file.separator") + secondLevelDir + System.getProperty("file.separator") + thirdLevelDir); nestedNestedDir.mkdirs(); nestedNestedDir.deleteOnExit(); // create a file in the first-level, two files in the second level and one in the third level TestFileUtils.createTempFileInDirectory(nestedDir.getAbsolutePath(), "paella"); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "kalamari"); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "fideua"); TestFileUtils.createTempFileInDirectory(nestedNestedDir.getAbsolutePath(), "bravas"); this.format.setFilePath(new Path(nestedDir.toURI().toString())); this.config.setBoolean("recursive.file.enumeration", true); format.configure(this.config); FileInputSplit[] splits = format.createInputSplits(1); Assert.assertEquals(4, splits.length); } catch (Exception ex) { ex.printStackTrace(); Assert.fail(ex.getMessage()); } } /** * Tests if the recursion is invoked correctly in nested directories. */ @Test public void testOnlyLevel2NestedDirectories() { try { String rootDir = TestFileUtils.randomFileName(); String nestedDir = TestFileUtils.randomFileName(); String firstNestedNestedDir = TestFileUtils.randomFileName(); String secondNestedNestedDir = TestFileUtils.randomFileName(); File testDir = new File(tempPath + System.getProperty("file.separator") + rootDir); testDir.mkdirs(); testDir.deleteOnExit(); File nested = new File(testDir.getAbsolutePath() + System.getProperty("file.separator") + nestedDir); nested.mkdirs(); nested.deleteOnExit(); File nestedNestedDir1 = new File(nested.getAbsolutePath() + System.getProperty("file.separator") + firstNestedNestedDir); nestedNestedDir1.mkdirs(); nestedNestedDir1.deleteOnExit(); File nestedNestedDir2 = new File(nested.getAbsolutePath() + System.getProperty("file.separator") + secondNestedNestedDir); nestedNestedDir2.mkdirs(); nestedNestedDir2.deleteOnExit(); // create files in second level TestFileUtils.createTempFileInDirectory(nestedNestedDir1.getAbsolutePath(), "paella"); TestFileUtils.createTempFileInDirectory(nestedNestedDir1.getAbsolutePath(), "kalamari"); TestFileUtils.createTempFileInDirectory(nestedNestedDir2.getAbsolutePath(), "fideua"); TestFileUtils.createTempFileInDirectory(nestedNestedDir2.getAbsolutePath(), "bravas"); this.format.setFilePath(new Path(testDir.getAbsolutePath())); this.config.setBoolean("recursive.file.enumeration", true); format.configure(this.config); FileInputSplit[] splits = format.createInputSplits(1); Assert.assertEquals(4, splits.length); } catch (Exception ex) { ex.printStackTrace(); Assert.fail(ex.getMessage()); } } /** * Test with two nested directories and recursive.file.enumeration = true */ @Test public void testTwoNestedDirectoriesWithFilteredFilesTrue() { String sep = System.getProperty("file.separator"); try { String firstLevelDir = TestFileUtils.randomFileName(); String secondLevelDir = TestFileUtils.randomFileName(); String thirdLevelDir = TestFileUtils.randomFileName(); String secondLevelFilterDir = "_"+TestFileUtils.randomFileName(); String thirdLevelFilterDir = "_"+TestFileUtils.randomFileName(); File nestedDir = new File(tempPath + sep + firstLevelDir); nestedDir.mkdirs(); nestedDir.deleteOnExit(); File insideNestedDir = new File(tempPath + sep + firstLevelDir + sep + secondLevelDir); insideNestedDir.mkdirs(); insideNestedDir.deleteOnExit(); File insideNestedDirFiltered = new File(tempPath + sep + firstLevelDir + sep + secondLevelFilterDir); insideNestedDirFiltered.mkdirs(); insideNestedDirFiltered.deleteOnExit(); File filteredFile = new File(tempPath + sep + firstLevelDir + sep + "_IWillBeFiltered"); filteredFile.createNewFile(); filteredFile.deleteOnExit(); File nestedNestedDir = new File(tempPath + sep + firstLevelDir + sep + secondLevelDir + sep + thirdLevelDir); nestedNestedDir.mkdirs(); nestedNestedDir.deleteOnExit(); File nestedNestedDirFiltered = new File(tempPath + sep + firstLevelDir + sep + secondLevelDir + sep + thirdLevelFilterDir); nestedNestedDirFiltered.mkdirs(); nestedNestedDirFiltered.deleteOnExit(); // create a file in the first-level, two files in the second level and one in the third level TestFileUtils.createTempFileInDirectory(nestedDir.getAbsolutePath(), "paella"); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "kalamari"); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "fideua"); TestFileUtils.createTempFileInDirectory(nestedNestedDir.getAbsolutePath(), "bravas"); // create files which are filtered TestFileUtils.createTempFileInDirectory(insideNestedDirFiltered.getAbsolutePath(), "kalamari"); TestFileUtils.createTempFileInDirectory(insideNestedDirFiltered.getAbsolutePath(), "fideua"); TestFileUtils.createTempFileInDirectory(nestedNestedDirFiltered.getAbsolutePath(), "bravas"); this.format.setFilePath(new Path(nestedDir.toURI().toString())); this.config.setBoolean("recursive.file.enumeration", true); format.configure(this.config); FileInputSplit[] splits = format.createInputSplits(1); Assert.assertEquals(4, splits.length); } catch (Exception ex) { ex.printStackTrace(); Assert.fail(ex.getMessage()); } } @Test public void testGetStatisticsOneFileInNestedDir() { try { final long SIZE = 1024 * 500; String firstLevelDir = TestFileUtils.randomFileName(); String secondLevelDir = TestFileUtils.randomFileName(); File nestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir); nestedDir.mkdirs(); nestedDir.deleteOnExit(); File insideNestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir + System.getProperty("file.separator") + secondLevelDir); insideNestedDir.mkdirs(); insideNestedDir.deleteOnExit(); // create a file in the nested dir TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), SIZE); this.format.setFilePath(new Path(nestedDir.toURI().toString())); this.config.setBoolean("recursive.file.enumeration", true); format.configure(this.config); BaseStatistics stats = format.getStatistics(null); Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize()); } catch (Exception ex) { ex.printStackTrace(); Assert.fail(ex.getMessage()); } } @Test public void testGetStatisticsMultipleNestedFiles() { try { final long SIZE1 = 2077; final long SIZE2 = 31909; final long SIZE3 = 10; final long SIZE4 = 71; final long TOTAL = SIZE1 + SIZE2 + SIZE3 + SIZE4; String firstLevelDir = TestFileUtils.randomFileName(); String secondLevelDir = TestFileUtils.randomFileName(); String secondLevelDir2 = TestFileUtils.randomFileName(); File nestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir); nestedDir.mkdirs(); nestedDir.deleteOnExit(); File insideNestedDir = new File(tempPath + System.getProperty("file.separator") + firstLevelDir + System.getProperty("file.separator") + secondLevelDir); insideNestedDir.mkdirs(); insideNestedDir.deleteOnExit(); File insideNestedDir2 = new File(tempPath + System.getProperty("file.separator") + firstLevelDir + System.getProperty("file.separator") + secondLevelDir2); insideNestedDir2.mkdirs(); insideNestedDir2.deleteOnExit(); // create a file in the first-level and two files in the nested dir TestFileUtils.createTempFileInDirectory(nestedDir.getAbsolutePath(), SIZE1); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), SIZE2); TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), SIZE3); TestFileUtils.createTempFileInDirectory(insideNestedDir2.getAbsolutePath(), SIZE4); this.format.setFilePath(new Path(nestedDir.toURI().toString())); this.config.setBoolean("recursive.file.enumeration", true); format.configure(this.config); BaseStatistics stats = format.getStatistics(null); Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize()); /* Now invalidate the cache and check again */ Thread.sleep(1000); // accuracy of file modification times is rather low TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), 42L); BaseStatistics stats2 = format.getStatistics(stats); Assert.assertNotEquals(stats2, stats); Assert.assertEquals("The file size from the statistics is wrong.", TOTAL + 42L, stats2.getTotalInputSize()); } catch (Exception ex) { ex.printStackTrace(); Assert.fail(ex.getMessage()); } } // ------------------------------------------------------------------------ private class DummyFileInputFormat extends FileInputFormat<IntValue> { private static final long serialVersionUID = 1L; @Override public boolean reachedEnd() throws IOException { return true; } @Override public IntValue nextRecord(IntValue reuse) throws IOException { return null; } } }