/* * Copyright © 2014-2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.dataset2.lib; import co.cask.cdap.api.dataset.DataSetException; import co.cask.cdap.api.dataset.DatasetManagementException; import co.cask.cdap.api.dataset.lib.FileSet; import co.cask.cdap.api.dataset.lib.FileSetArguments; import co.cask.cdap.api.dataset.lib.FileSetProperties; import co.cask.cdap.common.conf.Constants; import co.cask.cdap.common.io.Locations; import co.cask.cdap.data2.dataset2.DatasetFrameworkTestUtil; import co.cask.cdap.data2.dataset2.lib.file.FileSetDataset; import co.cask.cdap.proto.Id; import co.cask.tephra.TransactionFailureException; import com.google.common.collect.Maps; import org.apache.twill.filesystem.Location; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.ClassRule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.Map; public class FileSetTest { @ClassRule public static TemporaryFolder tmpFolder = new TemporaryFolder(); @ClassRule public static DatasetFrameworkTestUtil dsFrameworkUtil = new DatasetFrameworkTestUtil(); static FileSet fileSet1; static FileSet fileSet2; private static final Id.Namespace OTHER_NAMESPACE = Id.Namespace.from("yourspace"); private static final Id.DatasetInstance testFileSetInstance1 = Id.DatasetInstance.from(DatasetFrameworkTestUtil.NAMESPACE_ID, "testFileSet"); private static final Id.DatasetInstance testFileSetInstance2 = Id.DatasetInstance.from(OTHER_NAMESPACE, "testFileSet"); private static final Id.DatasetInstance testFileSetInstance3 = Id.DatasetInstance.from(DatasetFrameworkTestUtil.NAMESPACE_ID, "absoluteFileSet"); private static final Id.DatasetInstance testFileSetInstance4 = Id.DatasetInstance.from(DatasetFrameworkTestUtil.NAMESPACE_ID, "lookAlikeFileSet"); private static final Id.DatasetInstance testFileSetInstance5 = Id.DatasetInstance.from(DatasetFrameworkTestUtil.NAMESPACE_ID, "externalFileSet"); @Before public void before() throws Exception { dsFrameworkUtil.createInstance("fileSet", testFileSetInstance1, FileSetProperties.builder() .setBasePath("testDir").build()); Map<String, String> fileArgs = Maps.newHashMap(); FileSetArguments.setInputPath(fileArgs, "some?File1"); FileSetArguments.setOutputPath(fileArgs, "some?File1"); fileSet1 = dsFrameworkUtil.getInstance(testFileSetInstance1, fileArgs); dsFrameworkUtil.createInstance("fileSet", testFileSetInstance2, FileSetProperties.builder() .setBasePath("testDir").build()); fileArgs = Maps.newHashMap(); FileSetArguments.setInputPath(fileArgs, "some?File2"); FileSetArguments.setOutputPath(fileArgs, "some?File2"); fileSet2 = dsFrameworkUtil.getInstance(testFileSetInstance2, fileArgs); } @After public void after() throws Exception { deleteInstance(testFileSetInstance1); deleteInstance(testFileSetInstance2); deleteInstance(testFileSetInstance3); deleteInstance(testFileSetInstance4); deleteInstance(testFileSetInstance5); } static void deleteInstance(Id.DatasetInstance id) throws Exception { if (dsFrameworkUtil.getInstance(id) != null) { dsFrameworkUtil.deleteInstance(id); } } @Test public void testWriteRead() throws IOException { Location fileSet1Output = fileSet1.getOutputLocation(); Location fileSet2Output = fileSet2.getOutputLocation(); Location fileSet1NsDir = Locations.getParent(Locations.getParent(Locations.getParent(fileSet1Output))); Location fileSet2NsDir = Locations.getParent(Locations.getParent(Locations.getParent(fileSet2Output))); Assert.assertNotNull(fileSet1NsDir); Assert.assertNotNull(fileSet2NsDir); Assert.assertEquals(fileSet1NsDir.getName(), DatasetFrameworkTestUtil.NAMESPACE_ID.getId()); Assert.assertEquals(fileSet2NsDir.getName(), OTHER_NAMESPACE.getId()); Assert.assertNotEquals(fileSet1.getInputLocations().get(0).toURI().getPath(), fileSet2.getInputLocations().get(0).toURI().getPath()); Assert.assertNotEquals(fileSet1Output.toURI().getPath(), fileSet2Output.toURI().getPath()); try (OutputStream out = fileSet1.getOutputLocation().getOutputStream()) { out.write(42); } try (OutputStream out = fileSet2.getOutputLocation().getOutputStream()) { out.write(54); } try (InputStream in = fileSet1.getInputLocations().get(0).getInputStream()) { Assert.assertEquals(42, in.read()); } try (InputStream in = fileSet2.getInputLocations().get(0).getInputStream()) { Assert.assertEquals(54, in.read()); } } @Test public void testAbsolutePath() throws IOException, DatasetManagementException { String absolutePath = tmpFolder.newFolder() + "/absolute/path"; dsFrameworkUtil.createInstance("fileSet", testFileSetInstance3, FileSetProperties.builder() .setBasePath(absolutePath).build()); // validate that the base path for the file set was created Assert.assertTrue(new File(absolutePath).isDirectory()); // instantiate the file set with an output path Map<String, String> fileArgs = Maps.newHashMap(); FileSetArguments.setOutputPath(fileArgs, "out"); FileSet fileSet = dsFrameworkUtil.getInstance(testFileSetInstance3, fileArgs); // write to the output path Assert.assertEquals(absolutePath + "/out", fileSet.getOutputLocation().toURI().getPath()); try (OutputStream out = fileSet.getOutputLocation().getOutputStream()) { out.write(42); } // validate that the file was created Assert.assertTrue(new File(absolutePath + "/out").isFile()); } @Test(expected = DataSetException.class) public void testAbsolutePathInsideCDAP() throws IOException, DatasetManagementException { String absolutePath = dsFrameworkUtil.getConfiguration().get(Constants.CFG_LOCAL_DATA_DIR).concat("/hello"); dsFrameworkUtil.createInstance("fileSet", Id.DatasetInstance.from(DatasetFrameworkTestUtil.NAMESPACE_ID, "badFileSet"), FileSetProperties.builder().setBasePath(absolutePath).build()); } @Test(expected = DataSetException.class) public void testAbsolutePathInsideCDAPDouble() throws IOException, DatasetManagementException { // test that it rejects also paths that have // in them String absolutePath = dsFrameworkUtil.getConfiguration() .get(Constants.CFG_LOCAL_DATA_DIR).replace("/", "//").concat("/hello"); dsFrameworkUtil.createInstance("fileSet", Id.DatasetInstance.from(DatasetFrameworkTestUtil.NAMESPACE_ID, "badFileSet"), FileSetProperties.builder().setBasePath(absolutePath).build()); } @Test public void testAbsolutePathLooksLikeCDAP() throws IOException, DatasetManagementException { String absolutePath = dsFrameworkUtil.getConfiguration().get(Constants.CFG_LOCAL_DATA_DIR).concat("-hello"); dsFrameworkUtil.createInstance("fileSet", testFileSetInstance4, FileSetProperties.builder().setBasePath(absolutePath).build()); } @Test public void testExternalAbsolutePath() throws IOException, DatasetManagementException { // create an external dir and create a file in it String absolutePath = tmpFolder.newFolder() + "/absolute/path"; File absoluteFile = new File(absolutePath); absoluteFile.mkdirs(); File someFile = new File(absoluteFile, "some.file"); someFile.createNewFile(); // create an external dataset dsFrameworkUtil.createInstance("fileSet", testFileSetInstance5, FileSetProperties.builder() .setBasePath(absolutePath) .setDataExternal(true) .build()); // instantiate the file set with an input and output path Map<String, String> fileArgs = Maps.newHashMap(); FileSetArguments.setInputPath(fileArgs, "some.file"); FileSetArguments.setOutputPath(fileArgs, "out"); FileSet fileSet = dsFrameworkUtil.getInstance(testFileSetInstance5, fileArgs); Assert.assertNotNull(fileSet); // read the existing file Location input = fileSet.getInputLocations().iterator().next(); InputStream in = input.getInputStream(); in.close(); // attempt to write an output file try { fileSet.getOutputLocation(); Assert.fail("Extrernal file set should not allow writing output."); } catch (UnsupportedOperationException e) { // expected } // delete the dataset and validate that the files are still there dsFrameworkUtil.deleteInstance(testFileSetInstance5); Assert.assertTrue(someFile.exists()); } @Test(expected = IOException.class) public void testExternalNonExistentPath() throws IOException, DatasetManagementException { // create an external dir and create a file in it String absolutePath = tmpFolder.newFolder() + "/not/there"; // attempt to create an external dataset - should fail dsFrameworkUtil.createInstance("fileSet", testFileSetInstance5, FileSetProperties.builder() .setBasePath(absolutePath) .setDataExternal(true) .build()); } @Test public void testRollback() throws IOException, TransactionFailureException { // test deletion of an empty output directory Location outputLocation = fileSet1.getOutputLocation(); Assert.assertFalse(outputLocation.exists()); Assert.assertTrue(outputLocation.mkdirs()); Assert.assertTrue(outputLocation.exists()); ((FileSetDataset) fileSet1).onFailure(); Assert.assertFalse(outputLocation.exists()); } @Test public void testRollbackOfNonDirectoryOutput() throws IOException, TransactionFailureException { // test deletion of an output location, pointing to a non-directory file Location outputFile = fileSet1.getOutputLocation(); Assert.assertFalse(outputFile.exists()); outputFile.getOutputStream().close(); Assert.assertTrue(outputFile.exists()); ((FileSetDataset) fileSet1).onFailure(); // the output file should still not be deleted Assert.assertTrue(outputFile.exists()); } @Test public void testRollbackWithNonEmptyDir() throws IOException, TransactionFailureException { Location outputDir = fileSet1.getOutputLocation(); Assert.assertFalse(outputDir.exists()); Assert.assertTrue(outputDir.mkdirs()); Location outputFile = outputDir.append("outputFile"); // this will create the outputFile outputFile.getOutputStream().close(); Assert.assertTrue(outputFile.exists()); Assert.assertTrue(outputDir.exists()); ((FileSetDataset) fileSet1).onFailure(); // both the output dir and file in it should still exist Assert.assertTrue(outputDir.exists()); Assert.assertTrue(outputFile.exists()); } }