/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.data.management.copy; import java.io.IOException; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.Test; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import gobblin.commit.CommitStep; import gobblin.configuration.ConfigurationKeys; import gobblin.data.management.copy.entities.CommitStepCopyEntity; import gobblin.util.commit.DeleteFileCommitStep; import avro.shaded.com.google.common.base.Predicate; import avro.shaded.com.google.common.collect.Iterables; import javax.annotation.Nullable; import lombok.Data; public class RecursiveCopyableDatasetTest { @Test public void testSimpleCopy() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1"), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file3")); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 2); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "file1")); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file2"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file2")), new Path(target, "file2")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 0); } @Test public void testCopyWithNonConflictingCollision() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 1)); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 1); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file2"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file2")), new Path(target, "file2")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 0); } @Test public void testCopyWithConflictingCollisionDueToSize() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 2)); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); try { Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.fail(); } catch (IOException ioe) { // should throw exception due to collision } } @Test public void testCopyWithConflictingCollisionDueToModtime() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1, 10), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 1, 9)); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); try { Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.fail(); } catch (IOException ioe) { // should throw exception due to collision } } @Test public void testCopyWithUpdate() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); FileStatus targetFile1 = createFileStatus(target, "file1", 2); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(targetFile1); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); properties.setProperty(RecursiveCopyableDataset.UPDATE_KEY, "true"); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); FileSystem targetFsUnderlying = FileSystem.getLocal(new Configuration()); FileSystem targetFs = Mockito.spy(targetFsUnderlying); Mockito.doReturn(targetFile1).when(targetFs).getFileStatus(new Path(target, "file1")); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(targetFs, CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 3); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "file1")); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file2"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file2")), new Path(target, "file2")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 1); Assert.assertTrue(classifiedFiles.getPathsToDelete().contains(new Path(target, "file1"))); } @Test public void testCopyWithDeleteTarget() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file3")); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); properties.setProperty(RecursiveCopyableDataset.DELETE_KEY, "true"); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 2); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "file1")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 1); Assert.assertTrue(classifiedFiles.getPathsToDelete().contains(new Path(target, "file3"))); CommitStepCopyEntity entity = (CommitStepCopyEntity) Iterables.filter(copyableFiles, new Predicate<CopyEntity>() { @Override public boolean apply(@Nullable CopyEntity copyEntity) { return copyEntity instanceof CommitStepCopyEntity; } }).iterator().next(); DeleteFileCommitStep step = (DeleteFileCommitStep) entity.getStep(); Assert.assertFalse(step.getParentDeletionLimit().isPresent()); } @Test public void testCopyWithDeleteTargetAndDeleteParentDirectories() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file3")); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); properties.setProperty(RecursiveCopyableDataset.DELETE_EMPTY_DIRECTORIES_KEY, "true"); properties.setProperty(RecursiveCopyableDataset.DELETE_KEY, "true"); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 2); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "file1")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 1); Assert.assertTrue(classifiedFiles.getPathsToDelete().contains(new Path(target, "file3"))); CommitStepCopyEntity entity = (CommitStepCopyEntity) Iterables.filter(copyableFiles, new Predicate<CopyEntity>() { @Override public boolean apply(@Nullable CopyEntity copyEntity) { return copyEntity instanceof CommitStepCopyEntity; } }).iterator().next(); DeleteFileCommitStep step = (DeleteFileCommitStep) entity.getStep(); Assert.assertTrue(step.getParentDeletionLimit().isPresent()); Assert.assertEquals(step.getParentDeletionLimit().get(), target); } @Test public void testCorrectComputationOfTargetPathsWhenUsingGlob() throws Exception { Path source = new Path("/source/directory"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1")); List<FileStatus> targetFiles = Lists.newArrayList(); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); FileSystem sourceUnderlyingFS = FileSystem.getLocal(new Configuration()); FileSystem sourceFs = Mockito.spy(sourceUnderlyingFS); Mockito.doReturn(new FileStatus(0, true, 0, 0, 0, source)).when(sourceFs).getFileStatus(source); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, new Path(target, "directory"), sourceFiles, targetFiles, properties, new Path("/source/*"), sourceFs); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.get(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 1); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "directory/file1")); } private ClassifiedFiles classifyFiles(Collection<? extends CopyEntity> copyEntities) { Map<Path, Path> pathsToCopy = Maps.newHashMap(); Set<Path> pathsToDelete = Sets.newHashSet(); for (CopyEntity ce : copyEntities) { if (ce instanceof CopyableFile) { pathsToCopy.put(((CopyableFile) ce).getOrigin().getPath(), ((CopyableFile) ce).getDestination()); } if (ce instanceof CommitStepCopyEntity) { CommitStep step = ((CommitStepCopyEntity) ce).getStep(); if (step instanceof DeleteFileCommitStep) { for (FileStatus status : ((DeleteFileCommitStep) step).getPathsToDelete()) { pathsToDelete.add(status.getPath()); } } } } return new ClassifiedFiles(pathsToCopy, pathsToDelete); } @Data private class ClassifiedFiles { private final Map<Path, Path> pathsToCopy; private final Set<Path> pathsToDelete; } private FileStatus createFileStatus(Path root, String relative) { return createFileStatus(root, relative, 0, 0); } private FileStatus createFileStatus(Path root, String relative, long length) { return createFileStatus(root, relative, length, 0); } private FileStatus createFileStatus(Path root, String relative, long length, long modtime) { return new FileStatus(length, false, 0, 0, modtime, new Path(root, relative)); } private static class TestRecursiveCopyableDataset extends RecursiveCopyableDataset { private final Path source; private final Path target; private final List<FileStatus> sourceFiles; private final List<FileStatus> targetFiles; public TestRecursiveCopyableDataset(Path source, Path target, List<FileStatus> sourceFiles, List<FileStatus> targetFiles, Properties properties) throws IOException { this(source, target, sourceFiles, targetFiles, properties, source, FileSystem.getLocal(new Configuration())); } public TestRecursiveCopyableDataset(Path source, Path target, List<FileStatus> sourceFiles, List<FileStatus> targetFiles, Properties properties, Path glob, FileSystem sourceFs) throws IOException { super(sourceFs, source, properties, glob); this.source = source; this.target = target; this.sourceFiles = sourceFiles; this.targetFiles = targetFiles; } @Override protected List<FileStatus> getFilesAtPath(FileSystem fs, Path path, PathFilter fileFilter) throws IOException { if (path.equals(this.source)) { return this.sourceFiles; } else if (path.equals(this.target)) { return this.targetFiles; } else { throw new RuntimeException("Not a recognized path. " + path); } } } }