/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.data.management.copy.replication; import java.net.URI; import java.util.Collection; import java.util.Properties; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.Test; import com.google.common.base.Optional; import com.google.common.collect.Sets; import gobblin.configuration.ConfigurationKeys; import gobblin.data.management.copy.CopyConfiguration; import gobblin.data.management.copy.CopyEntity; import gobblin.data.management.copy.CopyableFile; import gobblin.data.management.copy.PreserveAttributes; import gobblin.data.management.copy.entities.PostPublishStep; import gobblin.data.management.copy.entities.PrePublishStep; import gobblin.source.extractor.ComparableWatermark; import gobblin.source.extractor.extract.LongWatermark; import gobblin.util.FileListUtils; import gobblin.util.PathUtils; import gobblin.util.commit.DeleteFileCommitStep; /** * Unit test for {@link ConfigBasedDatasets} * @author mitu * */ @Test(groups = {"gobblin.data.management.copy.replication"}) public class ConfigBasedDatasetsTest { @Test public void testGetCopyableFiles() throws Exception { String sourceDir = getClass().getClassLoader().getResource("configBasedDatasetTest/src").getFile(); String destinationDir = getClass().getClassLoader().getResource("configBasedDatasetTest/dest").getFile(); FileSystem localFs = FileSystem.getLocal(new Configuration()); URI local = localFs.getUri(); long sourceWatermark = 100L; Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher"); CopyConfiguration copyConfiguration = CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).publishDir(new Path(destinationDir)) .preserve(PreserveAttributes.fromMnemonicString("ugp")).build(); ReplicationMetaData mockMetaData = Mockito.mock(ReplicationMetaData.class); Mockito.when(mockMetaData.toString()).thenReturn("Mock Meta Data"); ReplicationConfiguration mockRC = Mockito.mock(ReplicationConfiguration.class); Mockito.when(mockRC.getCopyMode()).thenReturn(ReplicationCopyMode.PULL); Mockito.when(mockRC.getMetaData()).thenReturn(mockMetaData); HadoopFsEndPoint copyFrom = Mockito.mock(HadoopFsEndPoint.class); Mockito.when(copyFrom.getDatasetPath()).thenReturn(new Path(sourceDir)); Mockito.when(copyFrom.getFsURI()).thenReturn(local); ComparableWatermark sw = new LongWatermark(sourceWatermark); Mockito.when(copyFrom.getWatermark()).thenReturn(Optional.of(sw)); Mockito.when(copyFrom.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs, new Path(sourceDir))); HadoopFsEndPoint copyTo = Mockito.mock(HadoopFsEndPoint.class); Mockito.when(copyTo.getDatasetPath()).thenReturn(new Path(destinationDir)); Mockito.when(copyTo.getFsURI()).thenReturn(local); Optional<ComparableWatermark>tmp = Optional.absent(); Mockito.when(copyTo.getWatermark()).thenReturn(tmp); Mockito.when(copyTo.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs, new Path(destinationDir))); CopyRoute route = Mockito.mock(CopyRoute.class); Mockito.when(route.getCopyFrom()).thenReturn(copyFrom); Mockito.when(route.getCopyTo()).thenReturn(copyTo); ConfigBasedDataset dataset = new ConfigBasedDataset(mockRC, properties, route); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(localFs, copyConfiguration); Assert.assertEquals(copyableFiles.size(), 6); Set<Path> paths = Sets.newHashSet(new Path("dir1/file2"), new Path("dir1/file1"), new Path("dir2/file1"), new Path("dir2/file3")); for (CopyEntity copyEntity : copyableFiles) { if(copyEntity instanceof CopyableFile) { CopyableFile file = (CopyableFile) copyEntity; Path originRelativePath = PathUtils.relativizePath(PathUtils.getPathWithoutSchemeAndAuthority(file.getOrigin().getPath()), PathUtils.getPathWithoutSchemeAndAuthority(new Path(sourceDir))); Path targetRelativePath = PathUtils.relativizePath(PathUtils.getPathWithoutSchemeAndAuthority(file.getDestination()), PathUtils.getPathWithoutSchemeAndAuthority(new Path(destinationDir))); Assert.assertTrue(paths.contains(originRelativePath)); Assert.assertTrue(paths.contains(targetRelativePath)); Assert.assertEquals(originRelativePath, targetRelativePath); } else if(copyEntity instanceof PrePublishStep){ PrePublishStep pre = (PrePublishStep)copyEntity; Assert.assertTrue(pre.getStep() instanceof DeleteFileCommitStep); // need to delete this file Assert.assertTrue(pre.explain().indexOf("configBasedDatasetTest/dest/dir1/file1") > 0); } else if(copyEntity instanceof PostPublishStep){ PostPublishStep post = (PostPublishStep)copyEntity; Assert.assertTrue(post.getStep() instanceof WatermarkMetadataGenerationCommitStep); Assert.assertTrue(post.explain().indexOf("dest/_metadata") > 0 && post.explain().indexOf(""+sourceWatermark)>0); } else{ throw new Exception("Wrong type"); } } } }