/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.source; import java.io.IOException; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.List; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.SourceState; public class RegexBasedPartitionedRetrieverTest { private Path tempDir; private enum DateToUse { APR_1_2017(1491004800000L), APR_3_2017(1491177600000L), MAY_1_2017(1493596800000L); private final long value; DateToUse(long val) { this.value = val; } public long getValue() { return value; } } @BeforeClass public void setupDirectories() throws IOException { tempDir = Files.createTempDirectory("regexTest"); for (DateToUse d : DateToUse.values()) { Path subdir = tempDir.resolve(String.format("%d-PT-123456", d.getValue())); Files.createDirectory(subdir); Files.createFile(subdir.resolve("foo.txt")); } } @AfterClass public void cleanup() throws IOException { Files.walkFileTree(tempDir, new SimpleFileVisitor<Path>() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { Files.delete(file); return FileVisitResult.CONTINUE; } @Override public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { Files.delete(dir); return FileVisitResult.CONTINUE; } }); } @Test public void testSnapshotRegex() throws IOException { String snapshotRegex = "(\\d+)-PT-\\d+"; RegexBasedPartitionedRetriever r = new RegexBasedPartitionedRetriever("txt"); SourceState state = new SourceState(); state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///"); state.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, tempDir.toString()); state.setProp(PartitionedFileSourceBase.DATE_PARTITIONED_SOURCE_PARTITION_PATTERN, snapshotRegex); r.init(state); List<PartitionAwareFileRetriever.FileInfo> files = r.getFilesToProcess(DateToUse.APR_3_2017.getValue() - 1, 9999); Assert.assertEquals(files.size(), 2); verifyFile(files.get(0), DateToUse.APR_3_2017.getValue()); verifyFile(files.get(1), DateToUse.MAY_1_2017.getValue()); } private void verifyFile(PartitionAwareFileRetriever.FileInfo fileInfo, long value) { org.apache.hadoop.fs.Path expectedStart = new org.apache.hadoop.fs.Path(tempDir.toUri()); String expectedEnd = String.format("%d-PT-123456/foo.txt", value); Assert.assertEquals(fileInfo.getWatermarkMsSinceEpoch(), value); Assert.assertTrue(fileInfo.getFilePath().startsWith(expectedStart.toString())); Assert.assertTrue(fileInfo.getFilePath().endsWith(expectedEnd)); Assert.assertEquals(fileInfo.getFileSize(), 0); } }