package org.icij.extract.queue; import java.nio.file.*; import java.util.Timer; import java.util.TimerTask; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.icij.kaxxa.concurrent.BooleanSealableLatch; import org.icij.extract.document.DocumentFactory; import org.icij.extract.document.PathIdentifier; import org.icij.extract.queue.ArrayDocumentQueue; import org.icij.extract.queue.DocumentQueue; import org.icij.extract.queue.Scanner; import org.junit.*; public class ScannerTest { private final DocumentFactory factory = new DocumentFactory().withIdentifier(new PathIdentifier()); private final DocumentQueue queue = new ArrayDocumentQueue(100); private Scanner createScanner() { return new Scanner(factory, queue); } private void shutdownScanner(final Scanner scanner) throws InterruptedException { scanner.shutdown(); scanner.awaitTermination(1, TimeUnit.SECONDS); } @After public void tearDown() throws InterruptedException { queue.clear(); } @Test public void testScanDirectory() throws Throwable { final Path root = Paths.get(getClass().getResource("/documents/text/").toURI()); final Scanner scanner = createScanner(); // Block until every single path has been scanned and queued. final Future<Path> job = scanner.scan(root); Assert.assertEquals(job.get(), root); shutdownScanner(scanner); // Assert that the queue contains at least one file, manually. Assert.assertTrue(Files.exists(root.resolve("plain.txt"))); Assert.assertTrue(String.format("Queued file path must start with root path \"%s\".", root), queue.contains(factory.create(root.resolve("plain.txt")))); try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(root)) { for (Path file : directoryStream) { Assert.assertTrue(String.format("Failed asserting that queue contains \"%s\".", file), queue.contains(factory.create(file))); } } } @Test public void testScanDirectoryWithIncludeGlob() throws Throwable { final Path root = Paths.get(getClass().getResource("/documents/").toURI()); final Scanner scanner = createScanner(); scanner.include("**.txt"); // Block until every single path has been scanned and queued. final Future<Path> job = scanner.scan(root); Assert.assertEquals(job.get(), root); shutdownScanner(scanner); // Assert that the queue contains at least one file, manually. final Path garbage = root.resolve("garbage.bin"); Assert.assertTrue(Files.exists(garbage)); Assert.assertFalse(queue.contains(factory.create(garbage))); Assert.assertTrue(queue.contains(factory.create(root.resolve("text/plain.txt")))); } @Test public void testScanDirectoryWithExcludeGlob() throws Throwable { final Path root = Paths.get(getClass().getResource("/documents/").toURI()); final Scanner scanner = createScanner(); // Test exclude paths by extension. scanner.exclude("**.bin"); // Test excluding directories. scanner.exclude("**/ocr"); // Block until every single path has been scanned and queued. final Future<Path> job = scanner.scan(root); Assert.assertEquals(job.get(), root); shutdownScanner(scanner); // Assert that the queue contains at least one file, manually. final Path garbage = root.resolve("garbage.bin"); final Path ocrTiff = root.resolve("ocr/simple.tiff"); // Test paths excluded by extension. Assert.assertTrue(Files.exists(garbage)); Assert.assertFalse(queue.contains(factory.create(garbage))); // Test whether entire directory was excluded. Assert.assertTrue(Files.exists(ocrTiff)); Assert.assertFalse(queue.contains(factory.create(ocrTiff))); // Test whether at least one other file was included. Assert.assertTrue(queue.contains(factory.create(root.resolve("text/plain.txt")))); } @Test public void testHandlesSymlink() throws Throwable { final Path root = Paths.get(getClass().getResource("/links/").toURI()); final Scanner scanner = createScanner(); scanner.followSymLinks(true); Assert.assertTrue(scanner.followSymLinks()); // Create the link. final Path documents = root.resolve("documents"); if (Files.notExists(documents)) { Files.createSymbolicLink(documents, root.resolve("../documents")); } final Future<Path> job = scanner.scan(root); Assert.assertEquals(job.get(), root); shutdownScanner(scanner); // Assert that the queue doesn't contain the symlink, but contains linked files. Assert.assertTrue(Files.isSymbolicLink(documents)); Assert.assertTrue(Files.exists(documents)); Assert.assertFalse(queue.contains(factory.create(documents))); Assert.assertTrue(queue.contains(factory.create(root.resolve("documents/garbage.bin")))); } @Test public void testIgnoreHiddenFiles() throws Throwable { final Path root = Paths.get(getClass().getResource("/documents/").toURI()); final Path hidden = root.resolve(".hidden"); final Scanner scanner = createScanner(); scanner.ignoreHiddenFiles(true); Assert.assertTrue(scanner.ignoreHiddenFiles()); // Block until every single path has been scanned and queued. Assert.assertEquals(scanner.scan(root).get(), root); // Assert that the queue does not contain the hidden file. Assert.assertTrue(Files.exists(hidden)); Assert.assertFalse(queue.contains(factory.create(hidden))); // Now test if hidden files are scanned. scanner.ignoreHiddenFiles(false); Assert.assertFalse(scanner.ignoreHiddenFiles()); Assert.assertEquals(scanner.scan(root).get(), root); Assert.assertTrue(queue.contains(factory.create(hidden))); shutdownScanner(scanner); } @Test public void testIgnoresSystemFilesByDefault() throws Throwable { final Path root = Paths.get(getClass().getResource("/documents/").toURI()); final Path system = root.resolve("lost+found/trashed"); final Scanner scanner = createScanner(); Assert.assertTrue(scanner.ignoreSystemFiles()); // Block until every single path has been scanned and queued. Assert.assertEquals(scanner.scan(root).get(), root); // Assert that the queue does not contain the system file. Assert.assertTrue(Files.exists(system)); Assert.assertFalse(queue.contains(factory.create(system))); // Now test if system files are scanned. scanner.ignoreSystemFiles(false); Assert.assertFalse(scanner.ignoreSystemFiles()); Assert.assertEquals(scanner.scan(root).get(), root); Assert.assertTrue(queue.contains(factory.create(system))); shutdownScanner(scanner); } @Test public void testMaxDepth() throws Throwable { final Path root = Paths.get(getClass().getResource("/documents/").toURI()); final Scanner scanner = createScanner(); scanner.setMaxDepth(1); Assert.assertEquals(1, scanner.getMaxDepth()); // Block until every single path has been scanned and queued. Assert.assertEquals(scanner.scan(root).get(), root); Assert.assertTrue(Files.exists(root.resolve("text/plain.txt"))); Assert.assertFalse(queue.contains(factory.create(root.resolve("text/plain.txt")))); Assert.assertTrue(queue.contains(factory.create(root.resolve("garbage.bin")))); shutdownScanner(scanner); } @Test public void testLatch() throws Throwable { final Path root = Paths.get(getClass().getResource("/documents/").toURI()); final Path garbage = root.resolve("garbage.bin"); final Scanner scanner = new Scanner(factory, queue, new BooleanSealableLatch()); new Timer().schedule(new TimerTask() { @Override public void run() { scanner.scan(garbage); } }, 1000); scanner.getLatch().await(); Assert.assertTrue(Files.exists(garbage)); Assert.assertTrue(queue.contains(factory.create(garbage))); shutdownScanner(scanner); } }