//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.collectionreaders;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.file.Files;

import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.DocumentAnnotation;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import uk.gov.dstl.baleen.collectionreaders.testing.AbstractReaderTest;
import uk.gov.dstl.baleen.uima.BaleenCollectionReader;

/**
 * Tests for {@link FolderReader}.
 *
 * <p>Every test works inside a fresh temporary directory ({@link #inputDir}) that is created
 * in {@link #before()} and removed, together with everything beneath it, in {@link #after()}.
 * Tests therefore do not need to delete files they create under {@link #inputDir}.
 */
public class FolderReaderTest extends AbstractReaderTest {
    private static final String TEST3_FILE = "test3.txt";
    private static final String TEST2_FILE = "test2.txt";
    private static final String TEXT1_FILE = "test1.txt";
    private static final String DIR = "baleen-test";

    /** Temporary directory used as the watched folder; recreated for every test. */
    File inputDir;

    /**
     * Time, in milliseconds, that a test sleeps after touching the file system before it
     * checks the reader. Not final because {@link #beforeClass()} raises it on Mac OS X.
     */
    private static long TIMEOUT = 1000L;

    public FolderReaderTest() {
        super(FolderReader.class);
    }

    /** Adjusts {@link #TIMEOUT} for the platform the tests are running on. */
    @BeforeClass
    public static void beforeClass() {
        //If we're testing on a Mac, then we need to set the time out higher,
        //as currently the WatchService on a Mac uses polling rather than a
        //native implementation and therefore we need to ensure we wait longer
        //than the poll interval
        if (System.getProperty("os.name").toLowerCase().startsWith("mac os x")) {
            TIMEOUT = 15000L;
        }
    }

    /** Creates the temporary directory used by the test. */
    @Before
    public void before() throws Exception {
        inputDir = Files.createTempDirectory(DIR).toFile();
    }

    /**
     * Removes the temporary directory and everything below it, so that files and
     * sub-directories are cleaned up even when a test fails part way through.
     */
    @After
    public void after() throws IOException {
        deleteRecursively(inputDir);
    }

    /** With no folders configured, the reader is expected to pick up files from the working directory. */
    @Test
    public void testCreateFileDefaultDirectory() throws Exception {
        BaleenCollectionReader bcr = getCollectionReader();

        assertTrue(bcr.doHasNext());

        //There will be files in the current directory, so we can just check that it's picked them up.
        bcr.getNext(jCas.getCas());
        assertTrue(getSource(jCas).contains(System.getProperty("user.dir")));

        bcr.close();
    }

    /** A file created after the reader has started is returned exactly once. */
    @Test
    public void testCreateFile() throws Exception {
        BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[]{inputDir.getPath()});

        assertFalse(bcr.doHasNext());

        File f = new File(inputDir, TEXT1_FILE);
        f.createNewFile();

        //Wait for file to be written and change detected
        Thread.sleep(TIMEOUT);

        assertTrue(bcr.doHasNext());
        bcr.getNext(jCas.getCas());
        assertFilesEquals(f.getPath(), getSource(jCas));

        assertFalse(bcr.doHasNext());

        bcr.close();
    }

    /** Files from two configured folders, both pre-existing and newly created, are all returned. */
    @Test
    public void testMultipleDirectories() throws Exception {
        File inputDir2 = Files.createTempDirectory(DIR).toFile();
        try {
            //Files that exist before the reader is created
            File f11 = new File(inputDir, TEXT1_FILE);
            f11.createNewFile();
            File f21 = new File(inputDir2, TEXT1_FILE);
            f21.createNewFile();

            BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[]{inputDir.getPath(), inputDir2.getPath()});

            //Files that are created while the reader is running
            File f12 = new File(inputDir, TEST2_FILE);
            f12.createNewFile();
            File f22 = new File(inputDir2, TEST2_FILE);
            f22.createNewFile();

            Thread.sleep(TIMEOUT);

            assertNextSourceNotNull(bcr);
            assertNextSourceNotNull(bcr);
            assertNextSourceNotNull(bcr);
            assertNextSourceNotNull(bcr);

            bcr.close();
        } finally {
            //inputDir2 is not covered by after(), so it is removed here even if an assertion fails
            deleteRecursively(inputDir2);
        }
    }

    /** Files in a sub-directory are returned, whether they exist up front or are created later. */
    @Test
    public void testSubDirectories() throws Exception {
        File subdir = new File(inputDir, "subdir");
        subdir.mkdir();

        File f1 = new File(subdir, TEXT1_FILE);
        f1.createNewFile();

        BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[]{inputDir.getPath()});

        assertNextSourceNotNull(bcr);

        File f2 = new File(subdir, TEST2_FILE);
        f2.createNewFile();

        Thread.sleep(TIMEOUT);

        assertNextSourceNotNull(bcr);

        bcr.close();
    }

    /**
     * With {@link FolderReader#PARAM_RECURSIVE} set to false, the first document returned is the
     * file in the top-level folder rather than the one in the sub-directory, and a file added to
     * the top-level folder afterwards is returned next.
     */
    @Test
    public void testSubDirectoriesNonRecursive() throws Exception {
        File subdir = new File(inputDir, "subdir");
        subdir.mkdir();

        File f1 = new File(subdir, TEXT1_FILE);
        f1.createNewFile();

        File f2 = new File(inputDir, TEST2_FILE);
        f2.createNewFile();

        BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[]{inputDir.getPath()}, FolderReader.PARAM_RECURSIVE, false);

        assertTrue(bcr.hasNext());
        bcr.getNext(jCas.getCas());
        assertFilesEquals(f2.getPath(), getSource(jCas));
        jCas.reset();

        File f3 = new File(inputDir, TEST3_FILE);
        f3.createNewFile();

        Thread.sleep(TIMEOUT);

        assertTrue(bcr.hasNext());
        bcr.getNext(jCas.getCas());
        assertFilesEquals(f3.getPath(), getSource(jCas));

        bcr.close();
    }

    /**
     * With {@link FolderReader#PARAM_REPROCESS_ON_MODIFY} enabled, a file that has already been
     * returned is returned again, with its new content, after it has been modified.
     */
    @Test
    public void testModifiedFile() throws Exception {
        BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[]{inputDir.getPath()}, FolderReader.PARAM_REPROCESS_ON_MODIFY, true);

        assertFalse(bcr.doHasNext());

        File f = new File(inputDir, TEXT1_FILE);
        f.createNewFile();

        //Wait for file to be written and change detected
        Thread.sleep(TIMEOUT);

        assertTrue(bcr.doHasNext());
        bcr.getNext(jCas.getCas());
        assertFilesEquals(f.getPath(), getSource(jCas));

        jCas.reset();

        //Modify file; try-with-resources makes sure the writer is closed even if the write fails
        try (Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f)))) {
            writer.write("Test");
        }

        Thread.sleep(TIMEOUT);

        assertTrue(bcr.doHasNext());
        bcr.getNext(jCas.getCas());
        assertFilesEquals(f.getPath(), getSource(jCas));
        assertEquals("Test", jCas.getDocumentText().trim());

        assertFalse(bcr.doHasNext());

        bcr.close();
    }

    /** A file that is created and then deleted before it has been read is not returned. */
    @Test
    public void testDeleteFile() throws Exception {
        BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[]{inputDir.getPath()});

        assertFalse(bcr.doHasNext());

        File f = new File(inputDir, TEXT1_FILE);
        f.createNewFile();

        //Wait for file to be written and change detected
        Thread.sleep(TIMEOUT);

        f.delete();

        //Wait for the deletion to be detected
        Thread.sleep(TIMEOUT);

        assertFalse(bcr.doHasNext());

        bcr.close();
    }

    /** Files that already exist when the reader is created are each returned once. */
    @Test
    public void testExistingFiles() throws Exception {
        File f1 = new File(inputDir, TEXT1_FILE);
        f1.createNewFile();
        File f2 = new File(inputDir, TEST2_FILE);
        f2.createNewFile();

        BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[]{inputDir.getPath()});

        assertNextSourceNotNull(bcr);
        assertNextSourceNotNull(bcr);

        assertFalse(bcr.doHasNext());

        bcr.close();
    }

    /**
     * With the accepted pattern {@code .*\.txt}, exactly two of the three files
     * (test1.txt, test2.log, test3.TXT) are returned.
     */
    @Test
    public void testFilenameFilter1() throws Exception {
        File f1 = new File(inputDir, TEXT1_FILE);
        f1.createNewFile();
        File f2 = new File(inputDir, "test2.log");
        f2.createNewFile();
        File f3 = new File(inputDir, "test3.TXT");
        f3.createNewFile();

        BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[]{inputDir.getPath()}, FolderReader.PARAM_ACCEPTED_PATTERNS, new String[]{".*\\.txt"});

        assertNextSourceNotNull(bcr);
        assertNextSourceNotNull(bcr);

        assertFalse(bcr.doHasNext());

        bcr.close();
    }

    /**
     * With the accepted pattern {@code .*[2-3].*}, exactly two of the three files
     * (test1.txt, test2.log, test3.TXT) are returned.
     */
    @Test
    public void testFilenameFilter2() throws Exception {
        File f1 = new File(inputDir, TEXT1_FILE);
        f1.createNewFile();
        File f2 = new File(inputDir, "test2.log");
        f2.createNewFile();
        File f3 = new File(inputDir, "test3.TXT");
        f3.createNewFile();

        BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[]{inputDir.getPath()}, FolderReader.PARAM_ACCEPTED_PATTERNS, new String[]{".*[2-3].*"});

        assertNextSourceNotNull(bcr);
        assertNextSourceNotNull(bcr);

        assertFalse(bcr.doHasNext());

        bcr.close();
    }

    /** Creating an empty directory inside the watched folder does not produce a document. */
    @Test
    public void testCreateDirectoryNotProcessed() throws Exception {
        BaleenCollectionReader bcr = getCollectionReader(
                FolderReader.PARAM_FOLDERS, new String[] { inputDir.getPath() });

        assertFalse(bcr.doHasNext());

        File folder = new File(inputDir, DIR);
        folder.mkdir();

        Thread.sleep(TIMEOUT);

        assertFalse(bcr.doHasNext());

        bcr.close();
    }

    /** A file created inside a directory that was itself created after start-up is returned. */
    @Test
    public void testCreateDirectoryIsWatched() throws Exception {
        BaleenCollectionReader bcr = getCollectionReader(
                FolderReader.PARAM_FOLDERS, new String[] { inputDir.getPath() });

        assertFalse(bcr.doHasNext());

        File folder = new File(inputDir, DIR);
        folder.mkdir();

        Thread.sleep(TIMEOUT);

        assertFalse(bcr.doHasNext());

        File f11 = new File(folder, TEXT1_FILE);
        f11.createNewFile();

        Thread.sleep(TIMEOUT);

        assertNextSourceNotNull(bcr);

        bcr.close();
    }

    /**
     * With {@link FolderReader#PARAM_RECURSIVE} set to false, a file created inside a directory
     * that was itself created after start-up is not returned.
     */
    @Test
    public void testCreateDirectoryIsNotWatchedIfNotRecursive() throws Exception {
        BaleenCollectionReader bcr = getCollectionReader(
                FolderReader.PARAM_RECURSIVE, false,
                FolderReader.PARAM_FOLDERS, new String[] { inputDir.getPath() });

        assertFalse(bcr.doHasNext());

        File folder = new File(inputDir, DIR);
        folder.mkdir();

        Thread.sleep(TIMEOUT);

        assertFalse(bcr.doHasNext());

        File f11 = new File(folder, TEXT1_FILE);
        f11.createNewFile();

        Thread.sleep(TIMEOUT);

        assertFalse(bcr.doHasNext());

        bcr.close();
    }

    /**
     * Asserts that the reader has another document, reads it into the shared JCas, checks that
     * its source URI is set, and resets the JCas ready for the next read.
     */
    private void assertNextSourceNotNull(BaleenCollectionReader bcr) throws Exception {
        assertTrue(bcr.doHasNext());
        bcr.getNext(jCas.getCas());
        assertNotNull(getSource(jCas));
        jCas.reset();
    }

    /** Returns the source URI recorded on the document annotation of the given JCas. */
    private String getSource(JCas jCas) {
        DocumentAnnotation doc = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
        return doc.getSourceUri();
    }

    /** Asserts that the two paths refer to the same file on disk. */
    private void assertFilesEquals(String s1, String s2) throws IOException {
        File f1 = new File(s1);
        File f2 = new File(s2);
        assertTrue(Files.isSameFile(f1.toPath(), f2.toPath()));
    }

    /**
     * Best-effort removal of a file, or of a directory and everything below it.
     * A null argument is ignored and failed deletions are not reported.
     */
    private static void deleteRecursively(File file) {
        if (file == null) {
            return;
        }

        //listFiles() returns null for anything that is not a readable directory
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                deleteRecursively(child);
            }
        }

        file.delete();
    }
}