//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.collectionreaders;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.uima.UIMAException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.DocumentAnnotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import uk.gov.dstl.baleen.collectionreaders.testing.AbstractReaderTest;
import uk.gov.dstl.baleen.exceptions.BaleenException;
import uk.gov.dstl.baleen.uima.BaleenCollectionReader;
public class MucReaderTest extends AbstractReaderTest{
public MucReaderTest() {
super(MucReader.class);
}
private static final String MUC = "DEV-MUC3-0001 (NOSC)\n\n" +
"SAN SALVADOR, 3 JAN 90 -- [REPORT] [ARMED FORCES PRESS COMMITTEE,\n" +
"COPREFA] [TEXT] THE ARCE BATTALION COMMAND HAS REPORTED THAT ABOUT 50\n" +
"PEASANTS OF VARIOUS AGES HAVE BEEN KIDNAPPED BY TERRORISTS OF THE\n" +
"FARABUNDO MARTI NATIONAL LIBERATION FRONT [FMLN] IN SAN MIGUEL\n" +
"DEPARTMENT. ACCORDING TO THAT GARRISON, THE MASS KIDNAPPING TOOK PLACE ON\n" +
"30 DECEMBER IN SAN LUIS DE LA REINA. THE SOURCE ADDED THAT THE TERRORISTS\n" +
"FORCED THE INDIVIDUALS, WHO WERE TAKEN TO AN UNKNOWN LOCATION, OUT OF\n" +
"THEIR RESIDENCES, PRESUMABLY TO INCORPORATE THEM AGAINST THEIR WILL INTO\n" +
"CLANDESTINE GROUPS.";
private static Path tmpDir;
@BeforeClass
public static void beforeClass() throws IOException {
tmpDir = Files.createTempDirectory("muctest");
Files.write(tmpDir.resolve("file"), MUC.getBytes(StandardCharsets.UTF_8));
}
@AfterClass
public static void afterClass() {
tmpDir.toFile().delete();
}
@Test
public void testNoFiles() throws UIMAException, IOException {
Path emptyTmpDir = Files.createTempDirectory("muctest");
try{
BaleenCollectionReader bcr = getCollectionReader(MucReader.KEY_PATH, emptyTmpDir.toAbsolutePath().toString());
bcr.initialize();
fail("Expected exception not thrown");
}catch(ResourceInitializationException be){
//Do nothing, expected exception
assertEquals(BaleenException.class, be.getCause().getClass());
}
emptyTmpDir.toFile().delete();
}
@Test
public void testKeyFile() throws UIMAException, IOException {
Path keyTmpDir = Files.createTempDirectory("muctest");
Files.write(keyTmpDir.resolve("key-test"), MUC.getBytes(StandardCharsets.UTF_8));
try{
BaleenCollectionReader bcr = getCollectionReader(MucReader.KEY_PATH, keyTmpDir.toAbsolutePath().toString());
bcr.initialize();
fail("Expected exception not thrown");
}catch(ResourceInitializationException be){
//Do nothing, expected exception
assertEquals(BaleenException.class, be.getCause().getClass());
}
keyTmpDir.toFile().delete();
}
@Test
public void test() throws UIMAException, IOException {
BaleenCollectionReader bcr = getCollectionReader(MucReader.KEY_PATH, tmpDir.toAbsolutePath().toString());
bcr.initialize();
assertTrue(bcr.doHasNext());
bcr.getNext(jCas.getCas());
assertEquals("DEV-MUC3-0001 (NOSC)", getSource(jCas));
String s = "THE ARCE BATTALION COMMAND HAS REPORTED THAT ABOUT 50 " +
"PEASANTS OF VARIOUS AGES HAVE BEEN KIDNAPPED BY TERRORISTS OF THE " +
"FARABUNDO MARTI NATIONAL LIBERATION FRONT IN SAN MIGUEL " +
"DEPARTMENT. ACCORDING TO THAT GARRISON, THE MASS KIDNAPPING TOOK PLACE ON " +
"30 DECEMBER IN SAN LUIS DE LA REINA. THE SOURCE ADDED THAT THE TERRORISTS " +
"FORCED THE INDIVIDUALS, WHO WERE TAKEN TO AN UNKNOWN LOCATION, OUT OF " +
"THEIR RESIDENCES, PRESUMABLY TO INCORPORATE THEM AGAINST THEIR WILL INTO " +
"CLANDESTINE GROUPS.";
s = s.toLowerCase();
assertEquals(s, jCas.getDocumentText());
assertFalse(bcr.doHasNext());
bcr.close();
}
private String getSource(JCas jCas){
DocumentAnnotation doc = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
return doc.getSourceUri();
}
}