//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.misc; import static org.junit.Assert.assertEquals; import org.apache.uima.jcas.tcas.DocumentAnnotation; import org.junit.Test; import uk.gov.dstl.baleen.annotators.misc.DocumentTypeByFilename; import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest; public class DocumentTypeByFilenameTest extends AbstractAnnotatorTest { public DocumentTypeByFilenameTest() { super(DocumentTypeByFilename.class); } @Test public void testDefault() throws Exception { DocumentAnnotation da = getDocumentAnnotation(); da.setSourceUri("20170127-Test_Document.docx"); processJCas(); assertEquals("docx", da.getDocType()); } @Test public void testPrefix() throws Exception { DocumentAnnotation da = getDocumentAnnotation(); da.setSourceUri("20170127-Test_Document.docx"); processJCas(DocumentTypeByFilename.PARAM_PREFIX, "filetype_"); assertEquals("filetype_docx", da.getDocType()); } @Test public void testPattern() throws Exception { DocumentAnnotation da = getDocumentAnnotation(); da.setSourceUri("20170127-Test_Document.docx"); processJCas(DocumentTypeByFilename.PARAM_PATTERN, "(\\d{4}).*"); assertEquals("2017", da.getDocType()); } @Test public void testPatternNoMatch() throws Exception { DocumentAnnotation da = getDocumentAnnotation(); da.setSourceUri("20170127-Test_Document.docx"); processJCas(DocumentTypeByFilename.PARAM_PATTERN, "([a-z]{2}).*", DocumentTypeByFilename.PARAM_DEFAULT, "unknown"); assertEquals("unknown", da.getDocType()); } @Test public void testPatternCaseSensitiveFalse() throws Exception { DocumentAnnotation da = getDocumentAnnotation(); da.setSourceUri("20170127-Test_Document.docx"); processJCas(DocumentTypeByFilename.PARAM_PATTERN, "\\d{8}-([a-z]).*", DocumentTypeByFilename.PARAM_DEFAULT, "unknown"); assertEquals("t", da.getDocType()); } @Test public void testPatternCaseSensitiveTrue() throws Exception { DocumentAnnotation da = getDocumentAnnotation(); da.setSourceUri("20170127-Test_Document.docx"); processJCas(DocumentTypeByFilename.PARAM_PATTERN, "\\d{8}-([a-z]).*", DocumentTypeByFilename.PARAM_DEFAULT, "unknown", DocumentTypeByFilename.PARAM_CASE_SENSITIVE, true); assertEquals("unknown", da.getDocType()); } @Test public void testGroup() throws Exception { DocumentAnnotation da = getDocumentAnnotation(); da.setSourceUri("20170127-Test_Document.docx"); processJCas(DocumentTypeByFilename.PARAM_PATTERN, "(\\d{4})(\\d{2})(\\d{2}).*", DocumentTypeByFilename.PARAM_GROUP, 2); assertEquals("01", da.getDocType()); } @Test public void testLowerCase() throws Exception { DocumentAnnotation da = getDocumentAnnotation(); da.setSourceUri("20170127-Test_Document.docx"); processJCas(DocumentTypeByFilename.PARAM_PATTERN, "\\d{8}-([a-z]).*", DocumentTypeByFilename.PARAM_LOWER_CASE, false); assertEquals("T", da.getDocType()); } }