//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.regex; import org.junit.Test; import uk.gov.dstl.baleen.annotators.regex.internals.UrlRegex; import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest; import uk.gov.dstl.baleen.annotators.testing.TestEntity; import uk.gov.dstl.baleen.types.common.Url; import uk.gov.dstl.baleen.types.language.Text; /** * */ public class UrlTest extends AbstractAnnotatorTest { public UrlTest() { super(UrlRegex.class); } @Test public void test() throws Exception{ jCas.setDocumentText("Dstl's website is http://www.dstl.gov.uk/. An example FTP directory is ftp://foo.example.com/this/is/a/path.txt. Here's a secure URL https://www.example.com/index.php?test=true . Some naughty person hasn't specified a schema here... www.example.com/path/to/page.html."); processJCas(); assertAnnotations(4, Url.class, new TestEntity<>(0, "http://www.dstl.gov.uk/"), new TestEntity<>(1, "ftp://foo.example.com/this/is/a/path.txt"), new TestEntity<>(2, "https://www.example.com/index.php?test=true"), new TestEntity<>(3, "www.example.com/path/to/page.html")); } @Test public void testWithText() throws Exception{ jCas.setDocumentText("Dstl's website is http://www.dstl.gov.uk/. An example FTP directory is ftp://foo.example.com/this/is/a/path.txt. Here's a secure URL https://www.example.com/index.php?test=true . Some naughty person hasn't specified a schema here... www.example.com/path/to/page.html."); Text t1 = new Text(jCas, 0, 43); t1.addToIndexes(); Text t2 = new Text(jCas, 180, jCas.getDocumentText().length()); t2.addToIndexes(); processJCas(); assertAnnotations(2, Url.class, new TestEntity<>(0, "http://www.dstl.gov.uk/"), new TestEntity<>(1, "www.example.com/path/to/page.html")); } }