/**
* Copyright 2007-2014
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package de.tudarmstadt.ukp.dkpro.core.stanfordnlp;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.util.JCasUtil.select;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.junit.Assume;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations;
import de.tudarmstadt.ukp.dkpro.core.testing.AssumeResource;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
import de.tudarmstadt.ukp.dkpro.core.testing.TestRunner;
/**
 * Tests for {@link StanfordNamedEntityRecognizer} across several languages and model variants.
 *
 * <p>
 * Each model-based test runs the recognizer over a short, pre-tokenized sentence and compares the
 * resulting {@link NamedEntity} annotations against an expected list of rendered entries (offsets,
 * annotation type, original tag and covered text).
 * </p>
 */
public class StanfordNamedEntityRecognizerTest
{
    /**
     * Value that {@code Runtime.getRuntime().maxMemory()} must exceed for a model-based test to
     * run; otherwise the JUnit assumption fails and the test is not executed.
     */
    private static final long MIN_MAX_MEMORY_BYTES = 1000000000L;

    @Rule
    public DkproTestContext testContext = new DkproTestContext();

    @Test
    public void testDutchFremeNer() throws Exception
    {
        JCas jcas = runWithMemoryGuard("nl", "freme-wikiner",
                "10 jaar Markus werkzaam bij SAP in Duitsland .");

        assertEntities(jcas,
                "[ 8, 14]Person(I-PER) (Markus)",
                "[ 28, 31]Organization(I-ORG) (SAP)",
                "[ 35, 44]Location(I-LOC) (Duitsland)");
    }

    @Test
    public void testEnglish() throws Exception
    {
        JCas jcas = runWithMemoryGuard("en", null,
                "IBM where John Miller works is in Germany .");

        assertEntities(jcas,
                "[ 0, 3]Organization(ORGANIZATION) (IBM)",
                "[ 10, 21]Person(PERSON) (John Miller)",
                "[ 34, 41]Location(LOCATION) (Germany)");
    }

    @Test
    public void testEnglishAdjacent() throws Exception
    {
        JCas jcas = runWithMemoryGuard("en", null, "Jake John called late at night .");

        // Two adjacent person tokens are expected to come out as one entity.
        assertEntities(jcas,
                "[ 0, 9]Person(PERSON) (Jake John)");
    }

    @Test
    public void testEnglishFremeNer() throws Exception
    {
        JCas jcas = runWithMemoryGuard("en", "freme-wikiner",
                "IBM where John Miller works is in Germany .");

        assertEntities(jcas,
                "[ 0, 3]Organization(I-ORG) (IBM)",
                "[ 10, 21]Person(I-PER) (John Miller)",
                "[ 34, 41]Location(I-LOC) (Germany)");
    }

    @Test
    public void test3classCaselessEnglish() throws Exception
    {
        JCas jcas = runWithMemoryGuard("en", "all.3class.caseless.distsim.crf",
                "ibm where john works is in germany .");

        assertEntities(jcas,
                "[ 0, 3]Organization(ORGANIZATION) (ibm)",
                "[ 10, 14]Person(PERSON) (john)",
                "[ 27, 34]Location(LOCATION) (germany)");
    }

    @Test
    public void testNoWiki3classCaselessEnglish() throws Exception
    {
        JCas jcas = runWithMemoryGuard("en", "nowiki.3class.caseless.distsim.crf",
                "ibm where john works is in germany .");

        assertEntities(jcas,
                "[ 0, 3]Organization(ORGANIZATION) (ibm)",
                "[ 10, 14]Person(PERSON) (john)",
                "[ 27, 34]Location(LOCATION) (germany)");
    }

    @Test
    public void test4classEnglish() throws Exception
    {
        JCas jcas = runWithMemoryGuard("en", "conll.4class.distsim.crf",
                "IBM where John works is in Germany .");

        assertEntities(jcas,
                "[ 0, 3]Organization(ORGANIZATION) (IBM)",
                "[ 10, 14]Person(PERSON) (John)",
                "[ 27, 34]Location(LOCATION) (Germany)");
    }

    @Test
    public void test4classCaselessEnglish() throws Exception
    {
        JCas jcas = runWithMemoryGuard("en", "conll.4class.caseless.distsim.crf",
                "ibm where john works is in germany .");

        assertEntities(jcas,
                "[ 0, 3]Organization(ORGANIZATION) (ibm)",
                "[ 10, 14]Person(PERSON) (john)",
                "[ 27, 34]Location(LOCATION) (germany)");
    }

    @Test
    public void test4classCaselessMixedEnglish() throws Exception
    {
        JCas jcas = runWithMemoryGuard("en", "conll.4class.caseless.distsim.crf",
                "IBM where john works is in Germany .");

        assertEntities(jcas,
                "[ 0, 3]Organization(ORGANIZATION) (IBM)",
                "[ 10, 14]Person(PERSON) (john)",
                "[ 27, 34]Location(LOCATION) (Germany)");
    }

    @Test
    public void test7classEnglish() throws Exception
    {
        JCas jcas = runWithMemoryGuard("en", "muc.7class.distsim.crf",
                "IBM where John works is in Germany .");

        assertEntities(jcas,
                "[ 0, 3]Organization(ORGANIZATION) (IBM)",
                "[ 10, 14]Person(PERSON) (John)",
                "[ 27, 34]Location(LOCATION) (Germany)");
    }

    @Test
    public void testEnglishWithNEInLastToken() throws Exception
    {
        // No trailing punctuation: the last token of the document is itself an entity.
        JCas jcas = runWithMemoryGuard("en", null, "IBM where John works is in Germany");

        assertEntities(jcas,
                "[ 0, 3]Organization(ORGANIZATION) (IBM)",
                "[ 10, 14]Person(PERSON) (John)",
                "[ 27, 34]Location(LOCATION) (Germany)");
    }

    @Test
    public void testGerman() throws Exception
    {
        JCas jcas = runWithMemoryGuard("de", null,
                "Markus arbeitet seit 10 Jahren bei SAP in Deutschland .");

        assertEntities(jcas,
                "[ 0, 6]Person(I-PER) (Markus)",
                "[ 35, 38]Organization(I-ORG) (SAP)",
                "[ 42, 53]Location(I-LOC) (Deutschland)");
    }

    @Test
    public void testGermanNemgp() throws Exception
    {
        JCas jcas = runWithMemoryGuard("de", "nemgp",
                "Markus arbeitet seit 10 Jahren bei SAP in Deutschland .");

        assertEntities(jcas,
                "[ 0, 6]Person(PER) (Markus)",
                "[ 35, 38]Organization(ORG) (SAP)",
                "[ 42, 53]Location(LOC) (Deutschland)");
    }

    @Test
    public void testHgcGerman() throws Exception
    {
        JCas jcas = runWithMemoryGuard("de", "hgc_175m_600.crf",
                "Markus arbeitet seit 10 Jahren bei SAP in Deutschland .");

        assertEntities(jcas,
                "[ 0, 6]Person(I-PER) (Markus)",
                "[ 35, 38]Organization(I-ORG) (SAP)",
                "[ 42, 53]Location(I-LOC) (Deutschland)");
    }

    @Test
    public void testFrenchFremeNer() throws Exception
    {
        JCas jcas = runWithMemoryGuard("fr", "freme-wikiner",
                "Il y a 10 ans Markus travaille dans SAP en Allemagne .");

        assertEntities(jcas,
                "[ 14, 20]Person(I-PER) (Markus)",
                "[ 36, 39]Organization(I-ORG) (SAP)",
                "[ 43, 52]Location(I-LOC) (Allemagne)");
    }

    @Test
    public void testItalianFremeNer() throws Exception
    {
        JCas jcas = runWithMemoryGuard("it", "freme-wikiner",
                "10 anni fa Markus lavora in SAP in Germania .");

        assertEntities(jcas,
                "[ 11, 17]Person(I-PER) (Markus)",
                "[ 28, 31]Organization(I-ORG) (SAP)",
                "[ 35, 43]Location(I-LOC) (Germania)");
    }

    @Test
    public void testRussianFremeNer() throws Exception
    {
        JCas jcas = runWithMemoryGuard("ru", "freme-wikiner",
                "10 лет Маркус работал в SAP в Германии .");

        assertEntities(jcas,
                "[ 7, 13]Person(I-PER) (Маркус)",
                "[ 24, 27]Organization(I-ORG) (SAP)",
                "[ 30, 38]Location(I-LOC) (Германии)");
    }

    @Test
    public void testSpanish() throws Exception
    {
        JCas jcas = runWithMemoryGuard("es", null,
                "Hace 10 años Markus trabaja en SAP en Alemania .");

        assertEntities(jcas,
                "[ 13, 19]Person(PERS) (Markus)",
                "[ 31, 34]Organization(ORG) (SAP)",
                "[ 38, 46]Location(LUG) (Alemania)");
    }

    @Test
    public void testSpanishFremeNer() throws Exception
    {
        JCas jcas = runWithMemoryGuard("es", "freme-wikiner",
                "Hace 10 años Markus trabaja en SAP en Alemania .");

        // With this variant "SAP" is expected as a generic NamedEntity tagged I-MISC.
        assertEntities(jcas,
                "[ 13, 19]Person(I-PER) (Markus)",
                "[ 31, 34]NamedEntity(I-MISC) (SAP)",
                "[ 38, 46]Location(I-LOC) (Alemania)");
    }

    /**
     * Runs the recognizer for language code {@code "xx"} with no variant and expects an
     * {@link AnalysisEngineProcessException}. Unlike the other tests, no memory assumption is
     * made here.
     */
    @Test(expected = AnalysisEngineProcessException.class)
    public void testMissingModel() throws Exception
    {
        // NOTE(review): runTest() first calls AssumeResource.assumeResource(...); how that
        // behaves for an unknown language is not visible from this file - confirm.
        runTest("xx", null, "Xec xena Xeo .");
    }

    /**
     * Applies the max-memory assumption and then delegates to
     * {@link #runTest(String, String, String)}.
     *
     * @param language
     *            language code handed to the test runner.
     * @param variant
     *            model variant, or {@code null} to pass no explicit variant.
     * @param testDocument
     *            text to process.
     * @return the CAS returned by the test runner.
     * @throws Exception
     *             if engine creation or processing fails.
     */
    private JCas runWithMemoryGuard(String language, String variant, String testDocument)
        throws Exception
    {
        long maxMemory = Runtime.getRuntime().maxMemory();
        Assume.assumeTrue(maxMemory > MIN_MAX_MEMORY_BYTES);
        return runTest(language, variant, testDocument);
    }

    /**
     * Compares all {@link NamedEntity} annotations selected from the CAS with the expected
     * rendered entries.
     *
     * @param jcas
     *            CAS holding the recognizer output.
     * @param expected
     *            expected entries, in the textual format used by {@link AssertAnnotations}.
     * @throws Exception
     *             if the comparison helper throws.
     */
    private static void assertEntities(JCas jcas, String... expected)
        throws Exception
    {
        AssertAnnotations.assertNamedEntity(expected, select(jcas, NamedEntity.class));
    }

    /**
     * Builds a {@link StanfordNamedEntityRecognizer} engine for the given variant (with tagset
     * printing enabled) and runs it on the document through {@link TestRunner}.
     *
     * @param language
     *            language code handed to the test runner.
     * @param variant
     *            model variant, or {@code null} to pass no explicit variant.
     * @param testDocument
     *            text to process.
     * @return the CAS returned by the test runner.
     * @throws Exception
     *             if the resource assumption, engine creation or processing fails.
     */
    private JCas runTest(String language, String variant, String testDocument)
        throws Exception
    {
        // Resource assumption comes first, before any engine is created.
        AssumeResource.assumeResource(StanfordNamedEntityRecognizer.class, "ner", language,
                variant);

        AnalysisEngine recognizer = createEngine(
                StanfordNamedEntityRecognizer.class,
                StanfordNamedEntityRecognizer.PARAM_VARIANT, variant,
                StanfordNamedEntityRecognizer.PARAM_PRINT_TAGSET, true);

        return TestRunner.runTest(recognizer, language, testDocument);
    }
}