/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.opennlp;

import java.io.IOException;
import java.util.Arrays;

import opennlp.tools.chunker.Chunker;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.namefind.TokenNameFinder;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTagger;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerModel;

import org.apache.stanbol.commons.opennlp.OpenNLP;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests the loading of OpenNLP models through the {@link OpenNLP} component.
 * <p>
 * These checks used to be covered only implicitly by the TextAnalyzer test,
 * which has been removed in the meantime.
 *
 * @author Rupert Westenthaler
 */
public class OpenNLPTest {

    /** Language code for which the tests expect models to be available. */
    private static final String LANG_AVAILABLE = "en";

    /** Language code for which the tests expect no models to be available. */
    private static final String LANG_MISSING = "ru";

    /** Name finder types the tests expect to be loadable for {@link #LANG_AVAILABLE}. */
    private static final String[] NER_TYPES = {"person", "organization", "location"};

    /** Instance under test, shared by all test methods. */
    private static OpenNLP openNLP;

    /**
     * Creates the shared {@link OpenNLP} instance backed by a
     * {@link ClasspathDataFileProvider}.
     */
    @BeforeClass
    public static void init() {
        openNLP = new OpenNLP(new ClasspathDataFileProvider("DUMMY"));
    }

    /**
     * The tokenizer model and the tokenizer must both be available for the
     * supported language.
     */
    @Test
    public void testLoadEnTokenizer() throws IOException {
        final TokenizerModel tokenizerModel = openNLP.getTokenizerModel(LANG_AVAILABLE);
        Assert.assertNotNull(tokenizerModel);

        final Tokenizer enTokenizer = openNLP.getTokenizer(LANG_AVAILABLE);
        Assert.assertNotNull(enTokenizer);
    }

    /**
     * Requesting the tokenizer model of a language without a model is
     * expected to yield {@code null}.
     */
    @Test
    public void testLoadMissingTokenizerModel() throws IOException {
        // no tokenizer model is expected for this language, so the lookup
        // must come back empty
        final TokenizerModel missingModel = openNLP.getTokenizerModel(LANG_MISSING);
        Assert.assertNull(missingModel);
    }

    /**
     * Unlike the model lookup, requesting a tokenizer for a language without
     * a model is expected to fall back to {@link SimpleTokenizer#INSTANCE}.
     */
    @Test
    public void testFallbackToSimpleTokenizer() throws IOException {
        final Tokenizer fallback = openNLP.getTokenizer(LANG_MISSING);
        Assert.assertNotNull(fallback);
        Assert.assertEquals(SimpleTokenizer.INSTANCE, fallback);
    }

    /**
     * The sentence model and the sentence detector must both be available
     * for the supported language.
     */
    @Test
    public void testLoadEnSentence() throws IOException {
        final SentenceModel sentenceModel = openNLP.getSentenceModel(LANG_AVAILABLE);
        Assert.assertNotNull(sentenceModel);

        final SentenceDetector detector = openNLP.getSentenceDetector(LANG_AVAILABLE);
        Assert.assertNotNull(detector);
    }

    /**
     * Neither a sentence model nor a sentence detector is expected for a
     * language without a model.
     */
    @Test
    public void testLoadMissingSentence() throws IOException {
        final SentenceModel sentenceModel = openNLP.getSentenceModel(LANG_MISSING);
        Assert.assertNull(sentenceModel);

        final SentenceDetector detector = openNLP.getSentenceDetector(LANG_MISSING);
        Assert.assertNull(detector);
    }

    /**
     * The part-of-speech model and tagger must both be available for the
     * supported language.
     */
    @Test
    public void testLoadEnPOS() throws IOException {
        final POSModel posModel = openNLP.getPartOfSpeechModel(LANG_AVAILABLE);
        Assert.assertNotNull(posModel);

        final POSTagger tagger = openNLP.getPartOfSpeechTagger(LANG_AVAILABLE);
        Assert.assertNotNull(tagger);
    }

    /**
     * Neither a part-of-speech model nor a tagger is expected for a language
     * without a model.
     */
    @Test
    public void testLoadMissingPOS() throws IOException {
        final POSModel posModel = openNLP.getPartOfSpeechModel(LANG_MISSING);
        Assert.assertNull(posModel);

        final POSTagger tagger = openNLP.getPartOfSpeechTagger(LANG_MISSING);
        Assert.assertNull(tagger);
    }

    /**
     * The chunker model and the chunker must both be available for the
     * supported language.
     */
    @Test
    public void testLoadEnChunker() throws IOException {
        final ChunkerModel chunkerModel = openNLP.getChunkerModel(LANG_AVAILABLE);
        Assert.assertNotNull(chunkerModel);

        final Chunker enChunker = openNLP.getChunker(LANG_AVAILABLE);
        Assert.assertNotNull(enChunker);
    }

    /**
     * Neither a chunker model nor a chunker is expected for a language
     * without a model.
     */
    @Test
    public void testLoadMissingChunker() throws IOException {
        final ChunkerModel chunkerModel = openNLP.getChunkerModel(LANG_MISSING);
        Assert.assertNull(chunkerModel);

        final Chunker missingChunker = openNLP.getChunker(LANG_MISSING);
        Assert.assertNull(missingChunker);
    }

    /**
     * For every type in {@link #NER_TYPES} both the name finder model and
     * the name finder must be available for the supported language.
     */
    @Test
    public void testLoadEnNER() throws IOException {
        for (String nerType : Arrays.asList(NER_TYPES)) {
            final TokenNameFinderModel nameModel = openNLP.getNameModel(nerType, LANG_AVAILABLE);
            Assert.assertNotNull(nameModel);

            final TokenNameFinder nameFinder = openNLP.getNameFinder(nerType, LANG_AVAILABLE);
            Assert.assertNotNull(nameFinder);
        }
    }

    /**
     * Name finder lookups are expected to return {@code null} both for an
     * unknown type and for a known type in a language without a model.
     */
    @Test
    public void testLoadMissingNER() throws IOException {
        // case 1: the type is unknown
        final TokenNameFinderModel unknownTypeModel = openNLP.getNameModel("person2", LANG_AVAILABLE);
        Assert.assertNull(unknownTypeModel);
        final TokenNameFinder unknownTypeFinder = openNLP.getNameFinder("person2", LANG_AVAILABLE);
        Assert.assertNull(unknownTypeFinder);

        // case 2: the type is known, but the language is not
        final TokenNameFinderModel unknownLangModel = openNLP.getNameModel("person", LANG_MISSING);
        Assert.assertNull(unknownLangModel);
        final TokenNameFinder unknownLangFinder = openNLP.getNameFinder("person", LANG_MISSING);
        Assert.assertNull(unknownLangFinder);
    }

    /**
     * Models requested by file name must load for each model type; a file
     * name that is not available is expected to yield {@code null}.
     */
    @Test
    public void testLoadModelByName() throws IOException {
        final TokenizerModel tokenizerModel =
                openNLP.getModel(TokenizerModel.class, "en-token.bin", null);
        Assert.assertNotNull(tokenizerModel);

        final SentenceModel sentenceModel =
                openNLP.getModel(SentenceModel.class, "en-sent.bin", null);
        Assert.assertNotNull(sentenceModel);

        final POSModel partOfSpeechModel =
                openNLP.getModel(POSModel.class, "en-pos-maxent.bin", null);
        Assert.assertNotNull(partOfSpeechModel);

        final ChunkerModel chunkerModel =
                openNLP.getModel(ChunkerModel.class, "en-chunker.bin", null);
        Assert.assertNotNull(chunkerModel);

        final TokenNameFinderModel personModel =
                openNLP.getModel(TokenNameFinderModel.class, "en-ner-person.bin", null);
        Assert.assertNotNull(personModel);

        // a model file that is not available
        final TokenizerModel unavailableModel =
                openNLP.getModel(TokenizerModel.class, "ru-token.bin", null);
        Assert.assertNull(unavailableModel);
    }

    /**
     * Requesting a model file with a model class it does not match (here the
     * tokenizer file as a {@link SentenceModel}) is expected to fail with an
     * {@link IllegalStateException}.
     */
    @Test(expected = IllegalStateException.class)
    public void testLoadIncompatibleModelByName() throws IOException {
        final SentenceModel mismatched =
                openNLP.getModel(SentenceModel.class, "en-token.bin", null);
        // only evaluated if the expected exception was NOT thrown
        Assert.assertNotNull(mismatched);
    }
}