/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package opennlp.tools.namefind; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import opennlp.tools.dictionary.Dictionary; import opennlp.tools.tokenize.SimpleTokenizer; import opennlp.tools.util.Span; import opennlp.tools.util.StringList; /** *Tests for the {@link DictionaryNameFinder} class. */ public class DictionaryNameFinderTest { private Dictionary mDictionary = new Dictionary(); private TokenNameFinder mNameFinder; public DictionaryNameFinderTest() { StringList vanessa = new StringList(new String[]{"Vanessa"}); mDictionary.put(vanessa); StringList vanessaWilliams = new StringList("Vanessa", "Williams"); mDictionary.put(vanessaWilliams); StringList max = new StringList(new String[]{"Max"}); mDictionary.put(max); StringList michaelJordan = new StringList("Michael", "Jordan"); mDictionary.put(michaelJordan); } @Before public void setUp() throws Exception { mNameFinder = new DictionaryNameFinder(mDictionary); } @Test public void testSingleTokeNameAtSentenceStart() { String sentence = "Max a b c d"; SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE; String[] tokens = tokenizer.tokenize(sentence); Span[] names = mNameFinder.find(tokens); Assert.assertTrue(names.length == 1); Assert.assertTrue(names[0].getStart() == 0 && names[0].getEnd() == 1); } @Test public void testSingleTokeNameInsideSentence() { String sentence = "a b Max c d"; SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE; String[] tokens = tokenizer.tokenize(sentence); Span[] names = mNameFinder.find(tokens); Assert.assertTrue(names.length == 1); Assert.assertTrue(names[0].getStart() == 2 && names[0].getEnd() == 3); } @Test public void testSingleTokeNameAtSentenceEnd() { String sentence = "a b c Max"; SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE; String[] tokens = tokenizer.tokenize(sentence); Span[] names = mNameFinder.find(tokens); Assert.assertTrue(names.length == 1); Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4); } @Test public void testLastMatchingTokenNameIsChoosen() { String[] sentence = {"a", "b", "c", "Vanessa"}; Span[] names = mNameFinder.find(sentence); Assert.assertTrue(names.length == 1); Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 4); } @Test public void testLongerTokenNameIsPreferred() { String[] sentence = {"a", "b", "c", "Vanessa", "Williams"}; Span[] names = mNameFinder.find(sentence); Assert.assertTrue(names.length == 1); Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5); } @Test public void testCaseSensitivity() { String[] sentence = {"a", "b", "c", "vanessa", "williams"}; Span[] names = mNameFinder.find(sentence); Assert.assertTrue(names.length == 1); Assert.assertTrue(names[0].getStart() == 3 && names[0].getEnd() == 5); } @Test public void testCaseLongerEntry() { String[] sentence = {"a", "b", "michael", "jordan"}; Span[] names = mNameFinder.find(sentence); Assert.assertTrue(names.length == 1); Assert.assertTrue(names[0].length() == 2); } }