package com.bericotech.clavin.extractor; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; import java.util.List; import org.junit.Test; import com.bericotech.clavin.util.TextUtils; /*##################################################################### * * CLAVIN (Cartographic Location And Vicinity INdexer) * --------------------------------------------------- * * Copyright (C) 2012-2013 Berico Technologies * http://clavin.bericotechnologies.com * * ==================================================================== * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. * * ==================================================================== * * ApacheExtractorTest.java * *###################################################################*/ /** * Checks output produced by named entity recognizer (NER), supplied * by Apache OpenNLP Name Finder as the default extractor for CLAVIN. * */ public class ApacheExtractorTest { /** * Ensures we're getting good responses from the * {@link ApacheExtractor}, and that we can properly tag multiple * documents with the same instance. * @throws IOException */ @Test public void testExtractLocationNames() throws IOException { // instantiate the extractor ApacheExtractor extractor = new ApacheExtractor(); // a sample input file with some text about Somalia File inputFile = new File("src/test/resources/sample-docs/Somalia-doc.txt"); // slurp the contents of the file into a String String inputString = TextUtils.fileToString(inputFile); // extract named location entities from the input String List<LocationOccurrence> locationNames1 = extractor.extractLocationNames(inputString); // make sure we're getting valid output from the extractor // (testing the *correctness* of the output is really the // responsibility of the Apache OpenNLP NameFinder developers!) assertNotNull("Null location name list received from extractor.", locationNames1); assertFalse("Empty location name list received from extractor.", locationNames1.isEmpty()); assertTrue("Extractor choked/quit after first LOCATION.", locationNames1.size() > 1); // make sure that if we run the extractor on the same input a // second time, we get the same output List<LocationOccurrence> locationNames2 = extractor.extractLocationNames(inputString); assertEquals("Different extractor results for subsequent identical document.", locationNames1, locationNames2); } /** * Ensures we get the expected exception on null input. * @throws IOException */ @Test(expected=IllegalArgumentException.class) public void testNullInput() throws IOException { ApacheExtractor extractor = new ApacheExtractor(); extractor.extractLocationNames(null); } }