ApacheExtractorTest.java example

Explorer

CLAVIN-master
- src
  - main
    - java
      - com
        bericotech
        clavin
        ClavinException.java
        GeoParser.java
        GeoParserFactory.java
        WorkflowDemo.java
        extractor
        ApacheExtractor.java
        LocationExtractor.java
        LocationOccurrence.java
        gazetteer
        BasicGeoName.java
        CountryCode.java
        FeatureClass.java
        FeatureCode.java
        FeatureCodeBuilder.java
        GeoName.java
        LazyAncestryGeoName.java
        query
        AncestryMode.java
        FuzzyMode.java
        Gazetteer.java
        GazetteerQuery.java
        LuceneGazetteer.java
        QueryBuilder.java
        index
        BinarySimilarity.java
        IndexDirectoryBuilder.java
        IndexField.java
        WhitespaceLowerCaseAnalyzer.java
        WhitespaceLowerCaseTokenizer.java
        resolver
        ClavinLocationResolver.java
        LocationResolver.java
        LuceneLocationResolver.java
        ResolvedLocation.java
        multipart
        DefaultScorer.java
        MatchedLocation.java
        MultipartLocationName.java
        MultipartLocationResolver.java
        ResolvedMultipartLocation.java
        Scorer.java
        SearchLevel.java
        SearchResult.java
        util
        DamerauLevenshtein.java
        ListUtils.java
        TextUtils.java
  - test
    - java
      - com
        bericotech
        clavin
        AllTestsSuite.java
        GeoParserFactoryTest.java
        GeoParserTest.java
        extractor
        ApacheExtractorTest.java
        LocationOccurrenceTest.java
        gazetteer
        BasicGeoNameTest.java
        LazyAncestryGeoNameTest.java
        query
        LuceneGazetteerTest.java
        index
        BinarySimilarityTest.java
        resolver
        ClavinLocationResolverHeuristicsTest.java
        ClavinLocationResolverTest.java
        ResolvedLocationTest.java
        multipart
        MultiLevelMultipartLocationResolverTest.java
        MultipartLocationResolverTest.java
        util
        DamerauLevenshteinTest.java
        ListUtilsTest.java
        TextUtilsTest.java

package com.bericotech.clavin.extractor;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.junit.Test;

import com.bericotech.clavin.util.TextUtils;

/*#####################################################################
 * 
 * CLAVIN (Cartographic Location And Vicinity INdexer)
 * ---------------------------------------------------
 * 
 * Copyright (C) 2012-2013 Berico Technologies
 * http://clavin.bericotechnologies.com
 * 
 * ====================================================================
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 * 
 * ====================================================================
 * 
 * ApacheExtractorTest.java
 * 
 *###################################################################*/

/**
 * Checks output produced by named entity recognizer (NER), supplied
 * by Apache OpenNLP Name Finder as the default extractor for CLAVIN.
 * 
 */
public class ApacheExtractorTest {

    /**
     * Ensures we're getting good responses from the
     * {@link ApacheExtractor}, and that we can properly tag multiple
     * documents with the same instance.
     * @throws IOException 
     */
    @Test
    public void testExtractLocationNames() throws IOException {
        // instantiate the extractor
        ApacheExtractor extractor = new ApacheExtractor();
        
        // a sample input file with some text about Somalia
        File inputFile = new File("src/test/resources/sample-docs/Somalia-doc.txt");
        
        // slurp the contents of the file into a String
        String inputString = TextUtils.fileToString(inputFile);
        
        // extract named location entities from the input String
        List<LocationOccurrence> locationNames1 = extractor.extractLocationNames(inputString);
        
        // make sure we're getting valid output from the extractor
        // (testing the *correctness* of the output is really the
        // responsibility of the Apache OpenNLP NameFinder developers!)
        assertNotNull("Null location name list received from extractor.", locationNames1);
        assertFalse("Empty location name list received from extractor.", locationNames1.isEmpty());
        assertTrue("Extractor choked/quit after first LOCATION.", locationNames1.size() > 1);
        
        // make sure that if we run the extractor on the same input a
        // second time, we get the same output
        List<LocationOccurrence> locationNames2 = extractor.extractLocationNames(inputString);
        assertEquals("Different extractor results for subsequent identical document.", locationNames1, locationNames2);
    }
    
    /**
     * Ensures we get the expected exception on null input.
     * @throws IOException
     */
    @Test(expected=IllegalArgumentException.class)
    public void testNullInput() throws IOException {
        ApacheExtractor extractor = new ApacheExtractor();
        extractor.extractLocationNames(null);
    }
    
}