/**
 * Copyright 2009-2013 The MITRE Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * **************************************************************************
 * NOTICE
 * This software was produced for the U. S. Government under Contract No.
 * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer
 * Software and Noncommercial Computer Software Documentation Clause
 * 252.227-7014 (JUN 1995)
 *
 * (c) 2012 The MITRE Corporation. All Rights Reserved.
 * **************************************************************************
 **/
package org.opensextant.toolbox;

import java.util.List;

import org.opensextant.matching.MatcherFactory;
import org.opensextant.matching.PlacenameMatcher;
import org.opensextant.placedata.PlaceCandidate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.ProcessingResource;
import gate.Resource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.InvalidOffsetException;

/**
 * A Solr-based ProcessingResource that tags mentions of geospatial candidates
 * found in a document.
 *
 * <p>
 * {@link #init()} obtains a {@link PlacenameMatcher} from
 * {@link MatcherFactory}; {@link #execute()} runs that matcher over the
 * current document's text and adds one annotation per returned
 * {@link PlaceCandidate}.
 *
 * @author David Smiley, MITRE, dsmiley@mitre.org
 * @author Marc Ubaldino, MITRE, ubaldino@mitre.org
 */
@CreoleResource(name = "OpenSextant NaiveTaggerSolr", comment = "A Solr-based tagger")
public class NaiveTaggerSolrPR extends AbstractLanguageAnalyser implements ProcessingResource {

    private static final long serialVersionUID = -6167312014577862928L;

    // Log object
    private static final Logger LOGGER = LoggerFactory.getLogger(NaiveTaggerSolrPR.class);

    // Matcher obtained in init(); stays null when initialisation could not get one
    private transient PlacenameMatcher matcher;

    // Name of the annotation set that receives the annotations
    private String outputASName;
    // Annotation type given to every annotation created by execute()
    private String annotationType;
    // True only when outputASName was set to a non-null, non-empty value
    private boolean outputASNameSet = false;

    // location of solr gazetteer
    private String gazetteerHome;

    // The parameters passed in by the user

    // The name of the input AnnotationSet (stored only; execute() does not read it)
    String inputASName;
    // tag placenames which are abbreviations or codes
    Boolean tagAbbreviations;

    // TODO expose calibrate and calibrateScore as PR parameters
    // to force all confidences to calibrateScore for calibration
    boolean calibrate = false;
    double calibrateScore = 0.0;

    /**
     * Configures and starts {@link MatcherFactory} if that has not been done
     * yet, then obtains the matcher used by {@link #execute()}.
     *
     * <p>
     * When the factory cannot be configured or started, or yields no matcher,
     * the problem is logged and this resource is returned without a matcher;
     * a later {@link #execute()} then fails with an
     * {@link IllegalStateException}.
     *
     * @return this resource
     * @throws ResourceInstantiationException
     *             if the superclass initialisation fails
     */
    @Override
    public Resource init() throws ResourceInstantiationException {
        super.init();

        // Check to see if MatcherFactory has not been configured
        if (!MatcherFactory.isConfigured()) {
            // no gazetteer home given, try to use default
            if (gazetteerHome == null || gazetteerHome.length() == 0) {
                LOGGER.info("No gazetter home given for MatcherFactory, trying default config");
                MatcherFactory.config("");
                // if the default worked, start
                if (MatcherFactory.isConfigured()) {
                    MatcherFactory.start();
                    logMatcherFactoryHome();
                } else {
                    LOGGER.error("No config given and no default set");
                    return this;
                }
            } else {
                MatcherFactory.config(gazetteerHome);
                MatcherFactory.start();
            }
        } else {
            // already configured
            if (!MatcherFactory.isStarted()) {
                MatcherFactory.start();
            }
            logMatcherFactoryHome();
        }

        // see if the Factory is running
        if (!MatcherFactory.isStarted()) {
            LOGGER.error("MatcherFactory did not start. Not configured?");
            return this;
        }

        matcher = MatcherFactory.getMatcher();
        if (matcher == null) {
            LOGGER.error("Could not get a matcher from MatcherFactory. Not configured?");
            return this;
        }

        // tagAbbreviations is a boxed Boolean and may still be null when no
        // value was ever assigned; treat null as false instead of unboxing it.
        matcher.tagAbbreviations(Boolean.TRUE.equals(tagAbbreviations));

        return this;
    }

    /** Logs the home location that MatcherFactory reports. */
    private void logMatcherFactoryHome() {
        LOGGER.info("NaiveTagger is using MatcherFactory configured to use " + MatcherFactory.getHomeLocation());
    }

    /**
     * Runs the superclass cleanup, then cleans up the matcher if one was
     * obtained.
     */
    @Override
    public void cleanup() {
        super.cleanup();
        if (matcher != null) {
            matcher.cleanup();
        }
    }

    /**
     * Tags the current document: matches its text and adds one annotation of
     * type {@link #getAnnotationType()} per returned {@link PlaceCandidate}.
     *
     * <p>
     * Each annotation carries the features {@code "string"} (the place name)
     * and {@code "placeCandidate"} (the candidate object itself). A failure
     * while matching is logged and the document is left untouched; a candidate
     * with invalid offsets is logged and skipped.
     *
     * @throws ExecutionException
     *             declared by the overridden method
     * @throws IllegalStateException
     *             if no matcher is available because {@link #init()} did not
     *             obtain one
     */
    @Override
    public void execute() throws ExecutionException {
        if (matcher == null) {
            throw new IllegalStateException("This PR hasn't been init'ed!");
        }

        List<PlaceCandidate> matches;
        try {
            matches = matcher.matchText(document.getContent().toString(), document.getName());
        } catch (Exception err) {
            // Best effort: one failing document must not abort the whole run
            LOGGER.error("Error when tagging document " + document.getName(), err);
            return;
        }

        // Nothing to annotate if the matcher produced no result list at all
        if (matches == null) {
            return;
        }

        // If no output annotation set was given, use the document's default set
        AnnotationSet annotSet = outputASNameSet ? document.getAnnotations(outputASName)
                : document.getAnnotations();

        for (PlaceCandidate pc : matches) {
            // create and populate the PlaceCandidate annotation
            FeatureMap feats = Factory.newFeatureMap();
            feats.put("string", pc.getPlaceName());
            feats.put("placeCandidate", pc);

            // The feature map holds a reference to pc, so the calibrated
            // score is visible through the "placeCandidate" feature as well
            if (calibrate) {
                pc.setPlaceConfidenceScore(calibrateScore);
            }

            try {
                annotSet.add(pc.getStart(), pc.getEnd(), annotationType, feats);
            } catch (InvalidOffsetException offsetErr) {
                LOGGER.error("Error when adding PlaceCandidate to document in " + document.getName(), offsetErr);
            }
        }
    }

    /**
     * @return the configured input annotation set name
     */
    public String getInputASName() {
        return inputASName;
    }

    /**
     * @param inputASName
     *            the input annotation set name
     */
    @Optional
    @RunTime
    @CreoleParameter
    public void setInputASName(String inputASName) {
        this.inputASName = inputASName;
    }

    /**
     * @return the configured output annotation set name
     */
    public String getOutputASName() {
        return outputASName;
    }

    /**
     * @return the annotation type given to created annotations
     */
    public String getAnnotationType() {
        return annotationType;
    }

    /**
     * @param annotationType
     *            the annotation type given to created annotations
     */
    @Optional
    @RunTime
    @CreoleParameter(defaultValue = "placecandidate")
    public void setAnnotationType(String annotationType) {
        this.annotationType = annotationType;
    }

    /**
     * @return the configured gazetteer home location
     */
    public String getGazHome() {
        return gazetteerHome;
    }

    /**
     * @param gazHome
     *            the gazetteer home passed to MatcherFactory by
     *            {@link #init()} when the factory is not yet configured
     */
    @CreoleParameter
    public void setGazHome(String gazHome) {
        this.gazetteerHome = gazHome;
    }

    /**
     * Sets the output annotation set name and records whether it is usable
     * (non-null and non-empty); otherwise {@link #execute()} writes to the
     * document's default annotation set.
     *
     * @param outputASName
     *            the output annotation set name
     */
    @Optional
    @RunTime
    @CreoleParameter
    public void setOutputASName(String outputASName) {
        this.outputASName = outputASName;
        outputASNameSet = (outputASName != null && !outputASName.isEmpty());
    }

    /**
     * @return whether abbreviations and codes are tagged; may be null if never
     *         set
     */
    public Boolean getTagAbbreviations() {
        return tagAbbreviations;
    }

    /**
     * @param tagAbbreviations
     *            whether abbreviations and codes are tagged; the value is
     *            handed to the matcher in {@link #init()}
     */
    @Optional
    @CreoleParameter(defaultValue = "false")
    public void setTagAbbreviations(Boolean tagAbbreviations) {
        this.tagAbbreviations = tagAbbreviations;
    }
}