//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.gazetteer; import java.util.Collections; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.descriptor.ExternalResource; import com.google.common.collect.ImmutableSet; import uk.gov.dstl.baleen.annotators.gazetteer.helpers.AbstractStemmingAhoCorasickAnnotator; import uk.gov.dstl.baleen.annotators.gazetteer.helpers.GazetteerUtils; import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction; import uk.gov.dstl.baleen.exceptions.BaleenException; import uk.gov.dstl.baleen.resources.SharedMongoResource; import uk.gov.dstl.baleen.resources.gazetteer.IGazetteer; import uk.gov.dstl.baleen.resources.gazetteer.MongoGazetteer; /** * Generic Mongo-backed Stemming RadixTree Gazetteer annotator, that will use a Mongo gazetteer to find and annotate entities. * * @baleen.javadoc */ public class MongoStemming extends AbstractStemmingAhoCorasickAnnotator { /** * Connection to Mongo * * @baleen.resource uk.gov.dstl.baleen.resources.SharedMongoResource */ public static final String KEY_MONGO = "mongo"; @ExternalResource(key = KEY_MONGO) private SharedMongoResource mongo; /** * The name of the Mongo collection containing the gazetteer * * @baleen.config gazetteer */ public static final String PARAM_COLLECTION = "collection"; @ConfigurationParameter(name = PARAM_COLLECTION, defaultValue = "gazetteer") private String collection; /** * The name of the field in Mongo that contains the gazetteer values * * @baleen.config value */ public static final String PARAM_VALUE_FIELD = "valueField"; @ConfigurationParameter(name = PARAM_VALUE_FIELD, defaultValue = "value") private String valueField; /** * Constructor */ public MongoStemming() { // Do nothing } @Override public IGazetteer configureGazetteer() throws BaleenException { IGazetteer gaz = new MongoGazetteer(); gaz.init(mongo, GazetteerUtils.configureMongo(caseSensitive, collection, valueField)); return gaz; } @Override public AnalysisEngineAction getAction() { return new AnalysisEngineAction(Collections.emptySet(), ImmutableSet.of(entityType)); } }