package edu.stanford.nlp.pipeline;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.ie.NERClassifierCombiner;
import edu.stanford.nlp.ie.regexp.NumberSequenceClassifier;
import edu.stanford.nlp.naturalli.NaturalLogicAnnotator;
import edu.stanford.nlp.naturalli.OpenIE;
import edu.stanford.nlp.util.MetaClass;
import edu.stanford.nlp.util.PropertiesUtils;
import java.io.IOException;
import java.util.*;
/**
* A class abstracting the implementation of various annotators.
* Importantly, subclasses of this class can override the implementation
* of these annotators by returning a different annotator, and
* {@link edu.stanford.nlp.pipeline.StanfordCoreNLP} will automatically load
* the new annotator instead.
*
* @author Gabor Angeli
*/
public class AnnotatorImplementations {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(AnnotatorImplementations.class);

  /** Maximum length handed to the Charniak parser annotator when "parse.maxlen" is not set. */
  private static final int DEFAULT_CHARNIAK_MAX_LENGTH = 399;

  /**
   * Tokenize, emulating the Penn Treebank
   *
   * @param properties Properties handed to the {@link TokenizerAnnotator} constructor
   * @return A new TokenizerAnnotator
   */
  public Annotator tokenizer(Properties properties) {
    return new TokenizerAnnotator(properties);
  }

  /**
   * Clean XML input
   *
   * @param properties Properties handed to the {@link CleanXmlAnnotator} constructor
   * @return A new CleanXmlAnnotator
   */
  public CleanXmlAnnotator cleanXML(Properties properties) {
    return new CleanXmlAnnotator(properties);
  }

  /**
   * Sentence split, in addition to a bunch of other things in this annotator
   * (be careful to check the implementation!)
   *
   * @param properties Properties handed to the {@link WordsToSentencesAnnotator} constructor
   * @return A new WordsToSentencesAnnotator
   */
  public Annotator wordToSentences(Properties properties) {
    return new WordsToSentencesAnnotator(properties);
  }

  /**
   * Part of speech tag
   *
   * @param properties Properties handed to the {@link POSTaggerAnnotator} constructor,
   *                   together with the fixed annotator name "pos"
   * @return A new POSTaggerAnnotator
   */
  public Annotator posTagger(Properties properties) {
    return new POSTaggerAnnotator("pos", properties);
  }

  /**
   * Annotate lemmas
   *
   * @param properties Not used by this implementation
   * @param verbose Forwarded to the {@link MorphaAnnotator} constructor
   * @return A new MorphaAnnotator
   */
  public Annotator morpha(Properties properties, boolean verbose) {
    return new MorphaAnnotator(verbose);
  }

  /**
   * Annotate for named entities -- note that this combines multiple NER tag sets,
   * and some auxiliary things (like temporal tagging)
   *
   * @param properties Properties handed to the {@link NERCombinerAnnotator} constructor
   * @return A new NERCombinerAnnotator
   * @throws RuntimeIOException wrapping any IOException raised while constructing the annotator
   */
  public Annotator ner(Properties properties) {
    try {
      return new NERCombinerAnnotator(properties);
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }

  /**
   * Run TokensRegex -- annotate patterns found in tokens
   *
   * @param properties Properties handed to the {@link TokensRegexAnnotator} constructor
   * @param name The name handed to the annotator constructor
   * @return A new TokensRegexAnnotator
   */
  public Annotator tokensregex(Properties properties, String name) {
    return new TokensRegexAnnotator(name, properties);
  }

  /**
   * Run RegexNER -- rule-based NER based on a deterministic mapping file
   *
   * @param properties Properties handed to the {@link TokensRegexNERAnnotator} constructor
   * @param name The name handed to the annotator constructor
   * @return A new TokensRegexNERAnnotator
   */
  public Annotator tokensRegexNER(Properties properties, String name) {
    return new TokensRegexNERAnnotator(name, properties);
  }

  /**
   * Annotate mentions
   *
   * @param properties Properties handed to the {@link EntityMentionsAnnotator} constructor
   * @param name The name handed to the annotator constructor
   * @return A new EntityMentionsAnnotator
   */
  public Annotator entityMentions(Properties properties, String name) {
    return new EntityMentionsAnnotator(name, properties);
  }

  /**
   * Annotate for gender of tokens
   *
   * @param properties Source of the "gender.firstnames" property; when it is absent,
   *                   {@code DefaultPaths.DEFAULT_GENDER_FIRST_NAMES} is used instead
   * @param verbose Not forwarded: the annotator is always constructed with a literal
   *                {@code false} as its first argument.
   *                NOTE(review): this looks unintentional -- confirm before forwarding it.
   * @return A new GenderAnnotator
   */
  public Annotator gender(Properties properties, boolean verbose) {
    String firstNames = properties.getProperty("gender.firstnames", DefaultPaths.DEFAULT_GENDER_FIRST_NAMES);
    return new GenderAnnotator(false, firstNames);
  }

  /**
   * Annotate parse trees
   *
   * @param properties Properties that control the behavior of the parser. It uses "parse.x" properties:
   *                   "parse.type" ("stanford" by default, or "charniak", case-insensitive) and, for the
   *                   Charniak parser, "parse.model", "parse.executable" and the optional "parse.maxlen".
   * @return A ParserAnnotator, or a CharniakParserAnnotator if parse.type=charniak
   * @throws RuntimeException if the parser type is unknown, or if parse.type=charniak and either
   *                          parse.model or parse.executable is missing
   * @throws NumberFormatException if parse.type=charniak and parse.maxlen is set but is not an integer
   */
  public Annotator parse(Properties properties) {
    String parserType = properties.getProperty("parse.type", "stanford");
    if (parserType.equalsIgnoreCase("stanford")) {
      return new ParserAnnotator("parse", properties);
    } else if (parserType.equalsIgnoreCase("charniak")) {
      String model = properties.getProperty("parse.model");
      String parserExecutable = properties.getProperty("parse.executable");
      if (model == null || parserExecutable == null) {
        throw new RuntimeException("Both parse.model and parse.executable properties must be specified if parse.type=charniak");
      }
      return new CharniakParserAnnotator(model, parserExecutable, false, charniakMaxLength(properties));
    } else {
      throw new RuntimeException("Unknown parser type: " + parserType + " (currently supported: stanford and charniak)");
    }
  }

  /**
   * Read "parse.maxlen" as an integer, falling back to {@link #DEFAULT_CHARNIAK_MAX_LENGTH} when unset.
   */
  private static int charniakMaxLength(Properties properties) {
    String maxLenStr = properties.getProperty("parse.maxlen");
    if (maxLenStr == null) {
      return DEFAULT_CHARNIAK_MAX_LENGTH;
    }
    try {
      // Values read from .properties files keep trailing whitespace, which parseInt rejects.
      return Integer.parseInt(maxLenStr.trim());
    } catch (NumberFormatException e) {
      // Same exception type as before, but say which property was malformed.
      NumberFormatException detailed =
          new NumberFormatException("parse.maxlen must be an integer, but was: \"" + maxLenStr + "\"");
      detailed.initCause(e);
      throw detailed;
    }
  }

  /**
   * Instantiate a user-supplied annotator by class name.
   *
   * The class name is looked up in the properties under the key
   * {@code StanfordCoreNLP.CUSTOM_ANNOTATOR_PREFIX + name}. The {@code property} argument may be
   * given with or without that prefix; the annotator name is always the un-prefixed form.
   * Constructors are tried in this order until one is found: (name, properties), (name),
   * (properties), and finally the no-argument constructor.
   *
   * @param properties Properties holding the class name; also offered to the annotator's constructor
   * @param property The custom annotator's name, optionally carrying the custom annotator prefix
   * @return The instance created through {@link MetaClass}
   * @throws IllegalArgumentException if no class name is registered for this annotator
   */
  public Annotator custom(Properties properties, String property) {
    String prefix = StanfordCoreNLP.CUSTOM_ANNOTATOR_PREFIX;
    String customName;
    String classNameKey;
    if (property.startsWith(prefix)) {
      customName = property.substring(prefix.length());
      classNameKey = property;
    } else {
      customName = property;
      classNameKey = prefix + property;
    }

    String customClassName = properties.getProperty(classNameKey);
    if (customClassName == null) {
      // Fail here with a usable message rather than deep inside the reflective instantiation.
      throw new IllegalArgumentException("No class specified for custom annotator \"" + customName
          + "\": expected property \"" + classNameKey + "\" to be set");
    }

    MetaClass annotatorClass = new MetaClass(customClassName);
    try {
      // name + properties
      return annotatorClass.createInstance(customName, properties);
    } catch (MetaClass.ConstructorNotFoundException ignored) {
      // no such constructor; try the next signature
    }
    try {
      // name
      return annotatorClass.createInstance(customName);
    } catch (MetaClass.ConstructorNotFoundException ignored) {
      // no such constructor; try the next signature
    }
    try {
      // properties
      return annotatorClass.createInstance(properties);
    } catch (MetaClass.ConstructorNotFoundException ignored) {
      // no such constructor; fall back to the empty constructor
    }
    // empty arguments -- any failure here propagates to the caller
    return annotatorClass.createInstance();
  }

  /**
   * Infer the original casing of tokens
   *
   * @param properties Properties handed to the {@link TrueCaseAnnotator} constructor
   * @return A new TrueCaseAnnotator
   */
  public Annotator trueCase(Properties properties) {
    return new TrueCaseAnnotator(properties);
  }

  /**
   * Annotate for mention (statistical or hybrid)
   *
   * @param properties Properties from which the coref- and mention-prefixed entries are extracted
   * @return A new MentionAnnotator built from the merged coref and mention properties
   */
  public Annotator mention(Properties properties) {
    // TO DO: split up coref and mention properties
    return new MentionAnnotator(corefAndMentionProperties(properties));
  }

  /**
   * Annotate for coreference (statistical or hybrid)
   *
   * @param properties Properties from which the coref- and mention-prefixed entries are extracted
   * @return A new CorefAnnotator built from the merged coref and mention properties
   */
  public Annotator coref(Properties properties) {
    return new CorefAnnotator(corefAndMentionProperties(properties));
  }

  /**
   * Collect the properties extracted under the {@code Annotator.STANFORD_COREF + "."} and
   * {@code Annotator.STANFORD_MENTION + "."} prefixes into one fresh Properties object.
   * Mention entries are added last, so they win over coref entries with an equal key.
   */
  private static Properties corefAndMentionProperties(Properties properties) {
    Properties corefProperties =
        PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_COREF + ".", true);
    Properties mentionProperties =
        PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_MENTION + ".", true);
    Properties merged = new Properties();
    merged.putAll(corefProperties);
    merged.putAll(mentionProperties);
    return merged;
  }

  /**
   * Annotate for coreference (deterministic)
   *
   * @param properties Properties handed to the {@link DeterministicCorefAnnotator} constructor
   * @return A new DeterministicCorefAnnotator
   */
  public Annotator dcoref(Properties properties) {
    return new DeterministicCorefAnnotator(properties);
  }

  /**
   * Annotate for relations expressed in sentences
   *
   * @param properties Properties handed to the {@link RelationExtractorAnnotator} constructor
   * @return A new RelationExtractorAnnotator
   */
  public Annotator relations(Properties properties) {
    return new RelationExtractorAnnotator(properties);
  }

  /**
   * Annotate for sentiment in sentences
   *
   * @param properties Properties handed to the {@link SentimentAnnotator} constructor
   * @param name The name handed to the annotator constructor
   * @return A new SentimentAnnotator
   */
  public Annotator sentiment(Properties properties, String name) {
    return new SentimentAnnotator(name, properties);
  }

  /**
   * Annotate with the column data classifier.
   *
   * Note that this method modifies the Properties object it is given: when
   * "classify.loadClassifier" is present, its value is copied to "loadClassifier".
   *
   * @param properties Properties for the classifier; must end up containing "loadClassifier"
   * @return A new ColumnDataClassifierAnnotator
   * @throws RuntimeException if no "loadClassifier" key is present after the copy step
   */
  public Annotator columnData(Properties properties) {
    if (properties.containsKey("classify.loadClassifier")) {
      properties.setProperty("loadClassifier", properties.getProperty("classify.loadClassifier"));
    }
    if (!properties.containsKey("loadClassifier")) {
      throw new RuntimeException("Must load a classifier when creating a column data classifier annotator");
    }
    return new ColumnDataClassifierAnnotator(properties);
  }

  /**
   * Annotate dependency relations in sentences
   *
   * @param properties Properties from which the entries under the
   *                   {@code Annotator.STANFORD_DEPENDENCIES + '.'} prefix are extracted
   * @return A new DependencyParseAnnotator built from the extracted properties
   */
  public Annotator dependencies(Properties properties) {
    Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties,
        Annotator.STANFORD_DEPENDENCIES + '.');
    return new DependencyParseAnnotator(relevantProperties);
  }

  /**
   * Annotate operators (e.g., quantifiers) and polarity of tokens in a sentence
   *
   * @param properties Properties from which the entries under the
   *                   {@code Annotator.STANFORD_NATLOG + '.'} prefix are extracted
   * @return A new NaturalLogicAnnotator built from the extracted properties
   */
  public Annotator natlog(Properties properties) {
    Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties,
        Annotator.STANFORD_NATLOG + '.');
    return new NaturalLogicAnnotator(relevantProperties);
  }

  /**
   * Annotate {@link edu.stanford.nlp.ie.util.RelationTriple}s from text.
   *
   * @param properties Properties from which the entries under the
   *                   {@code Annotator.STANFORD_OPENIE + '.'} prefix are extracted
   * @return A new OpenIE annotator built from the extracted properties
   */
  public Annotator openie(Properties properties) {
    Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties,
        Annotator.STANFORD_OPENIE + '.');
    return new OpenIE(relevantProperties);
  }

  /**
   * Annotate quotes and extract them like sentences
   *
   * @param properties Properties from which the entries under the
   *                   {@code Annotator.STANFORD_QUOTE + '.'} prefix are extracted
   * @return A new QuoteAnnotator built from the extracted properties
   */
  public Annotator quote(Properties properties) {
    Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties,
        Annotator.STANFORD_QUOTE + '.');
    return new QuoteAnnotator(relevantProperties);
  }

  /**
   * Attribute quotes to speakers
   *
   * @param properties Properties from which the entries under the
   *                   {@code Annotator.STANFORD_QUOTE_ATTRIBUTION + '.'} prefix are extracted
   * @return A new QuoteAttributionAnnotator built from the extracted properties
   */
  public Annotator quoteattribution(Properties properties) {
    Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties,
        Annotator.STANFORD_QUOTE_ATTRIBUTION + '.');
    return new QuoteAttributionAnnotator(relevantProperties);
  }

  /**
   * Add universal dependencies features
   *
   * @param properties Not used by this implementation
   * @return A new UDFeatureAnnotator
   */
  public Annotator udfeats(Properties properties) {
    return new UDFeatureAnnotator();
  }

  /**
   * Annotate for KBP relations
   *
   * @param properties Properties handed to the {@link KBPAnnotator} constructor,
   *                   together with the name {@code Annotator.STANFORD_KBP}
   * @return A new KBPAnnotator
   */
  public Annotator kbp(Properties properties) {
    return new KBPAnnotator(Annotator.STANFORD_KBP, properties);
  }

  /**
   * Create the annotator registered under {@code Annotator.STANFORD_LINK}.
   *
   * @param properties Properties handed to the {@link WikidictAnnotator} constructor,
   *                   together with the name {@code Annotator.STANFORD_LINK}
   * @return A new WikidictAnnotator
   */
  public Annotator link(Properties properties) {
    return new WikidictAnnotator(Annotator.STANFORD_LINK, properties);
  }

}