package edu.stanford.nlp.pipeline; import edu.stanford.nlp.io.RuntimeIOException; import edu.stanford.nlp.util.logging.Redwood; import edu.stanford.nlp.ie.NERClassifierCombiner; import edu.stanford.nlp.ie.regexp.NumberSequenceClassifier; import edu.stanford.nlp.naturalli.NaturalLogicAnnotator; import edu.stanford.nlp.naturalli.OpenIE; import edu.stanford.nlp.util.MetaClass; import edu.stanford.nlp.util.PropertiesUtils; import java.io.IOException; import java.util.*; /** * A class abstracting the implementation of various annotators. * Importantly, subclasses of this class can overwrite the implementation * of these annotators by returning a different annotator, and * {@link edu.stanford.nlp.pipeline.StanfordCoreNLP} will automatically load * the new annotator instead. * * @author Gabor Angeli */ public class AnnotatorImplementations { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(AnnotatorImplementations.class); /** * Tokenize, emulating the Penn Treebank */ public Annotator tokenizer(Properties properties) { return new TokenizerAnnotator(properties); } /** * Clean XML input */ public CleanXmlAnnotator cleanXML(Properties properties) { return new CleanXmlAnnotator(properties); } /** * Sentence split, in addition to a bunch of other things in this annotator (be careful to check the implementation!) */ public Annotator wordToSentences(Properties properties) { return new WordsToSentencesAnnotator(properties); } /** * Part of speech tag */ public Annotator posTagger(Properties properties) { String annotatorName = "pos"; return new POSTaggerAnnotator(annotatorName, properties); } /** * Annotate lemmas */ public Annotator morpha(Properties properties, boolean verbose) { return new MorphaAnnotator(verbose); } /** * Annotate for named entities -- note that this combines multiple NER tag sets, and some auxiliary things (like temporal tagging) */ public Annotator ner(Properties properties) { try { return new NERCombinerAnnotator(properties); } catch (IOException e) { throw new RuntimeIOException(e); } } /** * Run TokensRegex -- annotate patterns found in tokens */ public Annotator tokensregex(Properties properties, String name) { return new TokensRegexAnnotator(name, properties); } /** * Run RegexNER -- rule-based NER based on a deterministic mapping file */ public Annotator tokensRegexNER(Properties properties, String name) { return new TokensRegexNERAnnotator(name, properties); } /** * Annotate mentions */ public Annotator entityMentions(Properties properties, String name) { return new EntityMentionsAnnotator(name, properties); } /** * Annotate for gender of tokens */ public Annotator gender(Properties properties, boolean verbose) { return new GenderAnnotator(false, properties.getProperty("gender.firstnames", DefaultPaths.DEFAULT_GENDER_FIRST_NAMES)); } /** * Annotate parse trees * * @param properties Properties that control the behavior of the parser. It use "parse.x" properties. * @return A ParserAnnotator */ public Annotator parse(Properties properties) { String parserType = properties.getProperty("parse.type", "stanford"); String maxLenStr = properties.getProperty("parse.maxlen"); if (parserType.equalsIgnoreCase("stanford")) { return new ParserAnnotator("parse", properties); } else if (parserType.equalsIgnoreCase("charniak")) { String model = properties.getProperty("parse.model"); String parserExecutable = properties.getProperty("parse.executable"); if (model == null || parserExecutable == null) { throw new RuntimeException("Both parse.model and parse.executable properties must be specified if parse.type=charniak"); } int maxLen = 399; if (maxLenStr != null) { maxLen = Integer.parseInt(maxLenStr); } return new CharniakParserAnnotator(model, parserExecutable, false, maxLen); } else { throw new RuntimeException("Unknown parser type: " + parserType + " (currently supported: stanford and charniak)"); } } public Annotator custom(Properties properties, String property) { String customName = property; String customClassName = properties.getProperty(StanfordCoreNLP.CUSTOM_ANNOTATOR_PREFIX + property); if (property.startsWith(StanfordCoreNLP.CUSTOM_ANNOTATOR_PREFIX)) { customName = property.substring(StanfordCoreNLP.CUSTOM_ANNOTATOR_PREFIX.length()); customClassName = properties.getProperty(property); } try { // name + properties return new MetaClass(customClassName).createInstance(customName, properties); } catch (MetaClass.ConstructorNotFoundException e) { try { // name return new MetaClass(customClassName).createInstance(customName); } catch (MetaClass.ConstructorNotFoundException e2) { // properties try { return new MetaClass(customClassName).createInstance(properties); } catch (MetaClass.ConstructorNotFoundException e3) { // empty arguments return new MetaClass(customClassName).createInstance(); } } } } /** * Infer the original casing of tokens */ public Annotator trueCase(Properties properties) { return new TrueCaseAnnotator(properties); } /** * Annotate for mention (statistical or hybrid) */ public Annotator mention(Properties properties) { // TO DO: split up coref and mention properties Properties corefProperties = PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_COREF + ".", true); Properties mentionProperties = PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_MENTION + ".", true); Properties allPropsForCoref = new Properties(); allPropsForCoref.putAll(corefProperties); allPropsForCoref.putAll(mentionProperties); return new MentionAnnotator(allPropsForCoref); } /** * Annotate for coreference (statistical or hybrid) */ public Annotator coref(Properties properties) { Properties corefProperties = PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_COREF + ".", true); Properties mentionProperties = PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_MENTION + ".", true); Properties allPropsForCoref = new Properties(); allPropsForCoref.putAll(corefProperties); allPropsForCoref.putAll(mentionProperties); return new CorefAnnotator(allPropsForCoref); } /** * Annotate for coreference (deterministic) */ public Annotator dcoref(Properties properties) { return new DeterministicCorefAnnotator(properties); } /** * Annotate for relations expressed in sentences */ public Annotator relations(Properties properties) { return new RelationExtractorAnnotator(properties); } /** * Annotate for sentiment in sentences */ public Annotator sentiment(Properties properties, String name) { return new SentimentAnnotator(name, properties); } /** * Annotate with the column data classifier. */ public Annotator columnData(Properties properties) { if (properties.containsKey("classify.loadClassifier")) { properties.setProperty("loadClassifier", properties.getProperty("classify.loadClassifier")); } if (!properties.containsKey("loadClassifier")) { throw new RuntimeException("Must load a classifier when creating a column data classifier annotator"); } return new ColumnDataClassifierAnnotator(properties); } /** * Annotate dependency relations in sentences */ public Annotator dependencies(Properties properties) { Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_DEPENDENCIES + '.'); return new DependencyParseAnnotator(relevantProperties); } /** * Annotate operators (e.g., quantifiers) and polarity of tokens in a sentence */ public Annotator natlog(Properties properties) { Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_NATLOG + '.'); return new NaturalLogicAnnotator(relevantProperties); } /** * Annotate {@link edu.stanford.nlp.ie.util.RelationTriple}s from text. */ public Annotator openie(Properties properties) { Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_OPENIE + '.'); return new OpenIE(relevantProperties); } /** * Annotate quotes and extract them like sentences */ public Annotator quote(Properties properties) { Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_QUOTE + '.'); return new QuoteAnnotator(relevantProperties); } /** * Attribute quotes to speakers */ public Annotator quoteattribution(Properties properties) { Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties, Annotator.STANFORD_QUOTE_ATTRIBUTION + '.'); return new QuoteAttributionAnnotator(relevantProperties); } /** * Add universal dependencies features */ public Annotator udfeats(Properties properties) { return new UDFeatureAnnotator(); } /** * Annotate for KBP relations */ public Annotator kbp(Properties properties) { return new KBPAnnotator(Annotator.STANFORD_KBP, properties); } public Annotator link(Properties properties) { return new WikidictAnnotator(Annotator.STANFORD_LINK, properties); } }