/**
 * Copyright 2007-2014
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/.
 */
package de.tudarmstadt.ukp.dkpro.core.stanfordnlp;

import static org.apache.commons.io.IOUtils.closeQuietly;
import static org.apache.uima.util.Level.FINE;
import static org.apache.uima.util.Level.INFO;
import static org.apache.uima.util.Level.WARNING;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.zip.GZIPInputStream;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.reflect.FieldUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.SingletonTagset;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.resources.CasConfigurableProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProviderFactory;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ModelProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.DependencyFlavor;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.ROOT;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.CoreNlpUtils;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.parser.common.ParserGrammar;
import edu.stanford.nlp.parser.common.ParserQuery;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.lexparser.TestOptions;
import edu.stanford.nlp.parser.shiftreduce.BaseModel;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.process.PTBEscapingProcessor;
import edu.stanford.nlp.trees.AbstractTreebankLanguagePack;
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
import edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.UniversalEnglishGrammaticalRelations;
import edu.stanford.nlp.trees.UniversalEnglishGrammaticalStructureFactory;
import edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalRelations;
import edu.stanford.nlp.util.Filters;

/**
 * Stanford Parser component.
 */
@TypeCapability(
        inputs = {
            "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence",
            "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
            "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" },
        outputs = {
            "de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent",
            "de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency" })
public class StanfordParser
    extends JCasAnnotator_ImplBase
{
    /**
     * Variants of typed-dependency extraction supported by the Stanford parser. Each constant maps
     * to one of the {@link GrammaticalStructure} accessor methods (see
     * {@link StanfordParser#doCreateDependencyTags}).
     */
    public static enum DependenciesMode {
        /**
         * Produce basic dependencies. <br>
         * Corresponding parser option: {@code basic}
         */
        BASIC,                  // basic - typedDependencies(false)
        /**
         * Produce basic dependencies plus extra arcs for control relationships, etc. <br>
         * Corresponding parser option: {@code nonCollapsed}
         */
        NON_COLLAPSED,          // nonCollapsed - typedDependencies(true)
        /**
         * Produce collapsed dependencies. This removes dependencies on specific function words
         * (e.g. prepositions and conjunctions). The result may not be a tree, e.g. it can include
         * cycles and re-entrancies. <br>
         * Corresponding parser option: {@code collapsed}
         */
        COLLAPSED,              // collapsed - typedDependenciesCollapsed(false)
        /**
         * Produce collapsed dependencies plus extra arcs for control relationships, etc. <br>
         * Corresponding parser option: {@code not available}
         */
        COLLAPSED_WITH_EXTRA,   // - none - - typedDependenciesCollapsed(true)
        /**
         * Produce collapsed dependencies plus extra arcs for control relationships, etc.
         * In this mode, dependencies are collapsed across coordination. This mode is supposed to
         * produce the best syntactic and semantic representation of a sentence. The result
         * may not be a tree (may contain cycles), but is a directed graph.<br>
         * Corresponding parser option: {@code CCPropagated}
         */
        CC_PROPAGATED,          // CCPropagated - typedDependenciesCCprocessed(true)
        /**
         * Produce dependencies collapsed across coordination. No extra dependencies for control
         * relations are included.<br>
         * Corresponding parser option: {@code not available}
         */
        CC_PROPAGATED_NO_EXTRA, // - none - - typedDependenciesCCprocessed(false)
        /**
         * Produce mostly collapsed dependencies that remain a tree structure. Several steps are
         * omitted:
         * <ol>
         * <li>no processing of relative clauses</li>
         * <li>no xsubj relations</li>
         * <li>no propagation of conjuncts</li>
         * </ol>
         * Corresponding parser option: {@code tree}
         */
        TREE,                   // tree - typedDependencies(false) + collapseDependenciesTree(tdl)
        /**
         * Produce enhanced dependencies (via {@link GrammaticalStructure#typedDependenciesEnhanced}).
         */
        ENHANCED,
        /**
         * Produce enhanced++ dependencies (via
         * {@link GrammaticalStructure#typedDependenciesEnhancedPlusPlus}).
         */
        ENHANCED_PLUS_PLUS
    }

    /**
     * Write the tag set(s) to the log when a model is loaded.
     */
    public static final String PARAM_PRINT_TAGSET = ComponentParameters.PARAM_PRINT_TAGSET;
    @ConfigurationParameter(name = PARAM_PRINT_TAGSET, mandatory = true, defaultValue = "false")
    protected boolean printTagSet;

    /**
     * Use this language instead of the document language to resolve the model and tag set mapping.
     */
    public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE;
    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false)
    protected String language;

    /**
     * Variant of a model. Used to address a specific model if there are multiple models
     * for one language.
     */
    public static final String PARAM_VARIANT = ComponentParameters.PARAM_VARIANT;
    @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false)
    protected String variant;

    /**
     * Location from which the model is read.
     */
    public static final String PARAM_MODEL_LOCATION = ComponentParameters.PARAM_MODEL_LOCATION;
    @ConfigurationParameter(name = PARAM_MODEL_LOCATION, mandatory = false)
    protected String modelLocation;

    /**
     * Location of the mapping file for part-of-speech tags to UIMA types.
     */
    public static final String PARAM_POS_MAPPING_LOCATION =
            ComponentParameters.PARAM_POS_MAPPING_LOCATION;
    @ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false)
    protected String posMappingLocation;

    /**
     * Location of the mapping file for constituent tags to UIMA types.
     */
    public static final String PARAM_CONSTITUENT_MAPPING_LOCATION =
            ComponentParameters.PARAM_CONSTITUENT_MAPPING_LOCATION;
    @ConfigurationParameter(name = PARAM_CONSTITUENT_MAPPING_LOCATION, mandatory = false)
    protected String constituentMappingLocation;

    /**
     * Sets whether to create or not to create dependency annotations.
     *
     * <p>Default: {@code true}
     */
    public static final String PARAM_WRITE_DEPENDENCY = ComponentParameters.PARAM_WRITE_DEPENDENCY;
    @ConfigurationParameter(name = PARAM_WRITE_DEPENDENCY, mandatory = true, defaultValue = "true")
    private boolean writeDependency;

    /**
     * Sets the kind of dependencies being created.
     *
     * <p>Default: {@link DependenciesMode#TREE TREE}
     * @see DependenciesMode
     */
    public static final String PARAM_MODE = "mode";
    @ConfigurationParameter(name = PARAM_MODE, mandatory = false, defaultValue = "TREE")
    protected DependenciesMode mode;

    /**
     * Sets whether to create or not to create constituent tags. This is required for POS-tagging
     * and lemmatization.
     * <p>
     * Default: {@code true}
     */
    public static final String PARAM_WRITE_CONSTITUENT =
            ComponentParameters.PARAM_WRITE_CONSTITUENT;
    @ConfigurationParameter(name = PARAM_WRITE_CONSTITUENT, mandatory = true,
            defaultValue = "true")
    private boolean writeConstituent;

    /**
     * If this parameter is set to true, each sentence is annotated with a PennTree-Annotation,
     * containing the whole parse tree in Penn Treebank style format.
     * <p>
     * Default: {@code false}
     */
    public static final String PARAM_WRITE_PENN_TREE = ComponentParameters.PARAM_WRITE_PENN_TREE;
    @ConfigurationParameter(name = PARAM_WRITE_PENN_TREE, mandatory = true,
            defaultValue = "false")
    private boolean writePennTree;

    /**
     * This parameter can be used to override the standard behavior which uses the <i>Sentence</i>
     * annotation as the basic unit for parsing.
     * <p>If the parameter is set with the name of an annotation type <i>x</i>, the parser will no
     * longer parse <i>Sentence</i>-annotations, but <i>x</i>-Annotations.</p>
     * <p>Default: {@code null}
     */
    public static final String PARAM_ANNOTATIONTYPE_TO_PARSE = "annotationTypeToParse";
    @ConfigurationParameter(name = PARAM_ANNOTATIONTYPE_TO_PARSE, mandatory = false)
    private String annotationTypeToParse;

    /**
     * Sets whether to create or not to create POS tags. The creation of constituent tags must be
     * turned on for this to work.
     * <p>
     * Default: {@code false}
     */
    public static final String PARAM_WRITE_POS = ComponentParameters.PARAM_WRITE_POS;
    @ConfigurationParameter(name = PARAM_WRITE_POS, mandatory = true, defaultValue = "false")
    private boolean writePos;

    /**
     * Sets whether to use or not to use already existing POS tags from another annotator for the
     * parsing process.
     * <p>
     * Default: {@code true}
     */
    public static final String PARAM_READ_POS = ComponentParameters.PARAM_READ_POS;
    @ConfigurationParameter(name = PARAM_READ_POS, mandatory = true, defaultValue = "true")
    private boolean readPos;

    /**
     * Maximum number of tokens in a sentence. Longer sentences are not parsed. This is to avoid
     * out of memory exceptions.
     * <p>
     * Default: {@code 130}
     *
     * @see TestOptions#maxLength
     */
    public static final String PARAM_MAX_SENTENCE_LENGTH =
            ComponentParameters.PARAM_MAX_SENTENCE_LENGTH;
    @ConfigurationParameter(name = PARAM_MAX_SENTENCE_LENGTH, mandatory = true,
            defaultValue = "130")
    private int maxTokens;

    /**
     * Controls when the factored parser considers a sentence to be too complex and falls back to
     * the PCFG parser.
     * <p>
     * Default: {@code 200000}
     *
     * @see TestOptions#MAX_ITEMS
     */
    public static final String PARAM_MAX_ITEMS = "maxItems";
    @ConfigurationParameter(name = PARAM_MAX_ITEMS, mandatory = true, defaultValue = "200000")
    private int maxItems;

    /**
     * Enable all traditional PTB3 token transforms (like -LRB-, -RRB-).
     *
     * @see PTBEscapingProcessor
     */
    public static final String PARAM_PTB3_ESCAPING = "ptb3Escaping";
    @ConfigurationParameter(name = PARAM_PTB3_ESCAPING, mandatory = true, defaultValue = "true")
    private boolean ptb3Escaping;

    /**
     * List of extra token texts (usually single character strings) that should be treated like
     * opening quotes and escaped accordingly before being sent to the parser.
     */
    public static final String PARAM_QUOTE_BEGIN = "quoteBegin";
    @ConfigurationParameter(name = PARAM_QUOTE_BEGIN, mandatory = false)
    private List<String> quoteBegin;

    /**
     * List of extra token texts (usually single character strings) that should be treated like
     * closing quotes and escaped accordingly before being sent to the parser.
     */
    public static final String PARAM_QUOTE_END = "quoteEnd";
    @ConfigurationParameter(name = PARAM_QUOTE_END, mandatory = false)
    private List<String> quoteEnd;

    /**
     * If {@code true}, punctuation words are kept when extracting dependencies; otherwise the
     * language pack's punctuation-word filter is applied.
     * <p>
     * Default: {@code false}
     */
    public static final String PARAM_KEEP_PUNCTUATION = "keepPunctuation";
    @ConfigurationParameter(name = PARAM_KEEP_PUNCTUATION, mandatory = true,
            defaultValue = "false")
    private boolean keepPunctuation;

    // Lazily-configured providers for the parser model and the tag-to-UIMA-type mappings.
    private CasConfigurableProviderBase<ParserGrammar> modelProvider;
    private MappingProvider posMappingProvider;
    private MappingProvider constituentMappingProvider;

    @Override
    public void initialize(UimaContext context)
        throws ResourceInitializationException
    {
        super.initialize(context);

        // At least one output layer must be enabled; fall back to dependencies.
        if (!writeConstituent && !writeDependency && !writePennTree) {
            getLogger().warn("Invalid parameter configuration... will create dependency tags.");
            writeDependency = true;
        }

        // Check if we want to create Lemmas or POS tags while Constituent tags
        // are disabled. In this case, we have to switch on constituent tagging
        if (!writeConstituent && writePos) {
            getLogger().warn("Constituent tag creation is required for POS tagging. Will create "
                    + "constituent tags.");
            writeConstituent = true;
        }

        modelProvider = new StanfordParserModelProvider();

        posMappingProvider = MappingProviderFactory.createPosMappingProvider(posMappingLocation,
                language, modelProvider);

        constituentMappingProvider = MappingProviderFactory.createConstituentMappingProvider(
                constituentMappingLocation, language, modelProvider);
    }

    /**
     * Processes the given text using the StanfordParser.
     *
     * @param aJCas
     *            the {@link JCas} to process
     * @see org.apache.uima.analysis_component.JCasAnnotator_ImplBase#process(org.apache.uima.jcas.JCas)
     */
    @Override
    public void process(JCas aJCas)
        throws AnalysisEngineProcessException
    {
        modelProvider.configure(aJCas.getCas());
        posMappingProvider.configure(aJCas.getCas());
        constituentMappingProvider.configure(aJCas.getCas());

        // Determine the unit of parsing: Sentence by default, or a user-supplied type.
        Type typeToParse;
        if (annotationTypeToParse != null) {
            typeToParse = aJCas.getCas().getTypeSystem().getType(annotationTypeToParse);
        }
        else {
            typeToParse = JCasUtil.getType(aJCas, Sentence.class);
        }
        FSIterator<Annotation> typeToParseIterator = aJCas.getAnnotationIndex(typeToParse)
                .iterator();

        // Iterate over each Sentence or whichever construct to parse
        while (typeToParseIterator.hasNext()) {
            Annotation currAnnotationToParse = typeToParseIterator.next();

            // Skip units with no visible text.
            if (StringUtils.isBlank(currAnnotationToParse.getCoveredText())) {
                continue;
            }

            List<HasWord> tokenizedSentence = new ArrayList<HasWord>();
            List<Token> tokens = new ArrayList<Token>();

            // Split sentence to tokens for annotating indexes
            for (Token token : JCasUtil.selectCovered(Token.class, currAnnotationToParse)) {
                tokenizedSentence.add(tokenToWord(token));
                tokens.add(token);
            }

            getContext().getLogger().log(FINE, tokenizedSentence.toString());
            ParserGrammar parser = modelProvider.getResource();

            Tree parseTree;
            try {
                // Skip overly long sentences to avoid out-of-memory situations.
                if (tokenizedSentence.size() > maxTokens) {
                    continue;
                }

                if (ptb3Escaping) {
                    tokenizedSentence = CoreNlpUtils.applyPtbEscaping(tokenizedSentence,
                            quoteBegin, quoteEnd);
                }

                // Get parse
                ParserQuery query = parser.parserQuery();
                query.parse(tokenizedSentence);
                parseTree = query.getBestParse();
            }
            catch (Exception e) {
                throw new AnalysisEngineProcessException(e);
            }

            // Create new StanfordAnnotator object
            StanfordAnnotator sfAnnotator = null;
            try {
                sfAnnotator = new StanfordAnnotator(new TreeWithTokens(parseTree, tokens));
                sfAnnotator.setPosMappingProvider(posMappingProvider);
                sfAnnotator.setConstituentMappingProvider(constituentMappingProvider);
            }
            catch (Exception e) {
                getLogger().error(
                        "Unable to parse [" + currAnnotationToParse.getCoveredText() + "]");
                throw new AnalysisEngineProcessException(e);
            }

            // Create Penn bracketed structure annotations
            if (writePennTree) {
                sfAnnotator.createPennTreeAnnotation(currAnnotationToParse.getBegin(),
                        currAnnotationToParse.getEnd());
            }

            // Create dependency annotations
            if (writeDependency) {
                doCreateDependencyTags(parser, sfAnnotator, parseTree, tokens);
            }

            // Create constituent annotations
            if (writeConstituent) {
                sfAnnotator.createConstituentAnnotationFromTree(parser.getTLPParams()
                        .treebankLanguagePack(), writePos);
            }
        }
    }

    /**
     * Extracts typed dependencies from the given parse tree and creates the corresponding UIMA
     * {@link Dependency} annotations.
     *
     * @param aParser
     *            the parser whose language pack supplies the grammatical structure factory
     * @param sfAnnotator
     *            helper used to create the UIMA annotations
     * @param parseTree
     *            the constituent parse from which dependencies are derived
     * @param tokens
     *            tokens of the current unit, in order; dependency indices (1-based) map into this
     *            list
     */
    protected void doCreateDependencyTags(ParserGrammar aParser, StanfordAnnotator sfAnnotator,
            Tree parseTree, List<Token> tokens)
    {
        GrammaticalStructure gs;
        try {
            TreebankLanguagePack tlp = aParser.getTLPParams().treebankLanguagePack();
            gs = tlp.grammaticalStructureFactory(
                    keepPunctuation ? Filters.acceptFilter() : tlp.punctuationWordRejectFilter(),
                    tlp.typedDependencyHeadFinder()).newGrammaticalStructure(parseTree);
        }
        catch (UnsupportedOperationException e) {
            // We already warned in the model provider if dependencies are not supported, so here
            // we just do nothing and skip the dependencies.
            return;
        }

        Collection<TypedDependency> dependencies = null;
        switch (mode) {
        case BASIC:
            dependencies = gs.typedDependencies(); // gs.typedDependencies(false);
            break;
        case NON_COLLAPSED:
            dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true);
            break;
        case COLLAPSED_WITH_EXTRA:
            dependencies = gs.typedDependenciesCollapsed(true);
            break;
        case COLLAPSED:
            dependencies = gs.typedDependenciesCollapsed(false);
            break;
        case CC_PROPAGATED:
            dependencies = gs.typedDependenciesCCprocessed(true);
            break;
        case CC_PROPAGATED_NO_EXTRA:
            dependencies = gs.typedDependenciesCCprocessed(false);
            break;
        case ENHANCED:
            dependencies = gs.typedDependenciesEnhanced();
            break;
        case ENHANCED_PLUS_PLUS:
            dependencies = gs.typedDependenciesEnhancedPlusPlus();
            break;
        case TREE:
            dependencies = gs.typedDependenciesCollapsedTree();
            break;
        default:
            throw new IllegalArgumentException("Unknown mode: [" + mode + "]");
        }

        for (TypedDependency currTypedDep : dependencies) {
            // Stanford dependency indices are 1-based; index 0 denotes the virtual root node.
            int govIndex = currTypedDep.gov().index();
            int depIndex = currTypedDep.dep().index();

            Dependency dep;

            if (govIndex != 0) {
                Token govToken = tokens.get(govIndex - 1);
                Token depToken = tokens.get(depIndex - 1);

                dep = sfAnnotator.createDependencyAnnotation(currTypedDep.reln(), govToken,
                        depToken);
            }
            else {
                // Root relation: governor and dependent both point at the dependent token.
                Token depToken = tokens.get(depIndex - 1);

                dep = new ROOT(sfAnnotator.getJCas());
                dep.setDependencyType(currTypedDep.reln().toString());
                dep.setGovernor(depToken);
                dep.setDependent(depToken);
                dep.setBegin(dep.getDependent().getBegin());
                dep.setEnd(dep.getDependent().getEnd());
                dep.addToIndexes();
            }

            dep.setFlavor(currTypedDep.extra() ? DependencyFlavor.ENHANCED
                    : DependencyFlavor.BASIC);
        }
    }

    /**
     * Converts a DKPro {@link Token} into a Stanford {@link CoreLabel}. If {@code readPos} is
     * disabled, any POS tag carried over by the conversion is cleared so the parser assigns its
     * own tags.
     *
     * @param aToken
     *            the token to convert
     * @return the corresponding {@link CoreLabel}
     */
    protected CoreLabel tokenToWord(Token aToken)
    {
        CoreLabel l = CoreNlpUtils.tokenToWord(aToken);
        l.setValue(aToken.getCoveredText());
        if (!readPos) {
            l.setTag(null);
        }
        return l;
    }

    /**
     * Model provider which loads the serialized Stanford parser model and registers the POS,
     * constituent, and dependency tag sets it declares.
     */
    private class StanfordParserModelProvider
        extends ModelProviderBase<ParserGrammar>
    {
        {
            setContextObject(StanfordParser.this);

            setDefault(ARTIFACT_ID, "${groupId}.stanfordnlp-model-parser-${language}-${variant}");
            setDefault(LOCATION,
                    "classpath:/${package}/lib/parser-${language}-${variant}.properties");
            setDefaultVariantsLocation("${package}/lib/parser-default-variants.map");

            setOverride(LOCATION, modelLocation);
            setOverride(LANGUAGE, language);
            setOverride(VARIANT, variant);
        }

        @Override
        protected ParserGrammar produceResource(URL aUrl)
            throws IOException
        {
            getContext().getLogger().log(Level.INFO,
                    "Loading parser from serialized file " + aUrl + " ...");
            ObjectInputStream in = null;
            InputStream is = null;
            try {
                is = aUrl.openStream();
                if (aUrl.toString().endsWith(".gz")) {
                    // it's faster to do the buffering _outside_ the gzipping as here
                    in = new ObjectInputStream(new BufferedInputStream(new GZIPInputStream(is)));
                }
                else {
                    in = new ObjectInputStream(new BufferedInputStream(is));
                }
                ParserGrammar pd = (ParserGrammar) in.readObject();
                AbstractTreebankLanguagePack lp = (AbstractTreebankLanguagePack) pd.getTLPParams()
                        .treebankLanguagePack();

                // For the moment we hard-code to generate the old non-universal dependencies.
                // Setting this through a parameter would be a problem if the model would be shared
                // between multiple AEs that use different settings for this parameter.
                pd.getTLPParams().setGenerateOriginalDependencies(true);

                Properties metadata = getResourceMetaData();

                // https://mailman.stanford.edu/pipermail/parser-user/2012-November/002117.html
                // The tagIndex does give all and only the set of POS tags used in the
                // current grammar. However, these are the split tags actually used by the
                // grammar. If you really want the user-visible non-split tags of the
                // original treebank, then you'd need to map them all through the
                // op.treebankLanguagePack().basicCategory(). -- C. Manning
                SingletonTagset posTags = new SingletonTagset(POS.class,
                        metadata.getProperty("pos.tagset"));

                if (pd instanceof LexicalizedParser) {
                    LexicalizedParser lexParser = (LexicalizedParser) pd;
                    for (String tag : lexParser.tagIndex) {
                        String t = lp.basicCategory(tag);
                        // Strip grammatical function from tag
                        int gfIdx = t.indexOf(lp.getGfCharacter());
                        if (gfIdx > 0) {
                            // TODO should collect syntactic functions in separate tagset
                            // syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
                            t = t.substring(0, gfIdx);
                        }
                        posTags.add(lp.basicCategory(t));
                    }
                    addTagset(posTags, writePos);
                }

                // https://mailman.stanford.edu/pipermail/parser-user/2012-November/002117.html
                // For constituent categories, there isn't an index of just them. The
                // stateIndex has both constituent categories and POS tags in it, so you'd
                // need to set difference out the tags from the tagIndex, and then it's as
                // above. -- C. Manning
                SingletonTagset constTags = new SingletonTagset(Constituent.class,
                        metadata.getProperty("constituent.tagset"));
                Iterable<String> states;
                if (pd instanceof LexicalizedParser) {
                    states = ((LexicalizedParser) pd).stateIndex;
                }
                else if (pd instanceof ShiftReduceParser) {
                    BaseModel model = (BaseModel) FieldUtils.readField(pd, "model", true);
                    // Reflective access returns Object; the field is known to hold the parser's
                    // state strings, hence the unchecked cast is safe here.
                    @SuppressWarnings("unchecked")
                    Iterable<String> knownStates = (Iterable<String>) FieldUtils.readField(model,
                            "knownStates", true);
                    states = knownStates;
                    // states = ((ShiftReduceParser) pd).tagSet();
                }
                else {
                    throw new IllegalStateException("Unknown parser type ["
                            + pd.getClass().getName() + "]");
                }

                for (String tag : states) {
                    String t = lp.basicCategory(tag);

                    // https://mailman.stanford.edu/pipermail/parser-user/2012-December/002156.html
                    // The parser algorithm used is a binary parser, so what we do is
                    // binarize trees by turning A -> B, C, D into A -> B, @A, @A -> C, D.
                    // (That's roughly how it goes, although the exact details are somewhat
                    // different.) When parsing, we parse to a binarized tree and then
                    // unbinarize it before returning. That's the origin of the @ classes.
                    // -- J. Bauer
                    if (!t.startsWith("@")) {
                        // Strip grammatical function from tag
                        int gfIdx = t.indexOf(lp.getGfCharacter());
                        if (gfIdx > 0) {
                            // TODO should collect syntactic functions in separate tagset
                            // syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
                            t = t.substring(0, gfIdx);
                        }
                        if (t.length() > 0) {
                            constTags.add(t);
                        }
                    }
                }
                constTags.removeAll(posTags);
                if (writeConstituent) {
                    addTagset(constTags);
                }

                // There is no way to determine the relations via the GrammaticalStructureFactory
                // API, so we do it manually here for the languages known to support this.
                GrammaticalStructureFactory gsf = null;
                try {
                    gsf = lp.grammaticalStructureFactory(lp.punctuationWordRejectFilter(),
                            lp.typedDependencyHeadFinder());
                }
                catch (UnsupportedOperationException e) {
                    getContext().getLogger().log(WARNING,
                            "Current model does not seem to support " + "dependencies.");
                }

                if (gsf != null
                        && EnglishGrammaticalStructureFactory.class.equals(gsf.getClass())) {
                    SingletonTagset depTags = new SingletonTagset(Dependency.class, "stanford341");
                    for (GrammaticalRelation r : EnglishGrammaticalRelations.values()) {
                        depTags.add(r.getShortName());
                    }
                    if (writeDependency) {
                        addTagset(depTags);
                    }
                }
                else if (gsf != null && UniversalEnglishGrammaticalStructureFactory.class
                        .equals(gsf.getClass())) {
                    SingletonTagset depTags = new SingletonTagset(Dependency.class, "universal");
                    for (GrammaticalRelation r : UniversalEnglishGrammaticalRelations.values()) {
                        depTags.add(r.getShortName());
                    }
                    if (writeDependency) {
                        addTagset(depTags);
                    }
                }
                else if (gsf != null && ChineseGrammaticalRelations.class.equals(gsf.getClass())) {
                    SingletonTagset depTags = new SingletonTagset(Dependency.class, "stanford");
                    for (GrammaticalRelation r : ChineseGrammaticalRelations.values()) {
                        depTags.add(r.getShortName());
                    }
                    if (writeDependency) {
                        addTagset(depTags);
                    }
                }

                if (printTagSet) {
                    getContext().getLogger().log(INFO, getTagset().toString());
                }

                pd.setOptionFlags("-maxLength", String.valueOf(maxTokens), "-MAX_ITEMS",
                        String.valueOf(maxItems));
                return pd;
            }
            catch (IllegalAccessException e) {
                throw new IllegalStateException(e);
            }
            catch (ClassNotFoundException e) {
                throw new IOException(e);
            }
            finally {
                closeQuietly(in);
                closeQuietly(is);
            }
        }
    };
}