package edu.stanford.nlp.trees.ud;
import edu.stanford.nlp.ie.NERClassifierCombiner;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.process.Morphology;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.StringUtils;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.stream.Collectors;
/**
*
* Command-line utility to:
*
* a) convert constituency trees to basic English UD trees
* b) convert basic dependency trees to enhanced and enhanced++ UD graphs
*
*
* @author Sebastian Schuster
*/
public class UniversalDependenciesConverter {
private static final boolean USE_NAME = System.getProperty("UDUseNameRelation") != null;
private UniversalDependenciesConverter() {} // static main
private static GrammaticalStructure semanticGraphToGrammaticalStructure(SemanticGraph sg) {
/* sg.typedDependency() generates an ArrayList */
List<TypedDependency> deps = (List<TypedDependency>) sg.typedDependencies();
IndexedWord root = deps.get(0).gov();
TreeGraphNode rootNode = new TreeGraphNode(root);
GrammaticalStructure gs = new UniversalEnglishGrammaticalStructure(deps, rootNode);
return gs;
}
/**
* Converts basic UD tree to enhanced UD graph.
*
*/
private static SemanticGraph convertBasicToEnhanced(SemanticGraph sg) {
GrammaticalStructure gs = semanticGraphToGrammaticalStructure(sg);
return SemanticGraphFactory.generateEnhancedDependencies(gs);
}
/**
* Converts basic UD tree to enhanced++ UD graph.
*
*/
private static SemanticGraph convertBasicToEnhancedPlusPlus(SemanticGraph sg) {
GrammaticalStructure gs = semanticGraphToGrammaticalStructure(sg);
return SemanticGraphFactory.generateEnhancedPlusPlusDependencies(gs);
}
private static SemanticGraph convertTreeToBasic(Tree tree) {
addLemmata(tree);
addNERTags(tree);
SemanticGraph sg = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.BASIC,
GrammaticalStructure.Extras.NONE, null, false, true);
addLemmata(sg);
if (USE_NAME) {
addNERTags(sg);
}
return sg;
}
private static class TreeToSemanticGraphIterator implements Iterator<SemanticGraph> {
private Iterator<Tree> treeIterator;
private Tree currentTree; // = null;
public TreeToSemanticGraphIterator(Iterator<Tree> treeIterator) {
this.treeIterator = treeIterator;
}
@Override
public boolean hasNext() {
return treeIterator.hasNext();
}
@Override
public SemanticGraph next() {
Tree t = treeIterator.next();
currentTree = t;
return convertTreeToBasic(t);
}
public Tree getCurrentTree() {
return this.currentTree;
}
}
private static Morphology MORPH = new Morphology();
private static void addLemmata(SemanticGraph sg) {
sg.vertexListSorted().forEach(w -> {
if(w.lemma() == null) {
w.setLemma(MORPH.lemma(w.word(), w.tag()));
}
});
}
private static void addLemmata(Tree tree) {
tree.yield().forEach(l-> {
CoreLabel w = (CoreLabel) l;
if(w.lemma() == null) {
w.setLemma(MORPH.lemma(w.word(), w.tag()));
}
});
}
private static NERClassifierCombiner NER_TAGGER = null;
private static void addNERTags(SemanticGraph sg) {
if (NER_TAGGER == null) {
NER_TAGGER = NERClassifierCombiner.createNERClassifierCombiner(null, new Properties());
}
List<CoreLabel> labels = sg.vertexListSorted().stream().map(IndexedWord::backingLabel).collect(Collectors.toList());
NER_TAGGER.classify(labels);
}
private static void addNERTags(Tree tree) {
if (NER_TAGGER == null) {
NER_TAGGER = NERClassifierCombiner.createNERClassifierCombiner(null, new Properties());
}
List<CoreLabel> labels = tree.yield().stream().map(w -> (CoreLabel) w).collect(Collectors.toList());
NER_TAGGER.classify(labels);
}
/**
*
* Converts a constituency tree to the English basic, enhanced, or
* enhanced++ Universal dependencies representation, or an English basic
* Universal dependencies tree to the enhanced or enhanced++ representation.
*
* Command-line options:<br>
* {@code -treeFile}: File with PTB-formatted constituency trees<br>
* {@code -conlluFile}: File with basic dependency trees in CoNLL-U format<br>
* {@code -outputRepresentation}: "basic" (default), "enhanced", or "enhanced++"
*
*/
public static void main(String[] args) {
Properties props = StringUtils.argsToProperties(args);
String treeFileName = props.getProperty("treeFile");
String conlluFileName = props.getProperty("conlluFile");
String outputRepresentation = props.getProperty("outputRepresentation", "basic");
Iterator<SemanticGraph> sgIterator; // = null;
if (treeFileName != null) {
MemoryTreebank tb = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false));
tb.loadPath(treeFileName);
Iterator<Tree> treeIterator = tb.iterator();
sgIterator = new TreeToSemanticGraphIterator(treeIterator);
} else if (conlluFileName != null) {
CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
try {
sgIterator = reader.getIterator(IOUtils.readerFromString(conlluFileName));
} catch (Exception e) {
throw new RuntimeException(e);
}
} else {
System.err.println("No input file specified!");
System.err.println("");
System.err.printf("Usage: java %s [-treeFile trees.tree | -conlluFile deptrees.conllu]"
+ " [-outputRepresentation basic|enhanced|enhanced++ (default: basic)]%n",
UniversalDependenciesConverter.class.getCanonicalName());
return;
}
CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
while (sgIterator.hasNext()) {
SemanticGraph sg = sgIterator.next();
if (treeFileName != null) {
//add UPOS tags
Tree tree = ((TreeToSemanticGraphIterator) sgIterator).getCurrentTree();
Tree uposTree = UniversalPOSMapper.mapTree(tree);
List<Label> uposLabels = uposTree.preTerminalYield();
for (IndexedWord token: sg.vertexListSorted()) {
int idx = token.index() - 1;
String uposTag = uposLabels.get(idx).value();
token.set(CoreAnnotations.CoarseTagAnnotation.class, uposTag);
}
} else {
addLemmata(sg);
if (USE_NAME) {
addNERTags(sg);
}
}
if (outputRepresentation.equalsIgnoreCase("enhanced")) {
sg = convertBasicToEnhanced(sg);
} else if (outputRepresentation.equalsIgnoreCase("enhanced++")) {
sg = convertBasicToEnhancedPlusPlus(sg);
}
System.out.print(writer.printSemanticGraph(sg));
}
}
}