package edu.stanford.nlp.trees.international.spanish;
import edu.stanford.nlp.ling.CategoryWordTag;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.ArrayUtils;
import edu.stanford.nlp.util.Generics;
/**
 * Head rules for Spanish, expressed as a rule table for
 * {@link AbstractCollinsHeadFinder}.
 *
 * <p>The constructor fills {@code nonTerminalInfo} with one entry per parent
 * category. Each entry is a list of rules, and each rule is a
 * {@code String[]} whose first element is a search directive (this class
 * uses {@code "left"}, {@code "right"}, {@code "leftdis"} and
 * {@code "rightdis"}) followed by the child categories to look for. The
 * exact interpretation of a rule is defined by the superclass, not here.
 *
 * @author Jon Gauthier
 */
public class SpanishHeadFinder extends AbstractCollinsHeadFinder {

  private static final long serialVersionUID = -841219428125220698L;

  /**
   * Every verb part-of-speech tag known to these head rules. The list is
   * spliced into individual rules by {@link #insertVerbs(String[], String[])}
   * so that it only has to be maintained in one place.
   */
  private static final String[] allVerbs = new String[] {
    "vmip000", "vmii000", "vmif000", "vmis000", "vmic000",
    "vmsp000", "vmsi000",
    "vmm0000", "vmn0000", "vmg0000", "vmp0000",
    "vaip000", "vaii000", "vaif000", "vais000", "vaic000",
    "vasp000", "vasi000",
    "vam0000", "van0000", "vag0000", "vap0000",
    "vsip000", "vsii000", "vsis000", "vsif000", "vsic000",
    "vssp000", "vssi000",
    "vsm0000", "vsn0000", "vsg0000", "vsp0000"
  };

  /**
   * Creates a head finder configured with a new
   * {@link SpanishTreebankLanguagePack}.
   */
  public SpanishHeadFinder() {
    this(new SpanishTreebankLanguagePack());
  }

  /**
   * Creates a head finder and populates the head-rule table.
   *
   * @param tlp language pack passed to the superclass; its start symbol is
   *            also registered as a category that uses the root rules
   */
  public SpanishHeadFinder(TreebankLanguagePack tlp) {
    super(tlp);

    nonTerminalInfo = Generics.newHashMap();

    // "sentence"
    // The same rule list is shared by the start symbol and every
    // sentence-level category registered below.
    String[][] rootRules = new String[][] {
      {"right", "grup.verb", "s.a", "sn"},
      {"left", "S"},
      {"right", "sadv", "grup.adv", "neg", "interjeccio", "i", "sp", "grup.prep"},
      insertVerbs(new String[] {"rightdis"},
                  new String[] {"nc0s000", "nc0p000", "nc00000", "np00000", "rg", "rn"})};

    nonTerminalInfo.put(tlp.startSymbol(), rootRules);
    nonTerminalInfo.put("S", rootRules);
    nonTerminalInfo.put("sentence", rootRules);
    nonTerminalInfo.put("inc", rootRules);

    // adjectival phrases
    String[][] adjectivePhraseRules = new String[][] {
      {"leftdis", "grup.a", "s.a", "spec"}};
    nonTerminalInfo.put("s.a", adjectivePhraseRules);
    nonTerminalInfo.put("sa", adjectivePhraseRules);

    nonTerminalInfo.put("grup.a", new String[][] {
      {"rightdis", "aq0000", "ao0000"},
      insertVerbs(new String[] {"right"}, new String[] {}),
      {"right", "rg", "rn"}});

    // adverbial phrases
    nonTerminalInfo.put("sadv", new String[][] {{"left", "grup.adv", "sadv"}});
    nonTerminalInfo.put("grup.adv", new String[][] {
      {"left", "conj"},
      {"rightdis", "rg", "rn", "neg", "grup.adv"},
      {"rightdis", "pr000000", "pi000000", "nc0s000", "nc0p000", "nc00000", "np00000"}});
    nonTerminalInfo.put("neg", new String[][] {{"leftdis", "rg", "rn"}});

    // noun phrases
    nonTerminalInfo.put("sn", new String[][] {
      {"leftdis", "nc0s000", "nc0p000", "nc00000"},
      {"left", "grup.nom", "grup.w", "grup.z", "sn"},
      {"leftdis", "spec"}});
    nonTerminalInfo.put("grup.nom", new String[][] {
      {"leftdis", "nc0s000", "nc0p000", "nc00000", "np00000", "w", "grup.w"},
      {"leftdis", "pi000000", "pd000000"},
      {"left", "grup.nom", "sp"},
      {"leftdis", "pn000000", "aq0000", "ao0000"},
      {"left", "grup.a", "i", "grup.verb"},
      {"leftdis", "grup.adv"}});

    // verb phrases
    nonTerminalInfo.put("grup.verb", new String[][] {insertVerbs(new String[] {"left"}, new String[] {})});
    nonTerminalInfo.put("infinitiu", new String[][] {insertVerbs(new String[] {"left"}, new String[] {"infinitiu"})});
    nonTerminalInfo.put("gerundi", new String[][] {{"left", "vmg0000", "vag0000", "vsg0000", "gerundi"}});
    nonTerminalInfo.put("participi", new String[][] {{"left", "aq", "vmp0000", "vap0000", "vsp0000", "grup.a"}});

    // specifiers
    nonTerminalInfo.put("spec", new String[][] {
      {"left", "conj", "spec"}, // e.g. "entre A y B" ("between A and B")
      {"leftdis", "da0000", "de0000", "di0000", "dd0000", "dp0000", "dn0000", "dt0000"},
      {"leftdis", "z0", "grup.z"},
      {"left", "rg", "rn"},
      {"leftdis", "pt000000", "pe000000", "pd000000", "pp000000", "pi000000", "pn000000", "pr000000"},
      {"left", "grup.adv", "w"}});

    // etc.
    nonTerminalInfo.put("conj", new String[][] {
      {"leftdis", "cs", "cc"},
      {"leftdis", "grup.cc", "grup.cs"},
      {"left", "sp"}});
    nonTerminalInfo.put("interjeccio", new String[][] {
      {"leftdis", "i", "nc0s000", "nc0p000", "nc00000", "np00000", "pi000000"},
      {"left", "interjeccio"}});
    nonTerminalInfo.put("relatiu", new String[][] {{"left", "pr000000"}});

    // prepositional phrases
    nonTerminalInfo.put("sp", new String[][] {{"left", "prep", "sp"}});
    nonTerminalInfo.put("prep", new String[][] {{"leftdis", "sp000", "prep", "grup.prep"}});

    // custom categories
    // NOTE(review): "grup.cc" lists "cs", the same as "grup.cs" -- confirm
    // that "cc" was not intended here.
    nonTerminalInfo.put("grup.cc", new String[][] {{"left", "cs"}});
    nonTerminalInfo.put("grup.cs", new String[][] {{"left", "cs"}});
    nonTerminalInfo.put("grup.prep", new String[][] {{"left", "prep", "grup.prep", "s"}});
    nonTerminalInfo.put("grup.pron", new String[][] {{"rightdis", "px000000"}});
    // The last rule for "grup.w" carries a directive but no categories.
    nonTerminalInfo.put("grup.w", new String[][] {{"right", "w"}, {"leftdis", "z0"}, {"left"}});
    nonTerminalInfo.put("grup.z", new String[][] {
      {"leftdis", "z0", "zu", "zp", "zd", "zm"},
      {"right", "nc0s000", "nc0p000", "nc00000", "np00000"}});
  }

  /**
   * Builds a single head rule that contains every verb tag in
   * {@link #allVerbs}, placed between {@code toLeft} and {@code toRight}.
   *
   * <p>Static because it reads no instance state; this also keeps the
   * constructor from invoking an instance method on a partially built object.
   *
   * @param toLeft  elements that precede the verb tags (in this class, the
   *                search directive)
   * @param toRight elements that follow the verb tags; may be empty
   * @return a new array: {@code toLeft}, then all verb tags, then
   *         {@code toRight}
   */
  private static String[] insertVerbs(String[] toLeft, String[] toRight) {
    return ArrayUtils.concatenate(toLeft, ArrayUtils.concatenate(allVerbs, toRight));
  }

  /**
   * Debugging entry point: loads a treebank and determines the head terminal
   * of every tree in it using a {@code SpanishHeadFinder}. The heads are
   * computed but neither printed nor stored by this method. <br>
   * Usage: <code>
   * java edu.stanford.nlp.trees.international.spanish.SpanishHeadFinder treebankFilePath
   * </code>
   *
   * <p>If no argument is given, a usage message is written to standard error
   * and the JVM exits with status 1.
   *
   * @param args {@code args[0]} is the treebankFilePath; any further
   *             arguments are ignored
   */
  public static void main(String[] args) {
    if (args.length < 1) {
      System.err.println("Usage: java " + SpanishHeadFinder.class.getName() + " treebankFilePath");
      System.exit(1);
    }

    Treebank treebank = new DiskTreebank();
    CategoryWordTag.suppressTerminalDetails = true;
    treebank.loadPath(args[0]);

    final HeadFinder chf = new SpanishHeadFinder();
    treebank.apply(new TreeVisitor() {
      @Override
      public void visitTree(Tree pt) {
        // Only exercises the head rules on this tree; the resulting head
        // terminal is deliberately discarded.
        pt.headTerminal(chf);
      }
    });
  }
}