package edu.stanford.nlp.trees.international.pennchinese;
import edu.stanford.nlp.trees.AbstractCollinsHeadFinder;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Generics;
/**
* A HeadFinder for Chinese based on rules described in Sun/Jurafsky NAACL 2004.
*
* @author Galen Andrew
* @version Jul 12, 2004
*/
public class SunJurafskyChineseHeadFinder extends AbstractCollinsHeadFinder {

  private static final long serialVersionUID = -7942375587642755210L;

  /**
   * Builds a head finder that uses a freshly created
   * {@code ChineseTreebankLanguagePack}.
   */
  public SunJurafskyChineseHeadFinder() {
    this(new ChineseTreebankLanguagePack());
  }

  /**
   * Builds a head finder for the supplied language pack and fills in the
   * rule table.
   *
   * <p>Each entry of {@code nonTerminalInfo} maps a parent category to a
   * single rule: a direction keyword ({@code "left"} or {@code "right"})
   * followed by zero or more candidate child categories. {@code defaultRule}
   * is set to {@code "right"}. How these rules are applied is decided by the
   * superclass, which is not visible here.
   *
   * @param tlp the treebank language pack handed to the superclass constructor
   */
  public SunJurafskyChineseHeadFinder(TreebankLanguagePack tlp) {
    super(tlp);

    defaultRule = new String[]{"right"};
    nonTerminalInfo = Generics.newHashMap();

    // Top-level categories.
    rule("ROOT", "left", "IP");
    rule("PAIR", "left", "IP");

    // Phrasal categories.
    rule("ADJP", "right", "ADJP", "JJ", "AD");
    rule("ADVP", "right", "ADVP", "AD", "CS", "JJ", "NP", "PP", "P", "VA", "VV");
    rule("CLP", "right", "CLP", "M", "NN", "NP");
    rule("CP", "right", "CP", "IP", "VP");
    rule("DNP", "right", "DEG", "DNP", "DEC", "QP");
    rule("DP", "left", "M", "DP", "DT", "OD");
    rule("DVP", "right", "DEV", "AD", "VP");
    rule("IP", "right", "VP", "IP", "NP");
    rule("LCP", "right", "LCP", "LC");
    rule("LST", "right", "CD", "NP", "QP");
    rule("NP", "right", "NP", "NN", "IP", "NR", "NT");
    rule("PP", "left", "P", "PP");
    rule("PRN", "left", "PU");
    rule("QP", "right", "QP", "CLP", "CD");
    rule("UCP", "left", "IP", "NP", "VP");

    // Verb phrases and verb compounds.
    rule("VCD", "left", "VV", "VA", "VE");
    rule("VP", "left", "VE", "VC", "VV", "VNV", "VPT", "VRD", "VSB", "VCD", "VP");
    rule("VPT", "left", "VA", "VV");
    rule("VCP", "left");
    rule("VNV", "left");
    rule("VRD", "left", "VV", "VA");
    rule("VSB", "right", "VV", "VE");

    // FRAG seems only to be used for bits at the beginnings of articles:
    // "Xinwenshe<DATE>" and "(wan)".
    rule("FRAG", "right", "VV", "NN");

    // Some POS tags apparently sit where phrases are supposed to be.
    rule("CD", "right", "CD");
    rule("NN", "right", "NN");
    rule("NR", "right", "NR");

    // These POS tags were added to do primitive morphology for
    // character-level parsing. It shouldn't affect anything else because
    // heads of preterminals are not generally queried - GMA
    rule("VV", "left");
    rule("VA", "left");
    rule("VC", "left");
    rule("VE", "left");
  }

  /**
   * Stores one rule for {@code parent} in {@code nonTerminalInfo}, wrapped
   * as a one-row {@code String[][]}.
   *
   * @param parent the parent category the rule is registered under
   * @param spec   the direction keyword followed by any candidate child
   *               categories, in the order they should appear in the rule
   */
  private void rule(String parent, String... spec) {
    nonTerminalInfo.put(parent, new String[][]{spec});
  }

  /* Yue Zhang and Stephen Clark 2008 based their rules on Sun/Jurafsky but changed a few things.
     Constituent Rules
     ADJP r ADJP JJ AD; r
     ADVP r ADVP AD CS JJ NP PP P VA VV; r
     CLP r CLP M NN NP; r
     CP r CP IP VP; r
     DNP r DEG DNP DEC QP; r
     DP r M; l DP DT OD; l
     DVP r DEV AD VP; r
     FRAG r VV NR NN NT; r
     IP r VP IP NP; r
     LCP r LCP LC; r
     LST r CD NP QP; r
     NP r NP NN IP NR NT; r
     NN r NP NN IP NR NT; r
     PP l P PP; l
     PRN l PU; l
     QP r QP CLP CD; r
     UCP l IP NP VP; l
     VCD l VV VA VE; l
     VP l VE VC VV VNV VPT VRD VSB VCD VP; l
     VPT l VA VV; l
     VRD l VVI VA; l
     VSB r VV VE; r
     default r
  */
}