package edu.stanford.nlp.trees.international.hebrew;
import edu.stanford.nlp.trees.AbstractTreebankLanguagePack;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.TreeReaderFactory;
/**
 * Treebank language pack for Hebrew: supplies the punctuation tag sets, start
 * symbol, head finders, treebank file extension, label-annotation characters
 * and tree reader factory used for the Hebrew treebank.
 *
 * <p>The array-returning accessors hand back the shared static arrays
 * directly (no copy is made), so callers must treat them as read-only.
 *
 * @author Spence Green
 */
public class HebrewTreebankLanguagePack extends AbstractTreebankLanguagePack {

  private static final long serialVersionUID = 4787589385598144401L;

  /** Punctuation tags; also returned as the punctuation words. */
  private static final String[] pennPunctTags = {"yyCLN", "yyCM","yyDASH","yyDOT","yyEXCL","yyLRB","yyQM","yyQUOT","yyRRB","yySCLN"};

  /** Sentence-final punctuation tags; also returned as the sentence-final punctuation words. */
  private static final String[] pennSFPunctTags = {"yyDOT","yyEXCL","yyQM"};

  /** The punctuation tags above plus "-NONE-"; returned by {@link #evalBIgnoredPunctuationTags()}. */
  private static final String[] collinsPunctTags = {"-NONE-","yyCLN", "yyCM","yyDASH","yyDOT","yyEXCL","yyLRB","yyQM","yyQUOT","yyRRB","yySCLN"};

  /** Characters returned by {@link #labelAnnotationIntroducingCharacters()}. */
  private static final char[] annotationIntroducingChars = {'-', '=', '|', '#', '^', '~'};

  /**
   * wsg: This is the convention in Reut's preprocessed version of the treebank, and the Collins stuff.
   * But we could change it to ROOT....
   */
  private static final String[] pennStartSymbols = {"TOP"};

  /**
   * Returns the punctuation tags.
   *
   * @return the shared punctuation tag array (not a copy)
   */
  @Override
  public String[] punctuationTags() {
    return pennPunctTags;
  }

  /**
   * Returns the punctuation words. This is deliberately the same array as
   * {@link #punctuationTags()}.
   *
   * @return the shared punctuation tag array (not a copy)
   */
  @Override
  public String[] punctuationWords() {
    return pennPunctTags;//Same as PTB
  }

  /**
   * Returns the sentence-final punctuation tags.
   *
   * @return the shared sentence-final punctuation tag array (not a copy)
   */
  @Override
  public String[] sentenceFinalPunctuationTags() {
    return pennSFPunctTags;
  }

  /**
   * Returns the start symbols; currently the single symbol "TOP".
   *
   * @return the shared start-symbol array (not a copy)
   */
  @Override
  public String[] startSymbols() {
    return pennStartSymbols;
  }

  /**
   * Returns the head finder for this language pack.
   *
   * @return a new {@link LeftHeadFinder} on every call
   */
  //TODO: Need to add Reut's rules
  @Override
  public HeadFinder headFinder() {
    return new LeftHeadFinder();
  }

  /**
   * Returns the head finder used for typed dependencies. It is the same kind
   * of head finder as {@link #headFinder()}.
   *
   * @return a new {@link LeftHeadFinder} on every call
   */
  //TODO: Need to add Reut's rules
  @Override
  public HeadFinder typedDependencyHeadFinder() {
    return new LeftHeadFinder();
  }

  /**
   * Returns the sentence-final punctuation words. This is the same array as
   * {@link #sentenceFinalPunctuationTags()}.
   *
   * @return the shared sentence-final punctuation tag array (not a copy)
   */
  @Override
  public String[] sentenceFinalPunctuationWords() {
    return pennSFPunctTags;
  }

  /**
   * Returns the punctuation tags ignored by EVALB-style evaluation: the
   * regular punctuation tags plus "-NONE-".
   *
   * @return the shared ignored-tag array (not a copy)
   */
  @Override
  public String[] evalBIgnoredPunctuationTags() {
    return collinsPunctTags;
  }

  /**
   * Returns the file extension of treebank files.
   *
   * @return the string "tree"
   */
  @Override
  public String treebankFileExtension() {
    return "tree";
  }

  /**
   * Returns the label-annotation-introducing characters:
   * '-', '=', '|', '#', '^' and '~'.
   * NOTE(review): presumably these mark where annotations begin inside a node
   * label; confirm against the superclass contract.
   *
   * @return the shared character array (not a copy)
   */
  @Override
  public char[] labelAnnotationIntroducingCharacters() {
    return annotationIntroducingChars;
  }

  /**
   * Returns the factory for tree readers over this treebank.
   *
   * @return a new {@link HebrewTreeReaderFactory} on every call
   */
  @Override
  public TreeReaderFactory treeReaderFactory() {
    return new HebrewTreeReaderFactory();
  }
}