/**
* Copyright (c) 2010, Regents of the University of Colorado All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution. Neither the name of the University of Colorado at
* Boulder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package clear.treebank;
/**
* English Treebank library.
*
* @author Jinho D. Choi <b>Last update:</b> 9/1/2010
*/
public class TBEnLib extends TBLib {
// clausal-level pos-tags
static final public String POS_S = "S";
static final public String POS_SBAR = "SBAR";
static final public String POS_SBARQ = "SBARQ";
static final public String POS_SINV = "SINV";
static final public String POS_SQ = "SQ";
// phrasal-level pos-tags
static final public String POS_ADJP = "ADJP";
static final public String POS_ADVP = "ADVP";
static final public String POS_CAPTION = "CAPTION";
static final public String POS_CIT = "CIT";
static final public String POS_CONJP = "CONJP";
static final public String POS_EDITED = "EDITED";
static final public String POS_EMBED = "EMBED";
static final public String POS_FRAG = "FRAG";
static final public String POS_INTJ = "INTJ";
static final public String POS_LST = "LST";
static final public String POS_META = "META";
static final public String POS_NAC = "NAC";
static final public String POS_NML = "NML";
static final public String POS_NP = "NP";
static final public String POS_NX = "NX";
static final public String POS_PP = "PP";
static final public String POS_PRN = "PRN";
static final public String POS_PRT = "PRT";
static final public String POS_QP = "QP";
static final public String POS_RRC = "RRC";
static final public String POS_UCP = "UCP";
static final public String POS_UH = "UH";
static final public String POS_VP = "VP";
static final public String POS_WHADJP = "WHADJP";
static final public String POS_WHADVP = "WHADVP";
static final public String POS_WHNP = "WHNP";
static final public String POS_WHPP = "WHPP";
static final public String POS_X = "X";
// word-level pos-tags
static final public String POS_ADD = "ADD";
static final public String POS_AFX = "AFX";
static final public String POS_CC = "CC";
static final public String POS_CD = "CD";
static final public String POS_CODE = "CODE";
static final public String POS_DT = "DT";
static final public String POS_DOLLAR = "$";
static final public String POS_EX = "EX";
static final public String POS_FW = "FW";
static final public String POS_IN = "IN";
static final public String POS_JJ = "JJ";
static final public String POS_JJR = "JJR";
static final public String POS_JJS = "JJS";
static final public String POS_LS = "LS";
static final public String POS_MD = "MD";
static final public String POS_NN = "NN";
static final public String POS_NNS = "NNS";
static final public String POS_NNP = "NNP";
static final public String POS_NNPS = "NNPS";
static final public String POS_PDT = "PDT";
static final public String POS_POS = "POS";
static final public String POS_PRP = "PRP";
static final public String POS_PRP$ = "PRP$";
static final public String POS_RB = "RB";
static final public String POS_RBR = "RBR";
static final public String POS_RBS = "RBS";
static final public String POS_RP = "RP";
static final public String POS_TO = "TO";
static final public String POS_VB = "VB";
static final public String POS_VBD = "VBD";
static final public String POS_VBG = "VBG";
static final public String POS_VBN = "VBN";
static final public String POS_VBP = "VBP";
static final public String POS_VBZ = "VBZ";
static final public String POS_WDT = "WDT";
static final public String POS_WP = "WP";
static final public String POS_WP$ = "WP$";
static final public String POS_WRB = "WRB";
static final public String POS_XX = "XX";
// punctuation pos-tags
static final public String POS_COLON = ":";
static final public String POS_COMMA = ",";
static final public String POS_HYPH = "HYPH";
static final public String POS_LDQ = "``";
static final public String POS_LRB = "-LRB-";
static final public String POS_NFP = "NFP";
static final public String POS_PERIOD = ".";
static final public String POS_RDQ = "''";
static final public String POS_RRB = "-RRB-";
static final public String POS_SYM = "SYM";
// function tags
static final public String TAG_ADV = "ADV";
static final public String TAG_BNF = "BNF";
static final public String TAG_CLF = "CLF";
static final public String TAG_CLR = "CLR";
static final public String TAG_DIR = "DIR";
static final public String TAG_DTV = "DTV";
static final public String TAG_ETC = "ETC";
static final public String TAG_EXT = "EXT";
static final public String TAG_HLN = "HLN";
static final public String TAG_IMP = "IMP";
static final public String TAG_LGS = "LGS";
static final public String TAG_LOC = "LOC";
static final public String TAG_MNR = "MNR";
static final public String TAG_NOM = "NOM";
static final public String TAG_PRD = "PRD";
static final public String TAG_PRP = "PRP";
static final public String TAG_PUT = "PUT";
static final public String TAG_SBJ = "SBJ";
static final public String TAG_SEZ = "SEZ";
static final public String TAG_TMP = "TMP";
static final public String TAG_TPC = "TPC";
static final public String TAG_TTL = "TTL";
static final public String TAG_UNF = "UNF";
static final public String TAG_VOC = "VOC";
// empty categories
static final public String EC_EXP = "*EXP*";
static final public String EC_ELLIPSE = "*?*";
static final public String EC_ICH = "*ICH*";
static final public String EC_NOT = "*NOT*";
static final public String EC_NULL = "0";
static final public String EC_PPA = "*PPA*";
static final public String EC_PRO = "*PRO*";
static final public String EC_RNR = "*RNR*";
static final public String EC_STAR = "*";
static final public String EC_TRACE = "*T*";
static final public String EC_UNIT = "*U*";
static public boolean isConjunction(String pos) {
return isWordConjunction(pos) || isPuncConjunction(pos);
}
static public boolean isWordConjunction(String pos) {
return pos.equals(POS_CC) || pos.equals(POS_CONJP);
}
static public boolean isPuncConjunction(String pos) {
return pos.equals(POS_COMMA) || pos.equals(POS_COLON);
}
static public boolean isCorrelativeConjunction(String words) {
words = words.toLowerCase();
return words.equals("either") || words.equals("neither") || words.equals("whether") || words.equals("both") || words.equals("not only");
}
static public boolean isNounLike(String pos) {
return isNoun(pos) || pos.equals(POS_NP) || pos.equals(POS_NML) || pos.equals(POS_WHNP) || pos.contains(TAG_NOM);
}
static public boolean isNoun(String pos) {
return pos.startsWith(POS_NN) || pos.equals(POS_PRP);
}
static public boolean isVerb(String pos) {
return pos.startsWith(POS_VB) || pos.equals(POS_MD);
}
static public boolean isAdjectiveLike(String pos) {
return isAdjective(pos) || pos.equals(POS_ADJP);
}
static public boolean isAdjective(String pos) {
return pos.startsWith(POS_JJ);
}
static public boolean isAdverb(String pos) {
return pos.startsWith(POS_RB);
}
static public boolean isWhAdverbLike(String pos) {
return pos.equals(POS_WHADVP) || pos.equals(POS_WRB) || pos.equals(POS_WHPP) || pos.equals(POS_IN);
}
static public boolean isPunctuation(String pos) {
return pos.equals(POS_COLON) || pos.equals(POS_COMMA) || pos.equals(POS_PERIOD) || pos.equals(POS_NFP) || pos.equals(POS_HYPH) || pos.equals(POS_SYM)
|| pos.equals(POS_LDQ) || pos.equals(POS_RDQ) || pos.equals(POS_LRB) || pos.equals(POS_RRB);
}
static public boolean isSentence(String pos) {
return pos.equals(POS_S) || pos.equals(POS_SBAR) || pos.equals(POS_SBARQ) || pos.equals(POS_SINV) || pos.equals(POS_SQ);
}
static public boolean isBe(String form) {
return form.matches("be|been|being|am|is|was|are|were|'m|'s|'re");
}
static public boolean isHave(String form) {
return form.matches("have|has|had|having|'ve|'d");
}
static public boolean isDo(String form) {
return form.matches("do|does|did|done|doing");
}
static public boolean isGet(String form) {
return form.equals("get") || form.equals("gets") || form.equals("got") || form.equals("gotten") || form.equals("getting");
}
static public boolean isBecome(String form) {
return form.matches("become|becomes|became");
}
static public boolean isAux(String form) {
return isBe(form) || isHave(form) || isDo(form);
}
static public boolean isLightVerb(String form) {
return form.equals("take") || form.equals("takes") || form.equals("took") || form.equals("taken") || form.equals("taking")
|| form.equals("give") || form.equals("gives") || form.equals("gave") || form.equals("given") || form.equals("giving")
|| form.equals("make") || form.equals("makes") || form.equals("made") || form.equals("making")
|| form.equals("do") || form.equals("does") || form.equals("did") || form.equals("done") || form.equals("doing")
|| form.equals("have") || form.equals("has") || form.equals("had") || form.equals("having");
}
static public boolean isComplementizer(String form) {
form = form.toLowerCase();
return form.matches("what|when|where|which|who|whom|whose|why|how|that");
}
}