/**
* Copyright (c) 2010, Regents of the University of Colorado All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution. Neither the name of the University of Colorado at
* Boulder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package clear.treebank;
import clear.util.tuple.JObjectObjectTuple;
import java.util.ArrayList;
/**
* Korean Treebank library.
*
* @author Jinho D. Choi <b>Last update:</b> 9/1/2010
*/
public class TBKrLib extends TBLib {
    // clausal-level pos-tags
    static final public String POS_S = "S";
    static final public String POS_AP = "AP";
    static final public String POS_DP = "DP";
    static final public String POS_NP = "NP";
    static final public String POS_IP = "IP";
    static final public String POS_VP = "VP";
    static final public String POS_VNP = "VNP";
    static final public String POS_Q = "Q";
    static final public String POS_X = "X";
    static final public String POS_L = "L";
    static final public String POS_R = "R";

    // function tags
    static final public String TAG_AJT = "AJT";
    static final public String TAG_CMP = "CMP";
    static final public String TAG_CNJ = "CNJ";
    static final public String TAG_INT = "INT";
    static final public String TAG_MOD = "MOD";
    static final public String TAG_OBJ = "OBJ";
    static final public String TAG_PRN = "PRN";
    static final public String TAG_SBJ = "SBJ";

    // DEP_* label constants
    static final public String DEP_P = "P";
    static final public String DEP_ADV = "ADV";
    static final public String DEP_MOD = "MOD";
    static final public String DEP_AMOD = "AMOD";
    static final public String DEP_DMOD = "DMOD";
    static final public String DEP_NMOD = "NMOD";
    static final public String DEP_VMOD = "VMOD";
    static final public String DEP_QMOD = "QMOD";

    /** Placeholder that stands in for a literal "/" form while the string is being split. */
    static final private String ESCAPED_SLASH = "-FS-";
    /** Placeholder that stands in for a literal "+" form while the string is being split. */
    static final private String ESCAPED_PLUS = "-PS-";

    /**
     * Splits a morpheme-analysis string of the shape {@code form/tag+form/tag+...}
     * into (form, tag) tuples.
     *
     * <p>{@code '+'} separates entries and {@code '/'} separates a form from its
     * tag. A form that is itself a literal {@code "/"} (written {@code "//tag"})
     * or a literal {@code "+"} (written {@code "+/tag"}) is preserved. A single
     * leading {@code '+'} is treated as a separator and dropped, unless it is
     * itself a form, i.e. the string starts with {@code "+/"} but not with
     * {@code "+//"}. Every returned form is prefixed with {@code "_"}.
     *
     * @param morphem the analysis string to split; must not be {@code null}
     * @return the tuples in input order, each constructed as
     *         ({@code "_" + form}, tag); an empty list when the pieces do not
     *         pair up into form/tag couples
     */
    static public ArrayList<JObjectObjectTuple<String, String>> splitMorphem(String morphem) {
        // "+/TAG" at the very start is a literal plus form, whereas "+//TAG" is a
        // leading separator followed by a literal slash form. Only the latter kind
        // of leading '+' may be dropped, otherwise the plus form would be lost.
        boolean startsWithPlusForm = morphem.startsWith("+/") && !morphem.startsWith("+//");
        if (morphem.startsWith("+") && !startsWithPlusForm) {
            morphem = morphem.substring(1);
        }

        // Hide literal "/" and "+" forms behind placeholders so the split below
        // only sees real separators. The slash must be escaped first.
        morphem = morphem.replaceAll("//", ESCAPED_SLASH + "/");
        morphem = morphem.replaceAll("\\+/", ESCAPED_PLUS + "/");

        String[] pieces = morphem.split("\\+|/");
        ArrayList<JObjectObjectTuple<String, String>> list = new ArrayList<>(pieces.length / 2);

        // An odd number of pieces means some form has no tag (or vice versa).
        if (pieces.length % 2 != 0) {
            return list;
        }

        for (int i = 0; i + 1 < pieces.length; i += 2) {
            list.add(new JObjectObjectTuple<>("_" + unescapeForm(pieces[i]), pieces[i + 1]));
        }
        return list;
    }

    /**
     * Tells whether a morpheme-analysis string consists solely of entries whose
     * tag is one of {@code SF}, {@code SP}, {@code SS}, {@code SE}, {@code SO}
     * or {@code SW}.
     *
     * @param morphem the analysis string, in the format accepted by
     *                {@link #splitMorphem(String)}; must not be {@code null}
     * @return {@code true} if at least one (form, tag) pair was parsed and every
     *         tag is one of the six listed above; {@code false} otherwise,
     *         including when the string yields no pairs at all
     */
    static public boolean isPunctuation(String morphem) {
        ArrayList<JObjectObjectTuple<String, String>> pairs = splitMorphem(morphem);

        // Nothing parsed (empty or malformed input) must not count as punctuation.
        if (pairs.isEmpty()) {
            return false;
        }

        for (JObjectObjectTuple<String, String> pair : pairs) {
            if (!isPunctuationTag(pair.o2)) {
                return false;
            }
        }
        return true;
    }

    /** Maps a split piece back to the literal "/" or "+" it stood for, if it is a placeholder. */
    static private String unescapeForm(String form) {
        switch (form) {
            case ESCAPED_SLASH:
                return "/";
            case ESCAPED_PLUS:
                return "+";
            default:
                return form;
        }
    }

    /** Returns whether the tag is exactly one of SF, SP, SS, SE, SO or SW. */
    static private boolean isPunctuationTag(String tag) {
        switch (tag) {
            case "SF":
            case "SP":
            case "SS":
            case "SE":
            case "SO":
            case "SW":
                return true;
            default:
                return false;
        }
    }
}