package clear.morph;
import clear.dep.DepLib;
import clear.ftr.xml.DepFtrXml;
import clear.util.tuple.JMorphTuple;
import java.util.ArrayDeque;
import java.util.ArrayList;
public class MorphKr {
static public int SEL_FST_ST = 0;
static public int SEL_LST_ST = 1;
static public int SEL_LST_CP = 2;
static public int SEL_LST_EM = 3;
static public int SEL_LST_DS = 4;
static public int SEL_LST_PS = 5;
private JMorphTuple[] t_morphems;
public boolean isX;
public MorphKr() {
t_morphems = new JMorphTuple[6];
for (int i = 0; i < 5; i++) {
t_morphems[i] = new JMorphTuple(DepLib.ROOT_TAG, DepLib.ROOT_TAG);
}
}
public MorphKr(String lemma) {
ArrayList<JMorphTuple> list = getAllMorphems(lemma);
list = removeParens(list);
t_morphems = new JMorphTuple[6];
isX = MorphKrAnalyzer.isX(list);
initPunctuation(list);
initMorphems(list);
/*
* System.out.println("-- "+lemma+" --");
*
* for (JMorphTuple tup : t_morphems) { if (tup != null)
* System.out.println(tup.toString()); else System.out.println("Null");
}
*/
}
static public ArrayList<JMorphTuple> getAllMorphems(String lemma) {
String[] tmp = lemma.replaceAll("\\+/", "PLUS/").split("\\+");
ArrayList<JMorphTuple> list = new ArrayList<>(tmp.length);
String stem, pos;
int idx;
// System.out.println(Arrays.toString(tmp));
for (String str : tmp) {
idx = str.lastIndexOf("/");
stem = str.substring(0, idx).trim();
pos = str.substring(idx + 1).trim();
// pos = sejongToMach(pos);
if (stem.equals("PLUS")) {
stem = "+";
} else if (stem.matches("\\d+")) {
stem = "0";
}
list.add(new JMorphTuple(stem, pos));
}
return list;
}
static public String sejongToMach(String pos) {
if (pos.matches("NNG|NNP|SL|SH")) {
return "NN";
}
if (pos.matches("NNB")) {
return "NX";
}
if (pos.matches("NP")) {
return "NP";
}
if (pos.matches("NR|SN")) {
return "NU";
}
if (pos.matches("VV")) {
return "VV";
}
if (pos.matches("VA|VCN")) {
return "AJ";
}
if (pos.matches("VCP")) {
return "CP";
}
if (pos.matches("VX")) {
return "VX";
}
if (pos.matches("MM")) {
return "DT";
}
if (pos.matches("MA.*")) {
return "AD";
}
if (pos.matches("J.*")) {
return "JO";
}
if (pos.matches("EP")) {
return "EP";
}
if (pos.matches("E.*")) {
return "EM";
}
if (pos.matches("XPN")) {
return "PF";
}
if (pos.matches("XSN")) {
return "SN";
}
if (pos.matches("XSV")) {
return "SV";
}
if (pos.matches("XSA")) {
return "SJ";
}
if (pos.matches("IC")) {
return "IJ";
}
if (pos.matches("NF")) {
return "NR";
}
if (pos.matches("NA|NV|XR")) {
return "UK";
}
if (pos.matches("S.*")) {
return "SY";
}
System.out.println(pos);
return pos;
}
private ArrayList<JMorphTuple> removeParens(ArrayList<JMorphTuple> list) {
ArrayDeque<JMorphTuple> deque = new ArrayDeque<>();
JMorphTuple sub;
int count = 0;
for (JMorphTuple tup : list) {
if (tup.lemma.equals(")") && count > 0) {
do {
sub = deque.removeLast();
} while (!sub.lemma.equals("("));
deque.add(new JMorphTuple("(*)", "LR"));
count--;
} else {
if (tup.lemma.equals("(")) {
count++;
}
deque.add(tup);
}
}
return new ArrayList<>(deque);
}
private void initPunctuation(ArrayList<JMorphTuple> list) {
int i, size = list.size();
JMorphTuple tup = list.get(size - 1);
if (MorphKrAnalyzer.isPunctuation(tup.pos)) {
t_morphems[SEL_LST_PS] = tup;
}
ArrayList<JMorphTuple> remove = new ArrayList<>();
for (i = 0; i < size; i++) {
tup = list.get(i);
if (MorphKrAnalyzer.isPunctuation(tup.pos)) {
remove.add(tup);
}
}
list.removeAll(remove);
if (list.isEmpty()) {
isX = true;
}
}
private void initMorphems(ArrayList<JMorphTuple> list) {
int i, size = list.size(), idx = size - 1;
JMorphTuple tup;
for (i = size - 1; i >= 0; i--) {
tup = list.get(i);
if (MorphKrAnalyzer.isCaseParticle(tup.pos)) {
if (t_morphems[SEL_LST_CP] == null) {
t_morphems[SEL_LST_CP] = tup;
}
idx = i - 1;
} else if (MorphKrAnalyzer.isEndingMarker(tup.pos)) {
if (t_morphems[SEL_LST_EM] == null) {
t_morphems[SEL_LST_EM] = tup;
}
idx = i - 1;
} else if (t_morphems[SEL_LST_DS] == null && MorphKrAnalyzer.isDerivationalSuffix(tup.pos)) {
t_morphems[SEL_LST_DS] = tup;
}
}
if (idx >= 0) {
t_morphems[SEL_LST_ST] = list.get(idx);
}
if (size > 0) {
t_morphems[SEL_FST_ST] = list.get(0);
}
}
public String getMorphem(int loc, String type) {
if (t_morphems[loc] == null) {
return null;
}
if (type.equals(DepFtrXml.F_LEMMA)) {
return t_morphems[loc].lemma;
} else {
return t_morphems[loc].pos;
}
}
@Override
public String toString() {
StringBuilder build = new StringBuilder();
for (JMorphTuple tup : t_morphems) {
if (tup == null) {
build.append("null");
} else {
build.append(tup.toString());
}
build.append(" | ");
}
return build.toString().trim();
}
}