package hu.u_szeged.nlp.pos;
import java.util.Collection;
import java.util.LinkedHashSet;
/**
 * Static helper that produces analyses for compound words written with a
 * hyphen, by combining the results of {@code CompoundWord}, {@code KRUtils}
 * and the analyser obtained from {@code MagyarlancResourceHolder.getRFSA()}.
 */
public class HyphenicWord {

  /**
   * Analyses a word that contains a hyphen.
   *
   * <p>The word is split at its FIRST hyphen into a first part and a second
   * part (any further hyphens stay inside the second part). Three cases are
   * then tried in order, and only the first one that applies is used:
   * <ol>
   * <li>the two parts concatenated without the hyphen are bisectable: the
   * result of {@code CompoundWord.getCompatibleAnalises(firstPart, secondPart, true)}
   * is returned as is;</li>
   * <li>the first part has at least one analysis and the second part is
   * bisectable;</li>
   * <li>the first part is bisectable and the second part has at least one
   * analysis.</li>
   * </ol>
   * In cases 2 and 3 every compatible pair of analyses contributes the root
   * of the second analysis, with each {@code "$"} replaced by
   * {@code "$" + firstPart + "-"}.
   *
   * @param hyphenicCompoundWord the word to analyse
   * @return the collected analyses in insertion order; an empty set if the
   *         word contains no hyphen or none of the three cases applies. In
   *         case 1 the value comes straight from {@code CompoundWord}
   *         (NOTE(review): whether that can be {@code null} is not visible
   *         here - confirm against {@code CompoundWord})
   * @throws NullPointerException if {@code hyphenicCompoundWord} is {@code null}
   */
  public static LinkedHashSet<String> analyseHyphenicCompoundWord(String hyphenicCompoundWord) {
    // A single scan both detects the hyphen and locates it.
    int hyphenPosition = hyphenicCompoundWord.indexOf('-');
    if (hyphenPosition < 0) {
      return new LinkedHashSet<String>();
    }

    String firstPart = hyphenicCompoundWord.substring(0, hyphenPosition);
    String secondPart = hyphenicCompoundWord.substring(hyphenPosition + 1);

    // Both the part before and the part after the hyphen have an analysis
    // (e.g. "adat-kezelőt").
    if (CompoundWord.isBisectable(firstPart + secondPart)) {
      return CompoundWord.getCompatibleAnalises(firstPart, secondPart, true);
    }

    LinkedHashSet<String> analises = new LinkedHashSet<String>();

    // The part before the hyphen has an analysis and the part after the
    // hyphen can be split into two parts. The analyser is queried once and
    // its result is reused for both the test and the pairing.
    Collection<String> firstPartAnalises = MagyarlancResourceHolder.getRFSA().analyse(firstPart);
    if (firstPartAnalises.size() > 0 && CompoundWord.isBisectable(secondPart)) {
      int bisectIndex = CompoundWord.bisectIndex(secondPart);
      LinkedHashSet<String> secondSectionAnalises = CompoundWord.getCompatibleAnalises(
          secondPart.substring(0, bisectIndex), secondPart.substring(bisectIndex));
      addCompatibleRoots(analises, firstPartAnalises, secondSectionAnalises, firstPart);
      return analises;
    }

    // The part before the hyphen can be split into two parts and the part
    // after the hyphen has an analysis.
    if (CompoundWord.isBisectable(firstPart)) {
      Collection<String> secondPartAnalises = MagyarlancResourceHolder.getRFSA().analyse(secondPart);
      if (secondPartAnalises.size() > 0) {
        int bisectIndex = CompoundWord.bisectIndex(firstPart);
        LinkedHashSet<String> firstSectionAnalises = CompoundWord.getCompatibleAnalises(
            firstPart.substring(0, bisectIndex), firstPart.substring(bisectIndex));
        addCompatibleRoots(analises, firstSectionAnalises, secondPartAnalises, firstPart);
      }
    }

    return analises;
  }

  /**
   * Pairs every leading analysis with every trailing analysis and, for each
   * pair whose roots {@code CompoundWord.isCompatibleAnalyises} accepts, adds
   * the trailing root to {@code target} with each {@code "$"} replaced by
   * {@code "$" + prefix + "-"}.
   *
   * @param target set that receives the rewritten trailing roots
   * @param leadingAnalises analyses belonging to the part before the hyphen
   * @param trailingAnalises analyses belonging to the part after the hyphen
   * @param prefix text inserted, followed by a hyphen, after each {@code "$"}
   */
  private static void addCompatibleRoots(LinkedHashSet<String> target,
      Collection<String> leadingAnalises, Collection<String> trailingAnalises, String prefix) {
    String replacement = "$" + prefix + "-";
    for (String leadingAnalyse : leadingAnalises) {
      // The leading root does not depend on the inner loop, so compute it once.
      String leadingRoot = KRUtils.getRoot(leadingAnalyse);
      for (String trailingAnalyse : trailingAnalises) {
        String trailingRoot = KRUtils.getRoot(trailingAnalyse);
        if (CompoundWord.isCompatibleAnalyises(leadingRoot, trailingRoot)) {
          target.add(trailingRoot.replace("$", replacement));
        }
      }
    }
  }

  /**
   * Entry point kept for compatibility; it intentionally does nothing.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
  }
}