/*
Copyright (C) 2003 Pierrick Brihaye
pierrick.brihaye@wanadoo.fr
Original Perl code :
Portions (c) 2002 QAMUS LLC (www.qamus.org),
(c) 2002 Trustees of the University of Pennsylvania
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the
Free Software Foundation, Inc.
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
or connect to:
http://www.fsf.org/copyleft/gpl.html
*/
package marmot.thirdparty.aramorph;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
/**
 * A solution for a word: the flat list of feature tags derived from the
 * part-of-speech (POS) entries of a prefix, a stem and a suffix dictionary entry.
 *
 * @author Pierrick Brihaye, 2003
 */
public class Solution {

    /** Placeholder emitted when no stem entry is left after normalization. */
    private static final String NO_STEM = "NOSTEM";

    /**
     * Tags which, when a leading stem POS entry ends with one of them, cause
     * that entry to be moved to the end of the prefix entries.
     */
    private static final String[] PREFIX_FEATURES = {
        "CONJ", //TODO : approve
        "EMPHATIC_PARTICLE", //TODO : approve
        "FUNC_WORD", //TODO : approve
        "FUT_PART", //TODO : approve
        "INTERJ", //TODO : approve
        "INTERROG_PART", //TODO : approve
        "IV1S",
        "IV2MS",
        "IV2FS",
        "IV3MS",
        "IV3FS",
        "IV2D",
        "IV2FD",
        "IV3MD",
        "IV3FD",
        "IV1P",
        "IV2MP",
        "IV2FP",
        "IV3MP",
        "IV3FP",
        "NEG_PART", //TODO : approve
        "PREP", //TODO : approve
        "RESULT_CLAUSE_PARTICLE"
    };

    /**
     * Tags which, when a trailing stem POS entry ends with one of them, cause
     * that entry to be moved to the front of the suffix entries.
     */
    private static final String[] SUFFIX_FEATURES = {
        "CASE_INDEF_NOM",
        "CASE_INDEF_ACC",
        "CASE_INDEF_ACCGEN",
        "CASE_INDEF_GEN",
        "CASE_DEF_NOM",
        "CASE_DEF_ACC",
        "CASE_DEF_ACCGEN",
        "CASE_DEF_GEN",
        "NSUFF_MASC_SG_ACC_INDEF",
        "NSUFF_FEM_SG",
        "NSUFF_MASC_DU_NOM",
        "NSUFF_MASC_DU_NOM_POSS",
        "NSUFF_MASC_DU_ACCGEN",
        "NSUFF_MASC_DU_ACCGEN_POSS",
        "NSUFF_FEM_DU_NOM",
        "NSUFF_FEM_DU_NOM_POSS",
        "NSUFF_FEM_DU_ACCGEN",
        "NSUFF_FEM_DU_ACCGEN_POSS",
        "NSUFF_MASC_PL_NOM",
        "NSUFF_MASC_PL_NOM_POSS",
        "NSUFF_MASC_PL_ACCGEN",
        "NSUFF_MASC_PL_ACCGEN_POSS",
        "NSUFF_FEM_PL",
        "POSS_PRON_1S",
        "POSS_PRON_2MS",
        "POSS_PRON_2FS",
        "POSS_PRON_3MS",
        "POSS_PRON_3FS",
        "POSS_PRON_2D",
        "POSS_PRON_3D",
        "POSS_PRON_1P",
        "POSS_PRON_2MP",
        "POSS_PRON_2FP",
        "POSS_PRON_3MP",
        "POSS_PRON_3FP",
        "IVSUFF_DO:1S",
        "IVSUFF_DO:2MS",
        "IVSUFF_DO:2FS",
        "IVSUFF_DO:3MS",
        "IVSUFF_DO:3FS",
        "IVSUFF_DO:2D",
        "IVSUFF_DO:3D",
        "IVSUFF_DO:1P",
        "IVSUFF_DO:2MP",
        "IVSUFF_DO:2FP",
        "IVSUFF_DO:3MP",
        "IVSUFF_DO:3FP",
        "IVSUFF_MOOD:I",
        "IVSUFF_SUBJ:2FS_MOOD:I",
        "IVSUFF_SUBJ:D_MOOD:I",
        "IVSUFF_SUBJ:3D_MOOD:I",
        "IVSUFF_SUBJ:MP_MOOD:I",
        "IVSUFF_MOOD:S",
        "IVSUFF_SUBJ:2FS_MOOD:SJ",
        "IVSUFF_SUBJ:D_MOOD:SJ",
        "IVSUFF_SUBJ:MP_MOOD:SJ",
        "IVSUFF_SUBJ:3MP_MOOD:SJ",
        "IVSUFF_SUBJ:FP",
        "PVSUFF_DO:1S",
        "PVSUFF_DO:2MS",
        "PVSUFF_DO:2FS",
        "PVSUFF_DO:3MS",
        "PVSUFF_DO:3FS",
        "PVSUFF_DO:2D",
        "PVSUFF_DO:3D",
        "PVSUFF_DO:1P",
        "PVSUFF_DO:2MP",
        "PVSUFF_DO:2FP",
        "PVSUFF_DO:3MP",
        "PVSUFF_DO:3FP",
        "PVSUFF_SUBJ:1S",
        "PVSUFF_SUBJ:2MS",
        "PVSUFF_SUBJ:2FS",
        "PVSUFF_SUBJ:3MS",
        "PVSUFF_SUBJ:3FS",
        "PVSUFF_SUBJ:2MD",
        "PVSUFF_SUBJ:2FD",
        "PVSUFF_SUBJ:3MD",
        "PVSUFF_SUBJ:3FD",
        "PVSUFF_SUBJ:1P",
        "PVSUFF_SUBJ:2MP",
        "PVSUFF_SUBJ:2FP",
        "PVSUFF_SUBJ:3MP",
        "PVSUFF_SUBJ:3FP",
        "CVSUFF_DO:1S",
        "CVSUFF_DO:3MS",
        "CVSUFF_DO:3FS",
        "CVSUFF_DO:3D",
        "CVSUFF_DO:1P",
        "CVSUFF_DO:3MP",
        "CVSUFF_DO:3FP",
        "CVSUFF_SUBJ:2MS",
        "CVSUFF_SUBJ:2FS",
        "CVSUFF_SUBJ:2MP"
    };

    /** Simplified feature tags: prefix tags, one stem tag (or "NOSTEM"), suffix tags. */
    private final List<String> features_;

    /**
     * Returns the feature tags computed at construction time.
     *
     * @return the list held by this solution; it is returned as-is, without copying
     */
    public List<String> getFeatures() {
        return features_;
    }

    /** Constructs a solution for a word. Note that the prefix, stem and suffix combination is <STRONG>recomputed</STRONG>
     * and may not necessarily match with the information provided by the dictionaries.
     * <p>
     * Leading stem POS entries that end with a known prefix tag are moved to the
     * prefixes, and trailing stem POS entries that end with a known suffix tag are
     * moved to the suffixes. The resulting feature list is made of all prefix
     * entries, then the first remaining stem entry only (or <code>"NOSTEM"</code>
     * when none is left), then all suffix entries. Every entry is reduced to the
     * text that follows its first <code>'/'</code>.
     *
     * @param prefix The prefix as provided by the prefixes dictionary
     * @param stem The stem as provided by the stems dictionary
     * @param suffix The suffix as provided by the suffixes dictionary
     */
    protected Solution(DictionaryEntry prefix, DictionaryEntry stem, DictionaryEntry suffix) {
        LinkedList<String> prefixes = new LinkedList<String>(Arrays.asList(prefix.getPOS()));
        LinkedList<String> stems = new LinkedList<String>(Arrays.asList(stem.getPOS()));
        LinkedList<String> suffixes = new LinkedList<String>(Arrays.asList(suffix.getPOS()));

        // Normalize stems since some of them can contain prefixes
        while (!stems.isEmpty() && endsWithAny(stems.getFirst(), PREFIX_FEATURES)) { //TODO : approve
            prefixes.addLast(stems.removeFirst());
        }

        // Normalize stems since some of them can contain suffixes
        while (!stems.isEmpty() && endsWithAny(stems.getLast(), SUFFIX_FEATURES)) {
            suffixes.addFirst(stems.removeLast());
        }

        // The private helper is used here instead of the public simplify(String)
        // so that construction never dispatches to an overridable method.
        features_ = new ArrayList<String>(prefixes.size() + 1 + suffixes.size());
        for (String prefixFeature : prefixes) {
            features_.add(afterFirstSlash(prefixFeature));
        }
        if (stems.isEmpty()) {
            features_.add(NO_STEM);
        } else {
            // Only the first remaining stem entry is kept.
            features_.add(afterFirstSlash(stems.getFirst()));
        }
        for (String suffixFeature : suffixes) {
            features_.add(afterFirstSlash(suffixFeature));
        }
    }

    /**
     * Reduces a POS entry to the text that follows its first <code>'/'</code>.
     *
     * @param string the entry to simplify
     * @return the substring after the first <code>'/'</code>, or <code>string</code>
     *         itself when it contains no <code>'/'</code>
     */
    public String simplify(String string) {
        return afterFirstSlash(string);
    }

    /** Returns the text after the first '/' of <code>value</code>, or <code>value</code> unchanged if it has none. */
    private static String afterFirstSlash(String value) {
        int index = value.indexOf('/');
        return index >= 0 ? value.substring(index + 1) : value;
    }

    /** Tells whether <code>value</code> ends with at least one of the given tags. */
    private static boolean endsWithAny(String value, String[] tags) {
        for (String tag : tags) {
            if (value.endsWith(tag)) {
                return true;
            }
        }
        return false;
    }
}