// Copyright 2014 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package marmot.morph.analyzer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import marmot.thirdparty.aramorph.AraMorph;
import marmot.thirdparty.aramorph.Solution;
public class ArabicAnalyzer extends Analyzer {
private static final long serialVersionUID = 1L;
transient private AraMorph aramorph_;
private boolean subfeatures_;
public ArabicAnalyzer(boolean subfeatures) {
aramorph_ = null;
subfeatures_ = subfeatures;
}
@Override
public List<String> analyze(String form) {
if (aramorph_ == null) {
aramorph_ = new AraMorph();
}
Set<Solution> solutions = aramorph_.analyzeToken(form);
if (solutions == null || solutions.isEmpty()) {
return null;
}
StringBuilder sb = new StringBuilder();
Set<String> set = new HashSet<String>();
for (Solution solution : solutions) {
sb.setLength(0);
List<String> feats = solution.getFeatures();
for (String feat : feats) {
if (!feat.isEmpty()) {
if (sb.length() > 0) {
sb.append('|');
}
sb.append(feat);
if (subfeatures_) {
set.add(feat);
String [] parts = feat.split("_:");
if (parts.length > 1) {
for (String part : parts) {
if (!part.isEmpty()) {
set.add(part);
}
}
}
}
}
}
set.add(sb.toString());
}
if (set.isEmpty()) {
return null;
}
return new ArrayList<String>(set);
}
}