package bg.bozho.ikratko.other;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.collections4.Trie;
import org.apache.commons.collections4.trie.PatriciaTrie;
import com.google.common.collect.Sets;
import bg.bozho.ikratko.Checker;
import bg.bozho.ikratko.Checker.InflectedFormType;
/**
 * Command-line tool that writes, for every dictionary form longer than two characters,
 * the list of its "echoes": other dictionary forms that end with that form.
 *
 * <p>Suffix lookups are done by storing every form reversed in a trie and asking the trie
 * for all keys that start with the reversed form. Echoes whose leading part (the text in
 * front of the shared ending) is itself a dictionary form or one of the common prefixes
 * are skipped, since they are formed directly from the word.
 *
 * <p>Output is UTF-8 text, one line per form that has at least one echo, in the shape
 * {@code form: echo1, echo2} terminated by CRLF.
 */
public class Echo {

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "c:/var/echos.txt";

    /** Prefixes that, when they are the whole difference between echo and form, disqualify the echo. */
    private static final Set<String> commonPrefixes = Sets.newHashSet("раз", "пред", "през", "от", "не", "над", "под");

    /**
     * Builds the reversed-form trie and writes the echo list to a file.
     *
     * @param args optional; {@code args[0]} overrides the output path
     *             (defaults to {@code c:/var/echos.txt})
     * @throws Exception if initializing the checker or writing the output file fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): presumably initialize() populates the static Checker.formsDictionary — confirm.
        Checker c = new Checker();
        c.initialize();

        Trie<String, InflectedFormType> reverse = buildReversedTrie();
        String outputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;

        // try-with-resources so the file is flushed and closed even if a write fails midway
        try (FileOutputStream fos = new FileOutputStream(outputPath);
                OutputStreamWriter out = new OutputStreamWriter(fos, StandardCharsets.UTF_8)) {
            for (String form : Checker.formsDictionary.keySet()) {
                if (form.length() > 2) {
                    out.write(describeEchoes(form, reverse));
                }
            }
        }
    }

    /** Returns a trie holding every dictionary form with its characters reversed, mapped to its form type. */
    private static Trie<String, InflectedFormType> buildReversedTrie() {
        Trie<String, InflectedFormType> reverse = new PatriciaTrie<>();
        for (Entry<String, InflectedFormType> entry : Checker.formsDictionary.entrySet()) {
            // original author's note: a StringBuilder is used so that no entry is placed in the jvm string pool
            String key = new StringBuilder(entry.getKey()).reverse().toString();
            reverse.put(key, entry.getValue());
        }
        return reverse;
    }

    /**
     * Builds the output line for one form.
     *
     * @param form    the dictionary form whose echoes are wanted
     * @param reverse trie of reversed dictionary forms
     * @return {@code "form: echo1, echo2\r\n"}, or the empty string when the form has no echoes
     */
    private static String describeEchoes(String form, Trie<String, InflectedFormType> reverse) {
        String reversedForm = StringUtils.reverse(form);
        // keys starting with the reversed form are exactly the forms ending with the form
        Set<String> echoesReversed = reverse.prefixMap(reversedForm).keySet();

        StringBuilder sb = new StringBuilder();
        String delim = "";
        for (String echoReversed : echoesReversed) {
            String echo = StringUtils.reverse(echoReversed);

            // exclude the same word and any word that is formed directly from it and another word or common prefix.
            // Only the trailing occurrence of the form is stripped; String.replace would also remove
            // occurrences earlier in the echo and produce a wrong leading part.
            String diff = echo.substring(0, echo.length() - form.length());
            if (diff.length() == 1) {
                diff = ""; // ignore 1-letter diffs
            }
            // Hard-coded exception: for this one form the leading part is always blanked,
            // which bypasses the dictionary/prefix filter below
            // (assuming the empty string is not a dictionary key — TODO confirm).
            if (form.equals("античен")) {
                diff = "";
            }

            if (!echo.equals(form)
                    && !Checker.formsDictionary.containsKey(diff)
                    && !commonPrefixes.contains(diff)) {
                sb.append(delim).append(echo);
                delim = ", ";
            }
        }

        if (sb.length() > 0) {
            sb.insert(0, form + ": ");
            sb.append("\r\n");
        }
        return sb.toString();
    }
}