package org.xbib.elasticsearch.index.analysis.baseform;
import org.junit.Assert;
import org.junit.Test;
import org.xbib.elasticsearch.common.fsa.Dictionary;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.CharacterCodingException;
public class DictionaryTests extends Assert {
@Test
public void verifyDE() throws IOException {
Dictionary dictionary = new Dictionary();
InputStreamReader reader = new InputStreamReader(getClass().getResource("/baseform/de-lemma-utf8.txt").openStream(), "UTF-8");
dictionary.loadLines(reader);
reader.close();
BufferedReader br = new BufferedReader(new InputStreamReader(getClass().getResource("/baseform/de-lemma-utf8.txt").openStream(), "UTF-8"));
String line;
while ((line = br.readLine()) != null) {
if (!line.startsWith("#")) {
if (!check(line, dictionary)) {
break;
}
}
}
br.close();
}
private boolean check(String line, Dictionary dictionary) throws CharacterCodingException {
int pos = line.indexOf("\t");
String word = pos > 0 ? line.substring(0, pos) : line;
try {
CharSequence baseform = dictionary.lookup(word);
} catch (StackOverflowError e) {
// if stack overflow error occurs, we have faulty entries
return false;
}
return true;
}
}