package org.xbib.elasticsearch.index.mapper.langdetect;
import org.elasticsearch.common.settings.Settings;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.xbib.elasticsearch.common.langdetect.LangProfile;
import org.xbib.elasticsearch.common.langdetect.LanguageDetectionException;
import org.xbib.elasticsearch.common.langdetect.LangdetectService;
public class DetectorTest extends Assert {
private static final String TRAINING_EN = "a a a b b c c d e";
private static final String TRAINING_FR = "a b b c c c d d d";
private static final String TRAINING_JA = "\u3042 \u3042 \u3042 \u3044 \u3046 \u3048 \u3048";
private static LangdetectService detect;
@BeforeClass
public static void setUp() throws Exception {
detect = new LangdetectService(Settings.EMPTY);
LangProfile profile_en = new LangProfile();
profile_en.setName("en_test");
for (String w : TRAINING_EN.split(" ")) {
profile_en.add(w);
}
detect.addProfile(profile_en, 0, 3);
LangProfile profile_fr = new LangProfile();
profile_fr.setName("fr_test");
for (String w : TRAINING_FR.split(" ")) {
profile_fr.add(w);
}
detect.addProfile(profile_fr, 1, 3);
LangProfile profile_ja = new LangProfile();
profile_ja.setName("ja_test");
for (String w : TRAINING_JA.split(" ")) {
profile_ja.add(w);
}
detect.addProfile(profile_ja, 2, 3);
}
@Test
public void testDetector1() throws LanguageDetectionException {
assertEquals(detect.detectAll("a").get(0).getLanguage(), "en_test");
}
@Test
public void testDetector2() throws LanguageDetectionException {
assertEquals(detect.detectAll("b d").get(0).getLanguage(), "fr_test");
}
@Test
public void testDetector3() throws LanguageDetectionException {
assertEquals(detect.detectAll("d e").get(0).getLanguage(), "en_test");
}
@Test
public void testDetector4() throws LanguageDetectionException {
assertEquals(detect.detectAll("\u3042\u3042\u3042\u3042a").get(0).getLanguage(), "ja_test");
}
@Test
public void testPunctuation() throws LanguageDetectionException {
assertTrue(detect.detectAll("...").isEmpty());
}
}