package com.constellio.model.services.parser;
import java.io.File;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.constellio.data.dao.managers.StatefulService;
import com.constellio.model.services.parser.LanguageDetectionServicesRuntimeException.LanguageDetectionManagerRuntimeException_CannotDetectLanguage;
import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
/**
 * Stateful service that detects the language of a text using the langdetect library
 * ({@link DetectorFactory} / {@link Detector}).
 * <p>
 * Language profiles are loaded by {@link #initialize()} from the directory given at construction.
 * The "already loaded" flag is static, so loading happens at most once for the class, whichever
 * instance is initialized first. The flag is read and written without synchronization.
 */
public class LanguageDetectionManager implements StatefulService {

	private static final Logger LOGGER = LoggerFactory.getLogger(LanguageDetectionManager.class);

	/**
	 * Offset from which the first space is searched when shortening the analysed content.
	 */
	private static final int SAMPLE_LENGTH = 200;

	/**
	 * Set once the language profiles have been loaded successfully; shared by all instances.
	 */
	private static boolean schemasLoaded;

	private final File languageProfiles;

	/**
	 * @param languageProfiles location of the language profiles passed to
	 *                         {@link DetectorFactory#loadProfile(File)} on initialization
	 */
	public LanguageDetectionManager(File languageProfiles) {
		this.languageProfiles = languageProfiles;
	}

	/**
	 * Loads the language profiles and fixes the detector seed, unless a previous call already did so.
	 *
	 * @throws LanguageDetectionServicesRuntimeException if anything goes wrong while loading the profiles
	 */
	@Override
	public void initialize() {
		if (schemasLoaded) {
			return;
		}
		try {
			LOGGER.info("Loading profiles {}", languageProfiles.getAbsolutePath());
			DetectorFactory.loadProfile(languageProfiles);

			// Langdetect uses random sampling to avoid local noise (person names, place names and so on),
			// so detecting the language of the same document may give a different result each time.
			// A fixed seed disables this, since it caused too much random behavior.
			DetectorFactory.setSeed(0);
		} catch (Throwable e) {
			throw new LanguageDetectionServicesRuntimeException("Cannot load schemas", e);
		}
		// Only flagged as loaded when no exception was raised, so a failed load can be retried.
		schemasLoaded = true;
	}

	/**
	 * Lenient variant of {@link #detectLanguage(String)}: failures are logged instead of thrown.
	 *
	 * @param content the text to analyse, may be {@code null}
	 * @return the detected language, or {@code null} when {@code content} is {@code null} or when
	 * detection failed with a {@link LanguageDetectionServicesRuntimeException}
	 */
	public String tryDetectLanguage(String content) {
		if (content == null) {
			// Nothing to analyse; without this guard a NullPointerException would escape this method.
			return null;
		}
		try {
			return detectLanguage(content);
		} catch (LanguageDetectionServicesRuntimeException e) {
			LOGGER.info(e.getMessage());
			return null;
		}
	}

	/**
	 * Detects the language of the given text.
	 * <p>
	 * When the text is longer than {@value #SAMPLE_LENGTH} characters, only the part preceding the
	 * first space found at or after index {@value #SAMPLE_LENGTH} is analysed. If no such space
	 * exists, the whole text is analysed.
	 *
	 * @param content the text to analyse, must not be {@code null}
	 * @return the language returned by {@link Detector#detect()}
	 * @throws LanguageDetectionManagerRuntimeException_CannotDetectLanguage if the library raises a
	 *                                                                       {@link LangDetectException}
	 */
	public String detectLanguage(String content) {
		try {
			Detector detector = DetectorFactory.create();
			if (content.length() > SAMPLE_LENGTH) {
				int cutIndex = content.indexOf(" ", SAMPLE_LENGTH);
				if (cutIndex != -1) {
					// Cut on a word boundary so that the last word is not split.
					content = content.substring(0, cutIndex);
				}
			}
			detector.append(content);
			return detector.detect();
		} catch (LangDetectException e) {
			// At this point content may already be the shortened sample.
			throw new LanguageDetectionManagerRuntimeException_CannotDetectLanguage(content, e);
		}
	}

	/**
	 * Does nothing.
	 */
	@Override
	public void close() {
	}
}