// License: GPL. For details, see LICENSE file.
package org.openstreetmap.josm.plugins.osmrec.extractor;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.openstreetmap.josm.Main;
import org.openstreetmap.josm.tools.Utils;
import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
/**
* Detects language of osm textual information
*
* @author imis-nkarag
*/
public final class LanguageDetector {
private static LanguageDetector languageDetector = null;
private LanguageDetector() {
//prevent instatiation
}
public static LanguageDetector getInstance(String languageProfilesPath) {
if (languageDetector == null) {
languageDetector = new LanguageDetector();
loadProfilesFromStream(languageProfilesPath);
//profilesLoaded = true;
}
return languageDetector;
}
public static void loadProfilesFromStream(String languageProfilesPath) { //create profiles directory in system from stream and load them
/* supported languages
el:greek, en:english, de:german, fr:french, es:spanish, ru:russian, tr:turkish, zh-cn:chinese, hi:hindi
*/
InputStream languageProfilesInputStreamEl = LanguageDetector.class.getResourceAsStream("/profiles/el");
InputStream languageProfilesInputStreamEn = LanguageDetector.class.getResourceAsStream("/profiles/en");
InputStream languageProfilesInputStreamDe = LanguageDetector.class.getResourceAsStream("/profiles/de");
InputStream languageProfilesInputStreamFr = LanguageDetector.class.getResourceAsStream("/profiles/fr");
InputStream languageProfilesInputStreamEs = LanguageDetector.class.getResourceAsStream("/profiles/es");
InputStream languageProfilesInputStreamRu = LanguageDetector.class.getResourceAsStream("/profiles/ru");
InputStream languageProfilesInputStreamTr = LanguageDetector.class.getResourceAsStream("/profiles/tr");
InputStream languageProfilesInputStreamZh = LanguageDetector.class.getResourceAsStream("/profiles/zh-cn");
InputStream languageProfilesInputStreamHi = LanguageDetector.class.getResourceAsStream("/profiles/hi");
//InputStream languageProfilesInputStream2 = LanguageDetector.class.getResourceAsStream("/resources/profiles/en");
if (!new File(languageProfilesPath).exists()) {
Utils.mkDirs(new File(languageProfilesPath));
}
File languageProfilesOutputFileEl = new File(languageProfilesPath + "/el");
File languageProfilesOutputFileEn = new File(languageProfilesPath + "/en");
File languageProfilesOutputFileDe = new File(languageProfilesPath + "/de");
File languageProfilesOutputFileFr = new File(languageProfilesPath + "/fr");
File languageProfilesOutputFileEs = new File(languageProfilesPath + "/es");
File languageProfilesOutputFileRu = new File(languageProfilesPath + "/ru");
File languageProfilesOutputFileTr = new File(languageProfilesPath + "/tr");
File languageProfilesOutputFileZh = new File(languageProfilesPath + "/zh-cn");
File languageProfilesOutputFileHi = new File(languageProfilesPath + "/hi");
try {
languageProfilesOutputFileEl.createNewFile();
languageProfilesOutputFileEn.createNewFile();
languageProfilesOutputFileDe.createNewFile();
languageProfilesOutputFileFr.createNewFile();
languageProfilesOutputFileEs.createNewFile();
languageProfilesOutputFileRu.createNewFile();
languageProfilesOutputFileTr.createNewFile();
languageProfilesOutputFileZh.createNewFile();
languageProfilesOutputFileHi.createNewFile();
} catch (IOException ex) {
Logger.getLogger(LanguageDetector.class.getName()).log(Level.SEVERE, null, ex);
Main.error(ex);
}
try {
Files.copy(languageProfilesInputStreamEl, languageProfilesOutputFileEl.toPath());
Files.copy(languageProfilesInputStreamEn, languageProfilesOutputFileEn.toPath());
Files.copy(languageProfilesInputStreamDe, languageProfilesOutputFileDe.toPath());
Files.copy(languageProfilesInputStreamFr, languageProfilesOutputFileFr.toPath());
Files.copy(languageProfilesInputStreamEs, languageProfilesOutputFileEs.toPath());
Files.copy(languageProfilesInputStreamRu, languageProfilesOutputFileRu.toPath());
Files.copy(languageProfilesInputStreamTr, languageProfilesOutputFileTr.toPath());
Files.copy(languageProfilesInputStreamZh, languageProfilesOutputFileZh.toPath());
Files.copy(languageProfilesInputStreamHi, languageProfilesOutputFileHi.toPath());
} catch (IOException ex) {
Logger.getLogger(LanguageDetector.class.getName()).log(Level.SEVERE, null, ex);
Main.error(ex);
}
try {
DetectorFactory.loadProfile(languageProfilesPath);
} catch (LangDetectException ex) {
Logger.getLogger(LanguageDetector.class.getName()).log(Level.SEVERE, null, ex);
Main.error(ex);
}
}
public String detect(String text) {
try {
Detector detector = DetectorFactory.create();
detector.append(text);
return detector.detect();
} catch (LangDetectException ex) {
Logger.getLogger(LanguageDetector.class.getName()).log(Level.SEVERE, null, ex);
Main.error(ex);
return "en"; //default lang to return if anything goes wrong at detection
}
}
}