package com.cybozu.labs.langdetect; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import com.cybozu.labs.langdetect.util.LangProfile; import io.lumify.core.util.LumifyLogger; import io.lumify.core.util.LumifyLoggerFactory; import org.apache.commons.io.IOUtils; import org.json.JSONObject; /** * Language Detector Factory Class * * This class manages an initialization and constructions of {@link Detector}. * * Before using language detection library, * load profiles with {@link DetectorFactory#loadProfile(String)} method * and set initialization parameters. * * When the language detection, * construct Detector instance via {@link DetectorFactory#create()}. * See also {@link Detector}'s sample code. * * <ul> * <li>4x faster improvement based on Elmer Garduno's code. Thanks!</li> * </ul> * * @see Detector * @author Nakatani Shuyo */ public class DetectorFactory { private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(DetectorFactory.class); public HashMap<String, double[]> wordLangProbMap; public ArrayList<String> langlist; public Long seed = null; private DetectorFactory() { wordLangProbMap = new HashMap<String, double[]>(); langlist = new ArrayList<String>(); } static private DetectorFactory instance_ = new DetectorFactory(); /** * Load profiles from specified directory. * This method must be called once before language detection. * * @param profileDirectory profile directory path * @throws LangDetectException Can't open profiles(error code = {@link ErrorCode#FileLoadError}) * or profile's format is wrong (error code = {@link ErrorCode#FormatError}) */ public static void loadProfile(String profileDirectory) throws LangDetectException { loadProfile(new File(profileDirectory)); } /** * Load profiles from specified directory. * This method must be called once before language detection. * * @param profileDirectory profile directory path * @throws LangDetectException Can't open profiles(error code = {@link ErrorCode#FileLoadError}) * or profile's format is wrong (error code = {@link ErrorCode#FormatError}) */ public static void loadProfile(File profileDirectory) throws LangDetectException { File[] listFiles = profileDirectory.listFiles(); if (listFiles == null) throw new LangDetectException(ErrorCode.NeedLoadProfileError, "Not found profile: " + profileDirectory); int langsize = listFiles.length, index = 0; for (File file: listFiles) { if (file.getName().startsWith(".") || !file.isFile()) continue; FileInputStream is = null; try { LOGGER.debug("Loading language profile %s", file.toString()); is = new FileInputStream(file); LangProfile profile = new LangProfile(new JSONObject(IOUtils.toString(is, "UTF-8"))); try { addProfile(profile, index, langsize); } catch (LangDetectException e) { LOGGER.info("Duplicate language profile, %s, ", profile.name); } ++index; } catch (IOException e) { throw new LangDetectException(ErrorCode.FileLoadError, "can't open '" + file.getName() + "'"); } finally { try { if (is!=null) is.close(); } catch (IOException e) {} } } } /** * @param profile * @param langsize * @param index * @throws LangDetectException */ static /* package scope */ void addProfile(LangProfile profile, int index, int langsize) throws LangDetectException { String lang = profile.name; if (instance_.langlist.contains(lang)) { throw new LangDetectException(ErrorCode.DuplicateLangError, "duplicate the same language profile"); } instance_.langlist.add(lang); for (String word: profile.freq.keySet()) { if (!instance_.wordLangProbMap.containsKey(word)) { instance_.wordLangProbMap.put(word, new double[langsize]); } int length = word.length(); if (length >= 1 && length <= 3) { double prob = profile.freq.get(word).doubleValue() / profile.n_words[length - 1]; instance_.wordLangProbMap.get(word)[index] = prob; } } } /** * Clear loaded language profiles (reinitialization to be available) */ static public void clear() { instance_.langlist.clear(); instance_.wordLangProbMap.clear(); } /** * Construct Detector instance * * @return Detector instance * @throws LangDetectException */ static public Detector create() throws LangDetectException { return createDetector(); } /** * Construct Detector instance with smoothing parameter * * @param alpha smoothing parameter (default value = 0.5) * @return Detector instance * @throws LangDetectException */ public static Detector create(double alpha) throws LangDetectException { Detector detector = createDetector(); detector.setAlpha(alpha); return detector; } static private Detector createDetector() throws LangDetectException { if (instance_.langlist.size()==0) throw new LangDetectException(ErrorCode.NeedLoadProfileError, "need to load profiles"); Detector detector = new Detector(instance_); return detector; } public static void setSeed(long seed) { instance_.seed = seed; } public static final List<String> getLangList() { return Collections.unmodifiableList(instance_.langlist); } }