/************************************************************************** OmegaT - Computer Assisted Translation (CAT) tool with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. Copyright (C) 2009 Alex Buloichik 2011 Didier Briel 2015 Aaron Madlon-Kay Home page: http://www.omegat.org/ Support center: http://groups.yahoo.com/group/OmegaT/ This file is part of OmegaT. OmegaT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OmegaT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. **************************************************************************/ package org.omegat.core.dictionaries; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import java.util.stream.Collectors; import org.omegat.gui.dictionaries.IDictionaries; import org.omegat.tokenizer.DefaultTokenizer; import org.omegat.tokenizer.ITokenizer; import org.omegat.tokenizer.ITokenizer.StemmingMode; import org.omegat.util.DirectoryMonitor; import org.omegat.util.FileUtil; import org.omegat.util.Language; import org.omegat.util.Log; import org.omegat.util.Preferences; /** * Class for load dictionaries. * * @author Alex Buloichik (alex73mail@gmail.com) * @author Didier Briel * @author Aaron Madlon-Kay */ public class DictionariesManager implements DirectoryMonitor.Callback { public static final String IGNORE_FILE = "ignore.txt"; public static final String DICTIONARY_SUBDIR = "dictionary"; private final IDictionaries pane; protected DirectoryMonitor monitor; protected final List<IDictionaryFactory> factories = new ArrayList<IDictionaryFactory>(); protected final Map<String, IDictionary> dictionaries = new TreeMap<String, IDictionary>(); protected final Set<String> ignoreWords = new TreeSet<String>(); private Language indexLanguage; private ITokenizer tokenizer; public DictionariesManager(final IDictionaries pane) { this.pane = pane; factories.add(new LingvoDSL()); factories.add(new StarDict()); indexLanguage = new Language(Locale.getDefault()); tokenizer = new DefaultTokenizer(); } public void addDictionaryFactory(IDictionaryFactory dict) { synchronized (factories) { factories.add(dict); } if (monitor != null) { monitor.fin(); start(monitor.getDir()); } } public void removeDictionaryFactory(IDictionaryFactory factory) { synchronized (factories) { factories.remove(factory); } } public void start(File dictDir) { monitor = new DirectoryMonitor(dictDir, this); monitor.start(); } public void stop() { monitor.fin(); synchronized (this) { dictionaries.clear(); } } /** * Executed on file changed. */ public void fileChanged(File file) { synchronized (dictionaries) { dictionaries.remove(file.getPath()); } if (!file.exists()) { return; } try { long st = System.currentTimeMillis(); if (file.getName().equals(IGNORE_FILE)) { loadIgnoreWords(file); } else if (loadDictionary(file)) { long en = System.currentTimeMillis(); Log.log("Loaded dictionary from '" + file.getPath() + "': " + (en - st) + "ms"); } } catch (Exception ex) { Log.log("Error load dictionary from '" + file.getPath() + "': " + ex.getMessage()); } pane.refresh(); } /** * Check all known dictionary factories to see if they support this file. * Will stop at the first supporting factory and attempt to load the * dictionary. * * @param file * Dictionary file to be loaded * @return Whether or not the file was loaded * @throws Exception * Even when a file appears to be supported, exceptions can * still occur while loading. */ private boolean loadDictionary(File file) throws Exception { if (!file.isFile()) { return false; } List<IDictionaryFactory> currFactories; synchronized (factories) { currFactories = new ArrayList<IDictionaryFactory>(factories); } for (IDictionaryFactory factory : currFactories) { if (factory.isSupportedFile(file)) { IDictionary dict = factory.loadDict(file, indexLanguage); synchronized (this) { dictionaries.put(file.getPath(), dict); } return true; } } return false; } /** * Load ignored words from 'ignore.txt' file. */ protected void loadIgnoreWords(File file) throws IOException { List<String> lines = Files.readAllLines(file.toPath(), StandardCharsets.UTF_8); synchronized (ignoreWords) { ignoreWords.clear(); lines.stream().map(String::trim).forEach(line -> ignoreWords.add(line)); } } /** * Add new ignore word. */ public void addIgnoreWord(final String word) { Collection<String> words = Collections.emptyList(); synchronized (ignoreWords) { ignoreWords.add(word); words = new ArrayList<String>(ignoreWords); } if (monitor != null) { saveIgnoreWords(words, new File(monitor.getDir(), IGNORE_FILE)); } } private static void saveIgnoreWords(Collection<String> words, File outFile) { try { File outFileTmp = new File(outFile.getPath() + ".new"); Files.write(outFileTmp.toPath(), words); outFile.delete(); FileUtil.rename(outFileTmp, outFile); } catch (IOException ex) { Log.log("Error saving ignore words"); Log.log(ex); } } private boolean isIgnoreWord(String word) { synchronized (ignoreWords) { return ignoreWords.contains(word); } } /** * Find words list in all dictionaries. * * @param words * words list * @return articles list */ public List<DictionaryEntry> findWords(Collection<String> words) { List<IDictionary> dicts; synchronized (this) { dicts = new ArrayList<IDictionary>(dictionaries.values()); } return words.stream().filter(word -> !isIgnoreWord(word)).flatMap(word -> { return dicts.stream().flatMap(dict -> doLookUp(dict, word).stream()); }).collect(Collectors.toList()); } private List<DictionaryEntry> doLookUp(IDictionary dict, String word) { String[] stemmed = tokenizer.tokenizeWordsToStrings(word, StemmingMode.MATCHING); if (stemmed.length == 0) { // Stop word. Skip. return Collections.<DictionaryEntry> emptyList(); } try { List<DictionaryEntry> result = dict.readArticles(word); if (!result.isEmpty()) { return result; } // The verbatim word didn't get any hits; try the stem. if (stemmed.length > 1 && doFuzzyMatching()) { return dict.readArticlesPredictive(stemmed[0]); } } catch (Exception ex) { Log.log(ex); } return Collections.<DictionaryEntry> emptyList(); } public void setIndexLanguage(Language indexLanguage) { this.indexLanguage = indexLanguage; } public void setTokenizer(ITokenizer tokenizer) { this.tokenizer = tokenizer; } // Implemented as method for testing purposes protected boolean doFuzzyMatching() { return Preferences.isPreferenceDefault(Preferences.DICTIONARY_FUZZY_MATCHING, true); } }