/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2009 Alex Buloichik
2011 Didier Briel
2015 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.dictionaries;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.omegat.gui.dictionaries.IDictionaries;
import org.omegat.tokenizer.DefaultTokenizer;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.tokenizer.ITokenizer.StemmingMode;
import org.omegat.util.DirectoryMonitor;
import org.omegat.util.FileUtil;
import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.Preferences;
/**
* Class for load dictionaries.
*
* @author Alex Buloichik (alex73mail@gmail.com)
* @author Didier Briel
* @author Aaron Madlon-Kay
*/
public class DictionariesManager implements DirectoryMonitor.Callback {
public static final String IGNORE_FILE = "ignore.txt";
public static final String DICTIONARY_SUBDIR = "dictionary";
private final IDictionaries pane;
protected DirectoryMonitor monitor;
protected final List<IDictionaryFactory> factories = new ArrayList<IDictionaryFactory>();
protected final Map<String, IDictionary> dictionaries = new TreeMap<String, IDictionary>();
protected final Set<String> ignoreWords = new TreeSet<String>();
private Language indexLanguage;
private ITokenizer tokenizer;
public DictionariesManager(final IDictionaries pane) {
this.pane = pane;
factories.add(new LingvoDSL());
factories.add(new StarDict());
indexLanguage = new Language(Locale.getDefault());
tokenizer = new DefaultTokenizer();
}
public void addDictionaryFactory(IDictionaryFactory dict) {
synchronized (factories) {
factories.add(dict);
}
if (monitor != null) {
monitor.fin();
start(monitor.getDir());
}
}
public void removeDictionaryFactory(IDictionaryFactory factory) {
synchronized (factories) {
factories.remove(factory);
}
}
public void start(File dictDir) {
monitor = new DirectoryMonitor(dictDir, this);
monitor.start();
}
public void stop() {
monitor.fin();
synchronized (this) {
dictionaries.clear();
}
}
/**
* Executed on file changed.
*/
public void fileChanged(File file) {
synchronized (dictionaries) {
dictionaries.remove(file.getPath());
}
if (!file.exists()) {
return;
}
try {
long st = System.currentTimeMillis();
if (file.getName().equals(IGNORE_FILE)) {
loadIgnoreWords(file);
} else if (loadDictionary(file)) {
long en = System.currentTimeMillis();
Log.log("Loaded dictionary from '" + file.getPath() + "': " + (en - st) + "ms");
}
} catch (Exception ex) {
Log.log("Error load dictionary from '" + file.getPath() + "': " + ex.getMessage());
}
pane.refresh();
}
/**
* Check all known dictionary factories to see if they support this file.
* Will stop at the first supporting factory and attempt to load the
* dictionary.
*
* @param file
* Dictionary file to be loaded
* @return Whether or not the file was loaded
* @throws Exception
* Even when a file appears to be supported, exceptions can
* still occur while loading.
*/
private boolean loadDictionary(File file) throws Exception {
if (!file.isFile()) {
return false;
}
List<IDictionaryFactory> currFactories;
synchronized (factories) {
currFactories = new ArrayList<IDictionaryFactory>(factories);
}
for (IDictionaryFactory factory : currFactories) {
if (factory.isSupportedFile(file)) {
IDictionary dict = factory.loadDict(file, indexLanguage);
synchronized (this) {
dictionaries.put(file.getPath(), dict);
}
return true;
}
}
return false;
}
/**
* Load ignored words from 'ignore.txt' file.
*/
protected void loadIgnoreWords(File file) throws IOException {
List<String> lines = Files.readAllLines(file.toPath(), StandardCharsets.UTF_8);
synchronized (ignoreWords) {
ignoreWords.clear();
lines.stream().map(String::trim).forEach(line -> ignoreWords.add(line));
}
}
/**
* Add new ignore word.
*/
public void addIgnoreWord(final String word) {
Collection<String> words = Collections.emptyList();
synchronized (ignoreWords) {
ignoreWords.add(word);
words = new ArrayList<String>(ignoreWords);
}
if (monitor != null) {
saveIgnoreWords(words, new File(monitor.getDir(), IGNORE_FILE));
}
}
private static void saveIgnoreWords(Collection<String> words, File outFile) {
try {
File outFileTmp = new File(outFile.getPath() + ".new");
Files.write(outFileTmp.toPath(), words);
outFile.delete();
FileUtil.rename(outFileTmp, outFile);
} catch (IOException ex) {
Log.log("Error saving ignore words");
Log.log(ex);
}
}
private boolean isIgnoreWord(String word) {
synchronized (ignoreWords) {
return ignoreWords.contains(word);
}
}
/**
* Find words list in all dictionaries.
*
* @param words
* words list
* @return articles list
*/
public List<DictionaryEntry> findWords(Collection<String> words) {
List<IDictionary> dicts;
synchronized (this) {
dicts = new ArrayList<IDictionary>(dictionaries.values());
}
return words.stream().filter(word -> !isIgnoreWord(word)).flatMap(word -> {
return dicts.stream().flatMap(dict -> doLookUp(dict, word).stream());
}).collect(Collectors.toList());
}
private List<DictionaryEntry> doLookUp(IDictionary dict, String word) {
String[] stemmed = tokenizer.tokenizeWordsToStrings(word, StemmingMode.MATCHING);
if (stemmed.length == 0) {
// Stop word. Skip.
return Collections.<DictionaryEntry> emptyList();
}
try {
List<DictionaryEntry> result = dict.readArticles(word);
if (!result.isEmpty()) {
return result;
}
// The verbatim word didn't get any hits; try the stem.
if (stemmed.length > 1 && doFuzzyMatching()) {
return dict.readArticlesPredictive(stemmed[0]);
}
} catch (Exception ex) {
Log.log(ex);
}
return Collections.<DictionaryEntry> emptyList();
}
public void setIndexLanguage(Language indexLanguage) {
this.indexLanguage = indexLanguage;
}
public void setTokenizer(ITokenizer tokenizer) {
this.tokenizer = tokenizer;
}
// Implemented as method for testing purposes
protected boolean doFuzzyMatching() {
return Preferences.isPreferenceDefault(Preferences.DICTIONARY_FUZZY_MATCHING, true);
}
}