/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2010 Alex Buloichik
2015 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.dictionaries;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.omegat.util.Language;
/**
* Dictionary implementation for Lingvo DSL format.
*
* Lingvo DSL format described in Lingvo help. See also
* http://www.dsleditor.narod.ru/art_03.htm (in Russian).
*
* @author Alex Buloichik (alex73mail@gmail.com)
* @author Aaron Madlon-Kay
*/
public class LingvoDSL implements IDictionaryFactory {

    /**
     * Matches the shortest non-empty square-bracketed span (for example
     * {@code [b]} or {@code [/m]}). Every match is removed from a line before
     * the line is interpreted.
     */
    protected static final Pattern RE_SKIP = Pattern.compile("\\[.+?\\]");

    /** Shared empty array constant; not referenced inside this class. */
    protected static final String[] EMPTY_RESULT = new String[0];

    /**
     * Tells whether the given file looks like a DSL dictionary.
     *
     * @param file
     *            candidate dictionary file
     * @return {@code true} when the file path ends with {@code ".dsl"}
     *         (case-sensitive comparison)
     */
    @Override
    public boolean isSupportedFile(File file) {
        return file.getPath().endsWith(".dsl");
    }

    /**
     * Loads the dictionary using a {@link Language} built from the JVM default
     * locale.
     *
     * @param file
     *            DSL file to read
     * @return dictionary backed by the parsed file content
     * @throws Exception
     *             if the file cannot be read or decoded
     */
    @Override
    public IDictionary loadDict(File file) throws Exception {
        return loadDict(file, new Language(Locale.getDefault()));
    }

    /**
     * Loads the dictionary for the given language.
     *
     * @param file
     *            DSL file to read
     * @param language
     *            language handed to the underlying {@link DictionaryData}
     * @return dictionary backed by the parsed file content
     * @throws Exception
     *             if the file cannot be read or decoded
     */
    @Override
    public IDictionary loadDict(File file, Language language) throws Exception {
        return new LingvoDSLDict(loadData(file, language));
    }

    /**
     * Parses a DSL file into headword/article pairs.
     * <p>
     * The file is decoded as UTF-16. Empty lines and lines starting with
     * {@code #} are ignored, and every {@link #RE_SKIP} match is stripped from
     * the remaining lines. A line that then starts with whitespace is appended
     * (trimmed, followed by a newline) to the article of the current headword;
     * any other line starts a new headword, flushing the previous
     * headword/article pair first.
     *
     * @param file
     *            DSL file to read
     * @param language
     *            language handed to the {@link DictionaryData} constructor
     * @return the populated dictionary data
     * @throws Exception
     *             if the file cannot be read or decoded
     */
    private static DictionaryData<String> loadData(File file, Language language) throws Exception {
        DictionaryData<String> data = new DictionaryData<>(language);
        StringBuilder word = new StringBuilder();
        StringBuilder trans = new StringBuilder();
        // Files.lines keeps the file open until the stream is closed, so it
        // must be managed by try-with-resources.
        try (Stream<String> lines = Files.lines(file.toPath(), StandardCharsets.UTF_16)) {
            lines.filter(line -> !line.isEmpty() && !line.startsWith("#"))
                    .map(line -> RE_SKIP.matcher(line).replaceAll(""))
                    // A line made only of bracketed spans is empty by now;
                    // skip it, otherwise codePointAt(0) below would throw.
                    .filter(line -> !line.isEmpty())
                    // The parser state depends on line order, so request
                    // encounter order explicitly.
                    .forEachOrdered(line -> {
                        if (Character.isWhitespace(line.codePointAt(0))) {
                            // Indented line: part of the current article.
                            trans.append(line.trim()).append('\n');
                        } else {
                            // Unindented line: a new headword begins, so
                            // store the entry collected so far.
                            if (word.length() > 0) {
                                data.add(word.toString(), trans.toString());
                                word.setLength(0);
                                trans.setLength(0);
                            }
                            word.append(line);
                        }
                    });
        }
        // Store the last entry, which no following headword has flushed.
        if (word.length() > 0) {
            data.add(word.toString(), trans.toString());
        }
        data.done();
        return data;
    }

    /**
     * Dictionary view over the data parsed from a single DSL file.
     */
    static class LingvoDSLDict implements IDictionary {

        /** Parsed headword/article data that lookups are delegated to. */
        protected final DictionaryData<String> data;

        /**
         * @param data
         *            parsed content of the DSL file
         */
        private LingvoDSLDict(DictionaryData<String> data) {
            this.data = data;
        }

        /**
         * Returns the result of {@code data.lookUp(word)} converted to
         * {@link DictionaryEntry} objects.
         *
         * @param word
         *            word to look up
         * @return one entry per lookup result; empty when there is none
         */
        @Override
        public List<DictionaryEntry> readArticles(String word) throws Exception {
            return data.lookUp(word).stream().map(e -> new DictionaryEntry(e.getKey(), e.getValue()))
                    .collect(Collectors.toList());
        }

        /**
         * Returns the result of {@code data.lookUpPredictive(word)} converted
         * to {@link DictionaryEntry} objects.
         *
         * @param word
         *            word to look up
         * @return one entry per lookup result; empty when there is none
         */
        @Override
        public List<DictionaryEntry> readArticlesPredictive(String word) throws Exception {
            return data.lookUpPredictive(word).stream().map(e -> new DictionaryEntry(e.getKey(), e.getValue()))
                    .collect(Collectors.toList());
        }
    }
}