/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2016 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.gui.editor.history;
import java.util.Collections;
import java.util.List;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import org.omegat.core.Core;
import org.omegat.core.CoreEvents;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.data.TMXEntry;
import org.omegat.core.events.IEntryEventListener;
import org.omegat.core.events.IProjectEventListener.PROJECT_CHANGE_TYPE;
import org.omegat.gui.editor.autocompleter.AutoCompleterItem;
import org.omegat.gui.editor.autocompleter.AutoCompleterListView;
import org.omegat.tokenizer.ITokenizer.StemmingMode;
import org.omegat.util.OStrings;
import org.omegat.util.Preferences;
/**
 * Auto-completer view that proposes words based on the translations already
 * present in the project.
 * <p>
 * A {@link WordPredictor} is trained on the tokens of every translation in the
 * project when a project is loaded (or when the feature is switched on), and
 * is then topped up incrementally: each time the editor moves to another
 * entry, the translation of the entry that was just left is added if that
 * entry had no translation when it was activated.
 * <p>
 * Only {@link #train()} is synchronized; the incremental training and the
 * lookups in {@link #computeListData(String, boolean)} are not.
 */
public class HistoryPredictor extends AutoCompleterListView {

    private static final Logger LOGGER = Logger.getLogger(HistoryPredictor.class.getName());

    /** Model that is trained on translation tokens and queried with the previous word. */
    WordPredictor predictor = new WordPredictor();

    /** Entry most recently activated while the feature was enabled; {@code null} if none is tracked. */
    private SourceTextEntry currentEntry;
    /** Whether {@link #currentEntry} already had a translation at the moment it was activated. */
    private boolean isCurrentEntryTranslated;

    /**
     * Creates the view and registers the project, entry and preference
     * listeners that keep the predictor in step with the project.
     */
    public HistoryPredictor() {
        super(OStrings.getString("AC_HISTORY_PREDICTIONS_VIEW"));

        CoreEvents.registerProjectChangeListener(eventType -> {
            if (eventType == PROJECT_CHANGE_TYPE.CLOSE) {
                // The tracked entry belongs to the project being closed. Keeping it
                // would make the first activation in the next project look up a
                // foreign entry and possibly train a translation that train()
                // has already counted.
                forgetTrackedEntry();
            } else if (isEnabled() && eventType == PROJECT_CHANGE_TYPE.LOAD) {
                train();
            }
        });

        CoreEvents.registerEntryEventListener(new IEntryEventListener() {
            @Override
            public void onNewFile(String activeFileName) {
                // Nothing to do: training is driven by entry activation only.
            }

            @Override
            public void onEntryActivated(SourceTextEntry newEntry) {
                if (!isEnabled()) {
                    return;
                }
                // Learn from the entry we are leaving, but only if it was
                // untranslated when we entered it and is translated now;
                // translations that already existed were covered by train().
                SourceTextEntry lastEntry = currentEntry;
                boolean wasTranslated = isCurrentEntryTranslated;
                if (lastEntry != null && !wasTranslated) {
                    TMXEntry newTranslation = Core.getProject().getTranslationInfo(lastEntry);
                    if (newTranslation.isTranslated()) {
                        trainString(newTranslation.translation);
                    }
                }
                currentEntry = newEntry;
                isCurrentEntryTranslated = Core.getProject().getTranslationInfo(newEntry).isTranslated();
            }
        });

        Preferences.addPropertyChangeListener(Preferences.AC_HISTORY_PREDICTION_ENABLED, evt -> {
            if ((Boolean) evt.getNewValue()) {
                if (Core.getProject().isProjectLoaded()) {
                    train();
                }
            } else {
                predictor.reset();
                // Entry activations are ignored while disabled, so the tracked
                // entry would go stale. If it were kept, re-enabling (which
                // retrains on every translation) followed by the next
                // activation would train that entry's translation twice.
                forgetTrackedEntry();
            }
        });
    }

    /**
     * Drops the entry remembered by the entry listener so that the next
     * activation starts from a clean slate.
     */
    private void forgetTrackedEntry() {
        currentEntry = null;
        isCurrentEntryTranslated = false;
    }

    /**
     * Resets the predictor and retrains it on every default and alternative
     * translation of the current project. The elapsed time is logged at
     * {@code FINER} level.
     */
    synchronized void train() {
        long start = System.currentTimeMillis();
        predictor.reset();
        Core.getProject().iterateByDefaultTranslations((source, trans) -> trainString(trans.translation));
        Core.getProject().iterateByMultipleTranslations((source, trans) -> trainString(trans.translation));
        long time = System.currentTimeMillis() - start;
        LOGGER.finer(() -> String.format("Time to train History Predictor: %d ms", time));
    }

    /**
     * Tokenizes {@code text} into words (without stemming) and feeds them to
     * the predictor.
     *
     * @param text
     *            translation text; {@code null} is ignored
     */
    private void trainString(String text) {
        if (text == null) {
            return;
        }
        String[] tokens = getTokenizer().tokenizeWordsToStrings(text, StemmingMode.NONE);
        predictor.train(tokens);
    }

    /**
     * Computes the completion candidates for the text before the caret.
     * <p>
     * The last completed word of {@code prevText} is used as the seed for the
     * predictor. For space-delimited languages, if the text ends in a
     * partially typed word, only predictions that start with (and are longer
     * than) that partial word are returned.
     *
     * @param prevText
     *            text preceding the caret; may be {@code null} or empty
     * @param contextualOnly
     *            not used by this view
     * @return the matching items, each carrying the rounded prediction
     *         frequency followed by {@code %} as its first extra; empty if
     *         there is nothing to suggest
     */
    @Override
    public List<AutoCompleterItem> computeListData(String prevText, boolean contextualOnly) {
        if (prevText == null || prevText.isEmpty()) {
            return Collections.emptyList();
        }

        String[] tokens = getTokenizer().tokenizeVerbatimToStrings(prevText);
        String seed = lastFullWordToken(tokens);
        if (seed.isEmpty()) {
            return Collections.emptyList();
        }

        List<AutoCompleterItem> predictions = predictor.predictWord(seed).stream()
                .map(p -> new AutoCompleterItem(p.getWord(),
                        new String[] { Math.round(p.getFrequency()) + "%" }, 0))
                .collect(Collectors.toList());
        if (predictions.isEmpty()) {
            return predictions;
        }

        // We have a non-space-delimited language, so it's not possible to
        // distinguish between a new-word situation and a completion situation.
        if (!isLanguageSpaceDelimited()) {
            return predictions;
        }

        // A non-empty seed implies at least one token, so this index is safe.
        String lastToken = tokens[tokens.length - 1];

        // We are starting a new word so all predictions are relevant
        if (lastToken.trim().isEmpty()) {
            return predictions;
        }

        // We have context to filter on: keep only predictions that extend the
        // partially typed word. The third constructor argument is presumably
        // the number of already-typed characters the completion replaces --
        // confirm against AutoCompleterItem.
        String context = lastToken;
        return predictions.stream()
                .filter(item -> item.payload.startsWith(context) && !item.payload.equals(context))
                .map(item -> new AutoCompleterItem(item.payload, item.extras, context.length()))
                .collect(Collectors.toList());
    }

    /**
     * Find the last <em>completed</em> word.
     * <p>
     * If the language is space-delimited, that means ignoring the last token
     * (which should be a partially input word) and then iterating backwards to
     * find the first non-whitespace token.
     * <p>
     * If the language is not space-delimited, use the last token, as we have no
     * way of distinguishing a completed word from an incomplete one.
     *
     * @param tokens
     *            verbatim tokens of the text before the caret, in order
     * @return the last non-blank token that qualifies as a completed word, or
     *         an empty string if there is none
     */
    private String lastFullWordToken(String[] tokens) {
        int startOffset = isLanguageSpaceDelimited() ? 2 : 1;
        for (int i = tokens.length - startOffset; i >= 0; i--) {
            String token = tokens[i];
            if (!token.trim().isEmpty()) {
                return token;
            }
        }
        return "";
    }

    /**
     * Renders an item as HTML: the predicted word followed by its first extra
     * in gray parentheses.
     *
     * @param item
     *            item produced by {@link #computeListData(String, boolean)}
     * @return the HTML label text
     */
    @Override
    public String itemToString(AutoCompleterItem item) {
        return "<html>" + item.payload + " <font color=\"gray\">(" + item.extras[0] + ")</font></html>";
    }

    /**
     * @return whether history prediction is switched on in the preferences
     */
    @Override
    protected boolean isEnabled() {
        return Preferences.isPreference(Preferences.AC_HISTORY_PREDICTION_ENABLED);
    }

    /**
     * @return whether the target language reports itself as space-delimited
     */
    private boolean isLanguageSpaceDelimited() {
        return getTargetLanguage().isSpaceDelimited();
    }
}