/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2010 Alex Buloichik, Ibai Lakunza Velasco, Didier Briel
2013 Martin Wunderlich
2014 Manfred Martin
2015 Didier Briel
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.machinetranslators;
import java.awt.Window;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.omegat.core.Core;
import org.omegat.core.matching.LevenshteinDistance;
import org.omegat.gui.exttrans.MTConfigDialog;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.util.Language;
import org.omegat.util.OStrings;
import org.omegat.util.StringUtil;
import org.omegat.util.Token;
import org.omegat.util.WikiGet;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
* @author Ibai Lakunza Velasco
* @author Didier Briel
* @author Martin Wunderlich
* @author Manfred Martin
* @author Didier Briel
*/
public abstract class AbstractMyMemoryTranslate extends BaseTranslate {
private static final String MYMEMORY_API_EMAIL = "mymemory.api.email";
protected static final String GT_URL = "http://mymemory.translated.net/api/get?q=";
protected static final String MYMEMORYLABEL_TRANSLATION = "translation";
protected static final String MYMEMORYLABEL_MATCHQUALITYPERCENTAGE = "match";
// MyMemory always returns a 4-letter locale code, even when the query
// contains a language code only; to make sure we get the right matches,
// only the language code is taken into account
protected static final String XPATH_QUERY = "child::tuv[starts-with(@lang, '#langCode#')]/seg/text()";
protected final DocumentBuilderFactory factory;
protected final XPathFactory xPathFactory;
public AbstractMyMemoryTranslate() {
factory = DocumentBuilderFactory.newInstance();
xPathFactory = XPathFactory.newInstance();
}
@Override
protected abstract String getPreferenceName();
@Override
public abstract String getName();
/**
* Modify some country codes to fit with MyMemory
*
* @param language
* An OmegaT language
* @return A code modified for MyMemory languages
*/
protected String mymemoryCode(Language language) {
String lCode = language.getLanguageCode().toLowerCase();
return lCode;
}
@Override
protected abstract String translate(Language sLang, Language tLang, String text) throws Exception;
/**
* @param sLang
* @param tLang
* @param text
* @param xpath
* @param allTUs
* @return
* @throws XPathExpressionException
*/
protected String getBestTranslation(Language sLang, Language tLang, String text, XPath xpath, NodeList allTUs)
throws XPathExpressionException {
int lowestEditDistance = 999999;
int dist = 0;
Node tu = null;
String sourceSeg = "";
String targetSeg = "";
String targetSegQueryString = XPATH_QUERY.replace("#langCode#", tLang.getLanguageCode());
String sourceSegQueryString = XPATH_QUERY.replace("#langCode#", sLang.getLanguageCode());
String bestTranslation = "";
// Loop over TUs to get best matching source segment and its translation
for (int i = 0; i < allTUs.getLength(); i++) {
tu = allTUs.item(i);
sourceSeg = xpath.evaluate(sourceSegQueryString, tu);
targetSeg = xpath.evaluate(targetSegQueryString, tu);
// Make strings lowercase to make comparison case-insensitive.
// (Case-sensitive comparison would penalize mere capitalization
// differences equally with whole-word differences.)
Locale srcLoc = Core.getProject().getProjectProperties().getSourceLanguage().getLocale();
dist = getLevensteinDistance(text.toLowerCase(srcLoc), sourceSeg.toLowerCase(srcLoc));
if (dist < lowestEditDistance && !sourceSeg.isEmpty() && !targetSeg.isEmpty()) {
lowestEditDistance = dist;
bestTranslation = targetSeg;
}
if (dist == 0) {
break; // Can't find a better match than this one, so let's stop
// the loop here.
}
}
bestTranslation = cleanUpText(bestTranslation);
return bestTranslation;
}
protected String cleanUpText(String str) {
str = str.replace(""", "\"");
str = str.replace(" ", "\u00A0");
str = str.replace("&", "&");
str = str.replace("'", "'");
str = str.replace("'", "'");
str = str.replace("<", "<");
str = str.replace(">", ">");
str = str.trim();
return str;
}
/**
* @param text
* @param sourceSeg
* @return
*/
private int getLevensteinDistance(String text, String sourceSeg) {
int dist;
LevenshteinDistance leven = new LevenshteinDistance();
ITokenizer srcTokenizer = Core.getProject().getSourceTokenizer();
Token[] textTokenArray = srcTokenizer.tokenizeVerbatim(text);
Token[] sourceSegTokenArray = srcTokenizer.tokenizeVerbatim(sourceSeg);
dist = leven.compute(textTokenArray, sourceSegTokenArray);
return dist;
}
protected String getMyMemoryResponse(Language sLang, Language tLang, String text, String format)
throws UnsupportedEncodingException, IOException {
String url = buildMyMemoryUrl(sLang, tLang, text, format);
// Get email from systemProperties to enable 1000rq/day instead of 100 rq/day
String email = getCredential(MYMEMORY_API_EMAIL);
if (!StringUtil.isEmpty(email)) {
url = url + "&de=" + email;
}
// Get the results from MyMemory
String myMemoryResponse = "";
try {
myMemoryResponse = WikiGet.getURL(url);
} catch (IOException e) {
throw e;
}
return myMemoryResponse;
}
/**
* @param sLang
* @param tLang
* @param text
* @param format
* @return
* @throws UnsupportedEncodingException
*
* This method must be overriden in the concrete implementations
* to adjust the query to include or exclude MT results
*/
protected abstract String buildMyMemoryUrl(Language sLang, Language tLang, String text, String format)
throws UnsupportedEncodingException;
/**
* Removes any character before <?xml in a string.
* This prevents an exception when trying to create a DOM object from that string.
* @param str The input string
* @return The string starting with <?xml, if found, or the initial string
*/
protected String getXMLString(String str) {
int xmlHeader = str.indexOf("<?xml");
if (xmlHeader != -1) { // XML header is not at the beginning
str = str.substring(xmlHeader);
}
return str;
}
@Override
public boolean isConfigurable() {
return true;
}
@Override
public void showConfigurationUI(Window parent) {
MTConfigDialog dialog = new MTConfigDialog(parent, getName()) {
@Override
protected void onConfirm() {
String email = panel.valueField1.getText().trim();
boolean temporary = panel.temporaryCheckBox.isSelected();
setCredential(MYMEMORY_API_EMAIL, email, temporary);
}
};
dialog.panel.valueLabel1.setText(OStrings.getString("MT_ENGINE_MYMEMORY_EMAIL_LABEL"));
dialog.panel.valueField1.setText(getCredential(MYMEMORY_API_EMAIL));
dialog.panel.valueLabel2.setVisible(false);
dialog.panel.valueField2.setVisible(false);
dialog.panel.temporaryCheckBox.setSelected(isCredentialStoredTemporarily(MYMEMORY_API_EMAIL));
dialog.show();
}
}