/************************************************************************** OmegaT - Computer Assisted Translation (CAT) tool with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. Copyright (C) 2010 Alex Buloichik, Ibai Lakunza Velasco, Didier Briel 2013 Martin Wunderlich 2014 Manfred Martin 2015 Didier Briel Home page: http://www.omegat.org/ Support center: http://groups.yahoo.com/group/OmegaT/ This file is part of OmegaT. OmegaT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OmegaT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. **************************************************************************/ package org.omegat.core.machinetranslators; import java.awt.Window; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.Locale; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.omegat.core.Core; import org.omegat.core.matching.LevenshteinDistance; import org.omegat.gui.exttrans.MTConfigDialog; import org.omegat.tokenizer.ITokenizer; import org.omegat.util.Language; import org.omegat.util.OStrings; import org.omegat.util.StringUtil; import org.omegat.util.Token; import org.omegat.util.WikiGet; import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** * @author Ibai Lakunza Velasco * @author Didier Briel * @author Martin Wunderlich * @author Manfred Martin * @author Didier Briel */ public abstract class AbstractMyMemoryTranslate extends BaseTranslate { private static final String MYMEMORY_API_EMAIL = "mymemory.api.email"; protected static final String GT_URL = "http://mymemory.translated.net/api/get?q="; protected static final String MYMEMORYLABEL_TRANSLATION = "translation"; protected static final String MYMEMORYLABEL_MATCHQUALITYPERCENTAGE = "match"; // MyMemory always returns a 4-letter locale code, even when the query // contains a language code only; to make sure we get the right matches, // only the language code is taken into account protected static final String XPATH_QUERY = "child::tuv[starts-with(@lang, '#langCode#')]/seg/text()"; protected final DocumentBuilderFactory factory; protected final XPathFactory xPathFactory; public AbstractMyMemoryTranslate() { factory = DocumentBuilderFactory.newInstance(); xPathFactory = XPathFactory.newInstance(); } @Override protected abstract String getPreferenceName(); @Override public abstract String getName(); /** * Modify some country codes to fit with MyMemory * * @param language * An OmegaT language * @return A code modified for MyMemory languages */ protected String mymemoryCode(Language language) { String lCode = language.getLanguageCode().toLowerCase(); return lCode; } @Override protected abstract String translate(Language sLang, Language tLang, String text) throws Exception; /** * @param sLang * @param tLang * @param text * @param xpath * @param allTUs * @return * @throws XPathExpressionException */ protected String getBestTranslation(Language sLang, Language tLang, String text, XPath xpath, NodeList allTUs) throws XPathExpressionException { int lowestEditDistance = 999999; int dist = 0; Node tu = null; String sourceSeg = ""; String targetSeg = ""; String targetSegQueryString = XPATH_QUERY.replace("#langCode#", tLang.getLanguageCode()); String sourceSegQueryString = XPATH_QUERY.replace("#langCode#", sLang.getLanguageCode()); String bestTranslation = ""; // Loop over TUs to get best matching source segment and its translation for (int i = 0; i < allTUs.getLength(); i++) { tu = allTUs.item(i); sourceSeg = xpath.evaluate(sourceSegQueryString, tu); targetSeg = xpath.evaluate(targetSegQueryString, tu); // Make strings lowercase to make comparison case-insensitive. // (Case-sensitive comparison would penalize mere capitalization // differences equally with whole-word differences.) Locale srcLoc = Core.getProject().getProjectProperties().getSourceLanguage().getLocale(); dist = getLevensteinDistance(text.toLowerCase(srcLoc), sourceSeg.toLowerCase(srcLoc)); if (dist < lowestEditDistance && !sourceSeg.isEmpty() && !targetSeg.isEmpty()) { lowestEditDistance = dist; bestTranslation = targetSeg; } if (dist == 0) { break; // Can't find a better match than this one, so let's stop // the loop here. } } bestTranslation = cleanUpText(bestTranslation); return bestTranslation; } protected String cleanUpText(String str) { str = str.replace(""", "\""); str = str.replace(" ", "\u00A0"); str = str.replace("&", "&"); str = str.replace("'", "'"); str = str.replace("'", "'"); str = str.replace("<", "<"); str = str.replace(">", ">"); str = str.trim(); return str; } /** * @param text * @param sourceSeg * @return */ private int getLevensteinDistance(String text, String sourceSeg) { int dist; LevenshteinDistance leven = new LevenshteinDistance(); ITokenizer srcTokenizer = Core.getProject().getSourceTokenizer(); Token[] textTokenArray = srcTokenizer.tokenizeVerbatim(text); Token[] sourceSegTokenArray = srcTokenizer.tokenizeVerbatim(sourceSeg); dist = leven.compute(textTokenArray, sourceSegTokenArray); return dist; } protected String getMyMemoryResponse(Language sLang, Language tLang, String text, String format) throws UnsupportedEncodingException, IOException { String url = buildMyMemoryUrl(sLang, tLang, text, format); // Get email from systemProperties to enable 1000rq/day instead of 100 rq/day String email = getCredential(MYMEMORY_API_EMAIL); if (!StringUtil.isEmpty(email)) { url = url + "&de=" + email; } // Get the results from MyMemory String myMemoryResponse = ""; try { myMemoryResponse = WikiGet.getURL(url); } catch (IOException e) { throw e; } return myMemoryResponse; } /** * @param sLang * @param tLang * @param text * @param format * @return * @throws UnsupportedEncodingException * * This method must be overriden in the concrete implementations * to adjust the query to include or exclude MT results */ protected abstract String buildMyMemoryUrl(Language sLang, Language tLang, String text, String format) throws UnsupportedEncodingException; /** * Removes any character before <?xml in a string. * This prevents an exception when trying to create a DOM object from that string. * @param str The input string * @return The string starting with <?xml, if found, or the initial string */ protected String getXMLString(String str) { int xmlHeader = str.indexOf("<?xml"); if (xmlHeader != -1) { // XML header is not at the beginning str = str.substring(xmlHeader); } return str; } @Override public boolean isConfigurable() { return true; } @Override public void showConfigurationUI(Window parent) { MTConfigDialog dialog = new MTConfigDialog(parent, getName()) { @Override protected void onConfirm() { String email = panel.valueField1.getText().trim(); boolean temporary = panel.temporaryCheckBox.isSelected(); setCredential(MYMEMORY_API_EMAIL, email, temporary); } }; dialog.panel.valueLabel1.setText(OStrings.getString("MT_ENGINE_MYMEMORY_EMAIL_LABEL")); dialog.panel.valueField1.setText(getCredential(MYMEMORY_API_EMAIL)); dialog.panel.valueLabel2.setVisible(false); dialog.panel.valueField2.setVisible(false); dialog.panel.temporaryCheckBox.setSelected(isCredentialStoredTemporarily(MYMEMORY_API_EMAIL)); dialog.show(); } }