/**************************************************************************
Demo Machine Translation plugin for OmegaT(http://www.omegat.org/)
This file was copied exactly from OmegaT
(org.omegat.core.machinetranslators.GoogleTranslate.java)
The original code header and contents follow directly below.
**************************************************************************/
/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2010 Alex Buloichik, Didier Briel
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
**************************************************************************/
package org.omegat.plugin.machinetranslators;
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.omegat.core.machinetranslators.BaseTranslate;
import org.omegat.util.Language;
import org.omegat.util.PatternConsts;
import org.omegat.util.WikiGet;
/**
* Support of Google Translate machine translation.
*
* http://code.google.com/intl/be/apis/ajaxlanguage/documentation/#Translation
*
* @author Alex Buloichik (alex73mail@gmail.com)
* @author Didier Briel
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public class Google2Translate extends BaseTranslate {
protected static final String GT_URL = "http://ajax.googleapis.com/ajax/services/language/translate";
protected static final String MARK_BEG = "{\"translatedText\":\"";
protected static final String MARK_END = "\"}";
protected static final Pattern RE_UNICODE = Pattern.compile("\\\\u([0-9A-Fa-f]{4})");
protected static final Pattern RE_HTML = Pattern.compile("([0-9]+);");
@Override
protected String getPreferenceName()
{
return "allow_google2_translate";
}
public String getName() {
return "Google2 Translate";
}
@Override
protected String translate(Language sLang, Language tLang, String text) throws Exception {
String trText = text.length() > 5000 ? text.substring(0, 4997) + "..." : text;
Map<String, String> p = new TreeMap<String, String>();
p.put("v", "1.0");
String targetLang = tLang.getLanguageCode();
// Differentiate in target between simplified and traditional Chinese
if ((tLang.getLanguage().compareToIgnoreCase("zh-cn") == 0)
|| (tLang.getLanguage().compareToIgnoreCase("zh-tw") == 0))
targetLang = tLang.getLanguage();
else if ((tLang.getLanguage().compareToIgnoreCase("zh-hk") == 0))
targetLang = "ZH-TW"; // Google doesn't recognize ZH-HK
p.put("langpair", sLang.getLanguageCode() + '|' + targetLang);
p.put("q", trText);
String v = WikiGet.post(GT_URL, p);
while (true) {
Matcher m = RE_UNICODE.matcher(v);
if (!m.find()) {
break;
}
String g = m.group();
char c = (char) Integer.parseInt(m.group(1), 16);
v = v.replace(g, Character.toString(c));
}
v = v.replace(""", """);
v = v.replace(" ", " ");
v = v.replace("&", "&");
while (true) {
Matcher m = RE_HTML.matcher(v);
if (!m.find()) {
break;
}
String g = m.group();
char c = (char) Integer.parseInt(m.group(1));
v = v.replace(g, Character.toString(c));
}
int beg = v.indexOf(MARK_BEG) + MARK_BEG.length();
int end = v.indexOf(MARK_END, beg);
String tr = v.substring(beg, end);
// Attempt to clean spaces added by GT
// Spaces after
Matcher tag = PatternConsts.OMEGAT_TAG_SPACE.matcher(tr);
while (tag.find()) {
String searchTag = tag.group();
if (text.indexOf(searchTag) == -1) { // The tag didn't appear with a
// trailing space in the source text
String replacement = searchTag.substring(0, searchTag.length() - 1);
tr = tr.replace(searchTag, replacement);
}
}
// Spaces before
tag = PatternConsts.SPACE_OMEGAT_TAG.matcher(tr);
while (tag.find()) {
String searchTag = tag.group();
if (text.indexOf(searchTag) == -1) { // The tag didn't appear with a
// leading space in the source text
String replacement = searchTag.substring(1, searchTag.length());
tr = tr.replace(searchTag, replacement);
}
}
return tr;
}
}