/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2016 Lev Abashkin
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.languagetools;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.omegat.util.JsonParser;
import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.OStrings;
public class LanguageToolNetworkBridge extends BaseLanguageToolBridge {
/* Constants */
private final static String CHECK_PATH = "/v2/check";
private final static String LANGS_PATH = "/v2/languages";
private final static String SERVER_CLASS_NAME = "org.languagetool.server.HTTPServer";
private final static String API_VERSION = "1";
/* Instance scope fields */
private Process server;
private int localPort;
private String serverUrl;
/* Project scope fields */
private Language sourceLang, targetLang;
private String disabledCategories, disabledRules, enabledRules;
/**
* Get instance talking to remote server
*
* @param url
* URL of remote LanguageTool server
* @return new LanguageToolNetworkBridge instance
* @throws java.lang.Exception
*/
public LanguageToolNetworkBridge(Language sourceLang, Language targetLang, String url) throws Exception {
// Try to connect URL
if (!testServer(url)) {
Log.logWarningRB("LT_BAD_URL");
throw new Exception();
}
// OK, URL seems valid, let's use it.
serverUrl = url;
init(sourceLang, targetLang);
}
/**
* Get instance spawning and talking to local server
*
* @param path
* local LanguageTool directory
* @param port
* local port for spawned server to listen
* @return new LanguageToolNetworkBridge instance
* @throws java.lang.Exception
*/
public LanguageToolNetworkBridge(Language sourceLang, Language targetLang, String path, int port) throws Exception {
// Remember port
localPort = port;
File serverJar = new File(path);
// Check if ClassPath points to a real file
if (!serverJar.isFile()) {
Log.logWarningRB("LT_BAD_LOCAL_PATH");
throw new Exception();
}
// Check if socket is available
try {
new ServerSocket(port).close();
} catch (Exception e) {
Log.logWarningRB("LT_BAD_SOCKET");
throw new Exception();
}
// Run the server
ProcessBuilder pb = new ProcessBuilder("java", "-cp", serverJar.getAbsolutePath(), SERVER_CLASS_NAME, "--port",
Integer.toString(port));
pb.redirectErrorStream(true);
server = pb.start();
// Create thread to consume server output
new Thread(() -> {
try (InputStream is = server.getInputStream()) {
@SuppressWarnings("unused")
int b;
while ((b = is.read()) != -1) {
// Discard
}
} catch (IOException e) {
// Do nothing
}
}).start();
// Wait for server to start
int timeout = 10000;
int timeWaiting = 0;
int interval = 10;
while (true) {
Thread.sleep(interval);
timeWaiting += interval;
try {
new Socket("localhost", port).close();
break;
} catch (Exception e) {
}
if (timeWaiting >= timeout) {
Log.logWarningRB("LT_SERVER_START_TIMEOUT");
server.destroy();
throw new Exception();
}
}
serverUrl = "http://localhost:" + port + CHECK_PATH;
Log.log(OStrings.getString("LT_SERVER_STARTED"));
try {
init(sourceLang, targetLang);
} catch (Exception ex) {
stop();
throw ex;
}
}
/**
* Common initialization for both constructors
*
* @throws Exception
* If unable to determine the server's supported languages
*/
private void init(Language sourceLang, Language targetLang) throws Exception {
List<Object> serverLanguages = getSupportedLanguages();
this.sourceLang = negotiateLanguage(serverLanguages, sourceLang);
this.targetLang = negotiateLanguage(serverLanguages, targetLang);
Log.log("Negotiated LanguageTool source language: " + this.sourceLang);
Log.log("Negotiated LanguageTool target language: " + this.targetLang);
}
@Override
public synchronized void stop() {
if (server != null) {
try {
server.destroy();
// Wait for server to release socket
while (true) {
try {
new Socket("localhost", localPort).close();
} catch (Exception e) {
break;
}
}
Log.log(OStrings.getString("LT_SERVER_TERMINATED"));
server = null;
} catch (Exception ex) {
Log.log(ex);
}
}
}
@Override
public void applyRuleFilters(Set<String> disabledCategories,
Set<String> disabledRules, Set<String> enabledRules) {
this.disabledCategories = String.join(",", disabledCategories);
this.disabledRules = String.join(",", disabledRules);
this.enabledRules = String.join(",", enabledRules);
}
@Override
@SuppressWarnings("unchecked")
protected List<LanguageToolResult> getCheckResultsImpl(String sourceText, String translationText) throws Exception {
if (targetLang == null) {
return Collections.emptyList();
}
URL url = new URL(serverUrl);
URLConnection conn = url.openConnection();
conn.setRequestProperty("User-Agent", OStrings.getNameAndVersion());
conn.setDoOutput(true);
try (OutputStreamWriter writer = new OutputStreamWriter(conn.getOutputStream(),
StandardCharsets.UTF_8)) {
String srcLang = sourceLang == null ? null : sourceLang.toString();
writer.write(buildPostData(srcLang, targetLang.toString(), sourceText, translationText,
disabledCategories, disabledRules, enabledRules));
writer.flush();
}
checkHttpError(conn);
String json = "";
try (InputStream in = conn.getInputStream()) {
json = IOUtils.toString(in, StandardCharsets.UTF_8);
}
Map<String, Object> response = (Map<String, Object>) JsonParser.parse(json);
Map<String, String> software = (Map<String, String>) response.get("software");
if (!software.get("apiVersion").equals(API_VERSION)) {
Log.logWarningRB("LT_API_VERSION_MISMATCH");
}
List<Map<String, Object>> matches = (List<Map<String, Object>>) response.get("matches");
return matches.stream().map(match -> {
String message = addSuggestionTags((String) match.get("message"));
int start = (int) match.get("offset");
int end = start + (int) match.get("length");
Map<String, Object> rule = (Map<String, Object>) match.get("rule");
String ruleId = (String) rule.get("id");
String ruleDescription = (String) rule.get("description");
return new LanguageToolResult(message, start, end, ruleId, ruleDescription);
}).collect(Collectors.toList());
}
@SuppressWarnings("unchecked")
protected List<Object> getSupportedLanguages() throws Exception {
// This is a really stupid way to get the /languages endpoint URL, but it'll do for now.
String langsUrl = serverUrl.replace(CHECK_PATH, LANGS_PATH);
URL url = new URL(langsUrl);
URLConnection conn = url.openConnection();
conn.setRequestProperty("User-Agent", OStrings.getNameAndVersion());
conn.setDoOutput(true);
checkHttpError(conn);
String json = "";
try (InputStream in = conn.getInputStream()) {
json = IOUtils.toString(in, StandardCharsets.UTF_8);
}
return (List<Object>) JsonParser.parse(json);
}
static void checkHttpError(URLConnection conn) throws Exception {
if (conn instanceof HttpURLConnection) {
HttpURLConnection httpConn = (HttpURLConnection) conn;
if (httpConn.getResponseCode() != 200) {
try (InputStream err = httpConn.getErrorStream()) {
String errMsg = IOUtils.toString(err, StandardCharsets.UTF_8);
throw new Exception(errMsg);
}
}
}
}
/**
* Replace double quotes with <suggestion></suggestion> tags
* in error message to imitate native LanguageTool behavior
*/
static String addSuggestionTags(String str) {
return str.replaceAll("^([^:]+:\\s?)\"([^']+)\"", "$1<suggestion>$2</suggestion>");
}
/**
* Construct POST request data
*/
static String buildPostData(String sourceLang, String targetLang, String sourceText,
String targetText, String disabledCategories, String disabledRules, String enabledRules)
throws UnsupportedEncodingException {
String encoding = "UTF-8";
StringBuilder result = new StringBuilder();
result.append("text=").append(URLEncoder.encode(targetText, encoding)).append("&language=")
.append(URLEncoder.encode(targetLang, encoding));
if (sourceText != null && sourceLang != null) {
result.append("&srctext=").append(URLEncoder.encode(sourceText, encoding)).append("&motherTongue=")
.append(URLEncoder.encode(sourceLang, encoding));
}
if (disabledCategories != null) {
result.append("&disabledCategories=").append(URLEncoder.encode(disabledCategories, encoding));
}
if (disabledRules != null) {
result.append("&disabledRules=").append(URLEncoder.encode(disabledRules, encoding));
}
if (enabledRules != null) {
result.append("&enabledRules=").append(URLEncoder.encode(enabledRules, encoding));
}
return result.toString();
}
/**
* Try to talk with LT server and return result
*/
static boolean testServer(String testUrl) {
if (testUrl.trim().toLowerCase(Locale.ENGLISH).startsWith("https://languagetool.org/api/v2/check")) {
// Blacklist the official LanguageTool public API specifically
// because this is what users are most likely to try, but they ask
// not to send automated requests:
// http://wiki.languagetool.org/public-http-api
return false;
}
try {
URL url = new URL(testUrl);
URLConnection conn = url.openConnection();
conn.setDoOutput(true);
try (OutputStreamWriter writer = new OutputStreamWriter(conn.getOutputStream(),
StandardCharsets.UTF_8)) {
// Supply a dummy disabled category to force the server to take
// its configuration from this query only, not any server-side
// config.
writer.write(buildPostData(null, "en-US", null, "Test", "FOO", null, null));
writer.flush();
}
checkHttpError(conn);
try (InputStream in = conn.getInputStream()) {
String response = IOUtils.toString(in, StandardCharsets.UTF_8);
if (response.contains("<?xml")) {
Log.logErrorRB("LT_WRONG_FORMAT_RESPONSE");
return false;
} else {
return true;
}
}
} catch (Exception e) {
Log.log(e);
return false;
}
}
/**
* Find the best-matching language from the provided options.
*
* @param serverLangs
* The raw response objects from {@link #getSupportedLanguages()}
* @param desiredLang
* The language to match
* @return The best-matching language, or null if no languages matched at all
*/
@SuppressWarnings("unchecked")
static Language negotiateLanguage(List<Object> serverLangs, Language desiredLang) {
// Search for full xx-YY match
String omLocale = desiredLang.getLanguage();
for (Object obj : serverLangs) {
Map<String, String> lang = (Map<String, String>) obj;
if (omLocale.equalsIgnoreCase(lang.get("longCode"))) {
return desiredLang;
}
}
// Search for just xx match
String omLang = desiredLang.getLanguageCode();
for (Object obj : serverLangs) {
Map<String, String> lang = (Map<String, String>) obj;
if (omLang.equalsIgnoreCase(lang.get("longCode"))) {
return new Language(desiredLang.getLanguageCode());
}
}
for (Object obj : serverLangs) {
Map<String, String> lang = (Map<String, String>) obj;
if (omLang.equalsIgnoreCase(lang.get("code"))) {
return new Language(desiredLang.getLanguageCode());
}
}
return null;
}
}