/* LanguageTool, a natural language style checker
* Copyright (C) 2011 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.server;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URLDecoder;
import java.text.SimpleDateFormat;
import java.util.*;
import org.apache.tika.language.LanguageIdentifier;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.gui.Configuration;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.bitext.BitextRule;
import org.languagetool.tools.StringTools;
import org.languagetool.tools.Tools;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
/**
 * HTTP handler of the embedded LanguageTool server. It answers two kinds of requests:
 * a request whose path ends with {@code /Languages} returns the list of supported
 * languages, any other request is treated as a text check and must carry a
 * {@code text} parameter (in the query string, or in the body for POST requests).
 *
 * <p>All request-specific settings (enabled/disabled rules, GUI configuration) are kept in
 * local variables and passed between the private helpers, so handling one request does not
 * modify handler fields. {@link #setMaxTextLength(int)} and {@link #setAllowOriginUrl(String)}
 * are plain setters without synchronization.
 */
class LanguageToolHttpHandler implements HttpHandler {

  private static final String CONTENT_TYPE_VALUE = "text/xml; charset=UTF-8";
  private static final String ENCODING = "utf-8";
  private static final int CONTEXT_SIZE = 40; // characters
  private static final int MIN_LENGTH_FOR_AUTO_DETECTION = 60; // characters

  private final Set<String> allowedIps;
  private final boolean verbose;
  private final boolean internalServer;
  private final RequestLimiter requestLimiter;

  private int maxTextLength = Integer.MAX_VALUE;
  private String allowOriginUrl;

  /**
   * @param verbose print the input text in case of exceptions
   * @param allowedIps set of IPs that may connect or {@code null} to allow any IP
   * @param internal whether the GUI configuration may be applied to checks that do not
   *                 select rules via request parameters
   * @param requestLimiter may be null
   * @throws IOException presumably when the default configuration cannot be loaded
   */
  LanguageToolHttpHandler(boolean verbose, Set<String> allowedIps, boolean internal, RequestLimiter requestLimiter) throws IOException {
    this.verbose = verbose;
    this.allowedIps = allowedIps;
    this.internalServer = internal;
    this.requestLimiter = requestLimiter;
    // Kept from the original constructor so that a problem with loading the configuration
    // still surfaces at construction time. The instance is not stored: every check loads
    // the configuration for its own language.
    new Configuration(null);
  }

  void setMaxTextLength(int maxTextLength) {
    this.maxTextLength = maxTextLength;
  }

  /**
   * Value to set as the "Access-Control-Allow-Origin" http header. Use {@code null}
   * to not return that header at all. Use {@code *} to run a server that any other web site
   * can use from Javascript/Ajax (search Cross-origin resource sharing (CORS) for details).
   */
  void setAllowOriginUrl(String allowOriginUrl) {
    this.allowOriginUrl = allowOriginUrl;
  }

  /**
   * Handles one HTTP request. Requests rejected by the request limiter or by the IP
   * whitelist get a 403 response; any exception while processing yields a 500 response
   * containing the escaped stack trace. The exchange is always closed.
   */
  @Override
  public void handle(HttpExchange httpExchange) throws IOException {
    String text = null;
    try {
      final URI requestedUri = httpExchange.getRequestURI();
      final String remoteAddress = httpExchange.getRemoteAddress().getAddress().getHostAddress();
      if (requestLimiter != null && !requestLimiter.isAccessOkay(remoteAddress)) {
        final String errorMessage = "Error: Access from " + StringTools.escapeXML(remoteAddress) +
                " denied - too many requests. Allowed maximum requests: " + requestLimiter.getRequestLimit() +
                " requests per " + requestLimiter.getRequestLimitPeriodInSeconds() + " seconds";
        sendError(httpExchange, HttpURLConnection.HTTP_FORBIDDEN, errorMessage);
        // The response is complete; throwing here would make the catch block try to send
        // a second response on the same exchange.
        print(errorMessage);
        return;
      }
      if (allowedIps != null && !allowedIps.contains(remoteAddress)) {
        final String errorMessage = "Error: Access from " + StringTools.escapeXML(remoteAddress) + " denied";
        sendError(httpExchange, HttpURLConnection.HTTP_FORBIDDEN, errorMessage);
        print(errorMessage);
        return;
      }
      // Only read and parse the request once the client is known to be allowed.
      final Map<String, String> parameters = getRequestQuery(httpExchange, requestedUri);
      if (requestedUri.getRawPath().endsWith("/Languages")) {
        // request type: list known languages
        printListOfLanguages(httpExchange);
      } else {
        // request type: text checking
        text = parameters.get("text");
        if (text == null) {
          throw new IllegalArgumentException("Missing 'text' parameter");
        }
        checkText(text, httpExchange, parameters);
      }
    } catch (Exception e) {
      if (verbose) {
        print("Exception was caused by this text: " + text);
      }
      e.printStackTrace();
      // getResponseCode() is -1 as long as no response headers have been sent. If they were
      // already sent, a second sendResponseHeaders() call would itself throw.
      if (httpExchange.getResponseCode() == -1) {
        final String response = "Error: " + StringTools.escapeXML(Tools.getFullStackTrace(e));
        sendError(httpExchange, HttpURLConnection.HTTP_INTERNAL_ERROR, response);
      }
    } finally {
      httpExchange.close();
    }
  }

  /** Sends {@code response} as the body of a response with the given status code. */
  private void sendError(HttpExchange httpExchange, int returnCode, String response) throws IOException {
    final byte[] body = response.getBytes(ENCODING);
    httpExchange.sendResponseHeaders(returnCode, body.length);
    httpExchange.getResponseBody().write(body);
  }

  /** Sets the common headers and sends {@code response} with status 200. */
  private void sendXmlResponse(HttpExchange httpExchange, String response) throws IOException {
    setCommonHeaders(httpExchange);
    final byte[] body = response.getBytes(ENCODING);
    httpExchange.sendResponseHeaders(HttpURLConnection.HTTP_OK, body.length);
    httpExchange.getResponseBody().write(body);
  }

  /**
   * Returns the decoded request parameters: read from the request body for POST requests,
   * from the URI's query string otherwise.
   */
  private Map<String, String> getRequestQuery(HttpExchange httpExchange, URI requestedUri) throws IOException {
    final String query;
    if ("post".equalsIgnoreCase(httpExchange.getRequestMethod())) {
      query = StringTools.streamToString(httpExchange.getRequestBody(), ENCODING);
    } else {
      query = requestedUri.getRawQuery();
    }
    return parseQuery(query);
  }

  private void printListOfLanguages(HttpExchange httpExchange) throws IOException {
    sendXmlResponse(httpExchange, getSupportedLanguagesAsXML());
  }

  /** Sets the content type and, if configured, the "Access-Control-Allow-Origin" header. */
  private void setCommonHeaders(HttpExchange httpExchange) {
    httpExchange.getResponseHeaders().set("Content-Type", CONTENT_TYPE_VALUE);
    if (allowOriginUrl != null) {
      httpExchange.getResponseHeaders().set("Access-Control-Allow-Origin", allowOriginUrl);
    }
  }

  /**
   * Detects the language of {@code text}.
   *
   * @param text the text whose language is to be detected
   * @param fallbackLanguage short name of the language to use for texts shorter than
   *        {@link #MIN_LENGTH_FOR_AUTO_DETECTION} characters, or {@code null} to always
   *        run the detection
   * @return the fallback language for short texts, otherwise the detected language (its
   *         default variant if it has one); English if the detected language code is
   *         rejected by {@code Language.getLanguageForShortName}
   */
  private static Language detectLanguageOfString(final String text, final String fallbackLanguage) {
    // TODO: use identifier.isReasonablyCertain() - but make sure it works!
    if (text.length() < MIN_LENGTH_FOR_AUTO_DETECTION && fallbackLanguage != null) {
      print("Auto-detected language of text with length " + text.length() + " is not reasonably certain, using '" + fallbackLanguage + "' as fallback");
      return Language.getLanguageForShortName(fallbackLanguage);
    }
    final LanguageIdentifier identifier = new LanguageIdentifier(text);
    Language lang;
    try {
      lang = Language.getLanguageForShortName(identifier.getLanguage());
    } catch (IllegalArgumentException e) {
      // fall back to English
      lang = Language.getLanguageForLocale(Locale.ENGLISH);
    }
    if (lang.getDefaultVariant() != null) {
      lang = lang.getDefaultVariant();
    }
    return lang;
  }

  /**
   * Checks {@code text} according to the request parameters and writes the matches as XML
   * to the exchange. If a {@code srctext} parameter is present a bilingual check is run,
   * which also requires the {@code motherTongue} parameter.
   *
   * @throws IllegalArgumentException if the text is longer than the configured maximum or
   *         the parameters are missing or contradictory
   */
  private void checkText(String text, HttpExchange httpExchange, Map<String, String> parameters) throws Exception {
    final long timeStart = System.currentTimeMillis();
    if (text.length() > maxTextLength) {
      throw new IllegalArgumentException("Text is " + text.length() + " characters long, exceeding maximum length of " + maxTextLength);
    }
    final String langParam = parameters.get("language");
    final boolean autoDetect = "1".equals(parameters.get("autodetect"));
    if (langParam == null && !autoDetect) {
      throw new IllegalArgumentException("Missing 'language' parameter. Specify language or use autodetect=1 for auto-detecting the language of the input text.");
    }
    final Language lang;
    if (autoDetect) {
      lang = detectLanguageOfString(text, langParam);
      print("Auto-detected language: " + lang.getShortNameWithVariant());
    } else {
      lang = Language.getLanguageForShortName(langParam);
    }
    final String motherTongueParam = parameters.get("motherTongue");
    Language motherTongue = null;
    if (motherTongueParam != null) {
      motherTongue = Language.getLanguageForShortName(motherTongueParam);
    }
    final RuleSettings ruleSettings = parseRuleSettings(parameters);

    final List<RuleMatch> matches;
    final String sourceText = parameters.get("srctext");
    if (sourceText == null) {
      final JLanguageTool lt = getLanguageToolInstance(lang, motherTongue, ruleSettings);
      matches = lt.check(text);
    } else {
      if (motherTongueParam == null) {
        throw new IllegalArgumentException("Missing 'motherTongue' for bilingual checks");
      }
      print("Checking bilingual text, with source length " + sourceText.length() +
              " and target length " + text.length() + " (characters), source language " +
              motherTongue + " and target language " + langParam);
      final JLanguageTool sourceLt = getLanguageToolInstance(motherTongue, null, ruleSettings);
      final JLanguageTool targetLt = getLanguageToolInstance(lang, null, ruleSettings);
      final List<BitextRule> bRules = Tools.getBitextRules(motherTongue, lang);
      matches = Tools.checkBitext(sourceText, text, sourceLt, targetLt, bRules);
    }

    final String response = StringTools.ruleMatchesToXML(matches, text,
            CONTEXT_SIZE, StringTools.XmlPrintMode.NORMAL_XML, lang, motherTongue);
    sendXmlResponse(httpExchange, response);

    String languageMessage = lang.getShortNameWithVariant();
    if (motherTongue != null) {
      languageMessage += " (mother tongue: " + motherTongue.getShortNameWithVariant() + ")";
    }
    print("Check done: " + text.length() + " characters, language " + languageMessage + ", "
            + (System.currentTimeMillis() - timeStart) + "ms");
  }

  /**
   * Reads the {@code enabled}, {@code disabled} and {@code enabledOnly} request parameters.
   *
   * @throws IllegalArgumentException if disabled rules are combined with {@code enabledOnly=yes}
   */
  private static RuleSettings parseRuleSettings(Map<String, String> parameters) {
    final String[] enabledRules = splitRuleIds(parameters.get("enabled"));
    final boolean useEnabledOnly = "yes".equals(parameters.get("enabledOnly"));
    final String[] disabledRules = splitRuleIds(parameters.get("disabled"));
    if (disabledRules.length > 0 && useEnabledOnly) {
      throw new IllegalArgumentException("You cannot specify disabled rules using enabledOnly=yes");
    }
    return new RuleSettings(enabledRules, disabledRules, useEnabledOnly);
  }

  /** Splits a comma-separated parameter value; {@code null} yields an empty array. */
  private static String[] splitRuleIds(String commaSeparatedIds) {
    if (commaSeparatedIds == null) {
      return new String[0];
    }
    return commaSeparatedIds.split(",");
  }

  /**
   * Splits a raw query string at {@code &} and decodes its {@code key=value} pairs.
   *
   * @param query the raw (still URL-encoded) query, may be {@code null}
   * @return a modifiable map of decoded parameters, empty if {@code query} is {@code null}
   */
  private Map<String, String> parseQuery(String query) throws UnsupportedEncodingException {
    if (query == null) {
      return new HashMap<String, String>();
    }
    return getParameterMap(query.split("[&]"));
  }

  /**
   * URL-decodes the given {@code key=value} pairs. Pairs without a {@code =} are ignored;
   * if a key occurs more than once the last value wins.
   */
  private Map<String, String> getParameterMap(String[] pairs) throws UnsupportedEncodingException {
    final Map<String, String> parameters = new HashMap<String, String>();
    for (String pair : pairs) {
      final int delimPos = pair.indexOf("=");
      if (delimPos != -1) {
        final String key = URLDecoder.decode(pair.substring(0, delimPos), ENCODING);
        final String value = URLDecoder.decode(pair.substring(delimPos + 1), ENCODING);
        parameters.put(key, value);
      }
    }
    return parameters;
  }

  /** Prints {@code s} to standard output, prefixed with the current date and time. */
  private static void print(String s) {
    // SimpleDateFormat is not thread-safe, so a fresh instance is used for every call
    final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    final String now = dateFormat.format(new Date());
    System.out.println(now + " " + s);
  }

  /**
   * Creates a JLanguageTool instance for a specific language and mother tongue with the
   * default pattern and false friend rules activated. If the request selected rules, that
   * selection is applied; otherwise, on an internal server whose configuration asks for it,
   * the GUI configuration is applied.
   *
   * @param lang the language to be used.
   * @param motherTongue the user's mother tongue or <code>null</code>
   * @param ruleSettings the rule selection of the current request
   * @return a JLanguageTool instance for a specific language and mother tongue.
   * @throws Exception when JLanguageTool creation failed
   */
  private JLanguageTool getLanguageToolInstance(Language lang, Language motherTongue, RuleSettings ruleSettings) throws Exception {
    final JLanguageTool languageTool = new JLanguageTool(lang, motherTongue);
    languageTool.activateDefaultPatternRules();
    languageTool.activateDefaultFalseFriendRules();
    final Configuration config = new Configuration(lang);
    final boolean useQuerySettings = ruleSettings.useQuerySettings();
    if (!useQuerySettings && internalServer && config.getUseGUIConfig()) { // use the GUI config values
      configureGUI(languageTool, config);
    }
    if (useQuerySettings) {
      Tools.selectRules(languageTool, ruleSettings.disabledRules, ruleSettings.enabledRules, ruleSettings.useEnabledOnly);
    }
    return languageTool;
  }

  /**
   * Applies the disabled rules, disabled categories and enabled rules of {@code config}
   * to {@code langTool}.
   */
  private void configureGUI(JLanguageTool langTool, Configuration config) {
    print("Using options configured in the GUI");
    //TODO: add a parameter to config to set language
    final Set<String> disabledRuleIds = config.getDisabledRuleIds();
    if (disabledRuleIds != null) {
      for (final String ruleId : disabledRuleIds) {
        langTool.disableRule(ruleId);
      }
    }
    final Set<String> disabledCategories = config.getDisabledCategoryNames();
    if (disabledCategories != null) {
      for (final String categoryName : disabledCategories) {
        langTool.disableCategory(categoryName);
      }
    }
    final Set<String> enabledRuleIds = config.getEnabledRuleIds();
    if (enabledRuleIds != null) {
      for (final String ruleId : enabledRuleIds) {
        langTool.enableDefaultOffRule(ruleId);
        langTool.enableRule(ruleId);
      }
    }
  }

  /**
   * Construct an XML string containing all supported languages. The XML format looks like this:
   * <pre>
   * &lt;languages&gt;
   *   &lt;language name="Catalan" abbr="ca" abbrWithVariant="ca-ES"/&gt;
   *   &lt;language name="German" abbr="de" abbrWithVariant="de"/&gt;
   *   &lt;language name="German (Germany)" abbr="de" abbrWithVariant="de-DE"/&gt;
   * &lt;/languages&gt;
   * </pre>
   * The languages are sorted alphabetically by their name.
   * @return an XML document listing all supported languages
   */
  public static String getSupportedLanguagesAsXML() {
    // sort a copy so that the shared language array keeps its order
    final Language[] languages = Language.REAL_LANGUAGES.clone();
    Arrays.sort(languages, new Comparator<Language>() {
      @Override
      public int compare(Language o1, Language o2) {
        return o1.getName().compareTo(o2.getName());
      }
    });
    final StringBuilder xmlBuffer = new StringBuilder("<?xml version='1.0' encoding='" + ENCODING + "'?>\n<languages>\n");
    for (Language lang : languages) {
      xmlBuffer.append(String.format("\t<language name=\"%s\" abbr=\"%s\" abbrWithVariant=\"%s\"/> \n", lang.getName(),
              lang.getShortName(), lang.getShortNameWithVariant()));
    }
    xmlBuffer.append("</languages>\n");
    return xmlBuffer.toString();
  }

  /** Immutable rule selection taken from the parameters of a single request. */
  private static final class RuleSettings {

    private final String[] enabledRules;
    private final String[] disabledRules;
    private final boolean useEnabledOnly;

    RuleSettings(String[] enabledRules, String[] disabledRules, boolean useEnabledOnly) {
      this.enabledRules = enabledRules;
      this.disabledRules = disabledRules;
      this.useEnabledOnly = useEnabledOnly;
    }

    /** @return true if the request enabled or disabled at least one rule */
    boolean useQuerySettings() {
      return enabledRules.length > 0 || disabledRules.length > 0;
    }
  }
}