/* LanguageTool, a natural language style checker
* Copyright (C) 2016 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.server;
import com.sun.net.httpserver.HttpExchange;
import org.jetbrains.annotations.NotNull;
import org.languagetool.*;
import org.languagetool.gui.Configuration;
import org.languagetool.language.LanguageIdentifier;
import org.languagetool.rules.CategoryId;
import org.languagetool.rules.RuleMatch;
import org.languagetool.tools.Tools;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.*;
import static org.languagetool.server.ServerTools.print;
/**
 * Base class for the server's text checking: validates the request parameters,
 * runs the check on a worker thread (optionally limited by
 * {@code config.maxCheckTimeMillis}), sends the response and logs a summary line.
 * Subclasses decide how parameters are interpreted and how the response is rendered.
 *
 * @since 3.4
 */
abstract class TextChecker {

  /** Set the response headers; called by {@link #checkText} right before the response is built and sent. */
  protected abstract void setHeaders(HttpExchange httpExchange);

  /**
   * Render the response body for a finished (or, if {@code incompleteResult} is true, timed-out) check.
   *
   * @param motherTongue the user's mother tongue or {@code null}
   * @param incompleteResult true if {@code matches} only contains the matches found before the timeout
   */
  protected abstract String getResponse(String text, Language lang, Language motherTongue, List<RuleMatch> matches, boolean incompleteResult);

  /** Extract the preferred language variants from the request parameters (never {@code null}). */
  @NotNull
  protected abstract List<String> getPreferredVariants(Map<String, String> parameters);

  /** Determine the language the text is to be checked with. */
  protected abstract Language getLanguage(String text, Map<String, String> parameters, List<String> preferredVariants);

  /** Whether the request asks for automatic language detection (only used for the log line here). */
  protected abstract boolean getLanguageAutoDetect(Map<String, String> parameters);

  /** Extract the ids of the rules to be enabled from the request parameters (never {@code null}). */
  @NotNull
  protected abstract List<String> getEnabledRuleIds(Map<String, String> parameters);

  /** Extract the ids of the rules to be disabled from the request parameters (never {@code null}). */
  @NotNull
  protected abstract List<String> getDisabledRuleIds(Map<String, String> parameters);

  protected static final int CONTEXT_SIZE = 40; // characters

  protected final HTTPServerConfig config;

  private static final String ENCODING = "UTF-8";
  private static final int CACHE_STATS_PRINT = 500; // print cache stats every n cache requests

  // Number of checks per language code, only used for logging.
  // NOTE(review): checkText() is presumably called from several request threads at once,
  // so the counters are kept in a concurrent map and updated atomically - confirm against caller.
  private final Map<String,Integer> languageCheckCounts = new ConcurrentHashMap<>();
  private final boolean internalServer;
  private final LanguageIdentifier identifier;
  private final ExecutorService executorService;
  private final ResultCache cache;  // null if caching is disabled (cache size <= 0)

  TextChecker(HTTPServerConfig config, boolean internalServer) {
    this.config = config;
    this.internalServer = internalServer;
    this.identifier = new LanguageIdentifier();
    this.executorService = Executors.newCachedThreadPool();
    this.cache = config.getCacheSize() > 0 ? new ResultCache(config.getCacheSize()) : null;
  }

  /** Stop the worker threads used for checking. */
  void shutdownNow() {
    executorService.shutdownNow();
  }

  /**
   * Check the given text according to the request parameters, send the response
   * via {@code httpExchange} and log a one-line summary.
   *
   * @throws TextTooLongException if the text is longer than {@code config.maxTextLength}
   * @throws IllegalArgumentException if the parameters are missing or contradict each other
   * @throws OutOfMemoryError if the worker thread ran out of memory (unwrapped from the {@link ExecutionException})
   * @throws RuntimeException if the check timed out and {@code allowIncompleteResults} is not set
   */
  void checkText(String text, HttpExchange httpExchange, Map<String, String> parameters) throws Exception {
    checkParams(parameters);
    long timeStart = System.currentTimeMillis();
    if (text.length() > config.maxTextLength) {
      throw new TextTooLongException("Your text exceeds this server's limit of " + config.maxTextLength +
              " characters (it's " + text.length() + " characters). Please submit a shorter text.");
    }
    boolean autoDetectLanguage = getLanguageAutoDetect(parameters);
    List<String> preferredVariants = getPreferredVariants(parameters);
    Language lang = getLanguage(text, parameters, preferredVariants);
    String motherTongueParam = parameters.get("motherTongue");
    Language motherTongue = motherTongueParam != null ? Languages.getLanguageForShortCode(motherTongueParam) : null;
    QueryParams params = buildQueryParams(parameters);

    // the listener collects matches as they are found so they can be returned on timeout:
    List<RuleMatch> ruleMatchesSoFar = Collections.synchronizedList(new ArrayList<>());
    Callable<List<RuleMatch>> task = () -> {
      // use to fake OOM in thread for testing:
      /*if (Math.random() < 0.1) {
        throw new OutOfMemoryError();
      }*/
      return getRuleMatches(text, lang, motherTongue, params, match -> ruleMatchesSoFar.add(match));
    };
    Future<List<RuleMatch>> future = executorService.submit(task);

    boolean incompleteResult = false;
    List<RuleMatch> matches;
    try {
      if (config.maxCheckTimeMillis < 0) {
        matches = future.get();  // negative limit = no timeout
      } else {
        matches = future.get(config.maxCheckTimeMillis, TimeUnit.MILLISECONDS);
      }
    } catch (ExecutionException e) {
      // surface an OOM of the worker thread as such, with or without a time limit:
      if (e.getCause() instanceof OutOfMemoryError) {
        throw (OutOfMemoryError) e.getCause();
      }
      throw e;
    } catch (TimeoutException e) {
      boolean cancelled = future.cancel(true);
      String message = "Text checking took longer than allowed maximum of " + config.maxCheckTimeMillis +
              " milliseconds (cancelled: " + cancelled +
              ", language: " + lang.getShortCodeWithCountryAndVariant() +
              ", " + text.length() + " characters of text, system load: " + readSystemLoad() + ")";
      if (params.allowIncompleteResults) {
        print(message + " - returning " + ruleMatchesSoFar.size() + " matches found so far");
        matches = new ArrayList<>(ruleMatchesSoFar);  // threads might still be running it seems, so make a copy
        incompleteResult = true;
      } else {
        throw new RuntimeException(message, e);
      }
    }

    setHeaders(httpExchange);
    String response = getResponse(text, lang, motherTongue, matches, incompleteResult);
    String langCode = lang.getShortCodeWithCountryAndVariant();
    String referrer = httpExchange.getRequestHeaders().getFirst("Referer");
    String messageSent = sendResponse(httpExchange, response);

    String languageMessage = langCode;
    if (motherTongue != null) {
      languageMessage += " (mother tongue: " + motherTongue.getShortCodeWithCountryAndVariant() + ")";
    }
    if (autoDetectLanguage) {
      languageMessage += "[auto]";
    }
    String agent = parameters.get("useragent") != null ? parameters.get("useragent") : "-";
    int count = languageCheckCounts.merge(langCode, 1, Integer::sum);
    print("Check done: " + text.length() + " chars, " + languageMessage + ", #" + count + ", " + referrer + ", "
            + matches.size() + " matches, "
            + (System.currentTimeMillis() - timeStart) + "ms, agent:" + agent
            + ", " + messageSent);
  }

  /**
   * Read the rule/category selection from the request parameters and reject contradicting combinations.
   *
   * @throws IllegalArgumentException if {@code enabledOnly} is combined with disabled rules/categories
   *         or used without any enabled rule/category
   */
  private QueryParams buildQueryParams(Map<String, String> parameters) {
    String enabledOnlyParam = parameters.get("enabledOnly");
    boolean useEnabledOnly = "yes".equals(enabledOnlyParam) || "true".equals(enabledOnlyParam);
    List<String> enabledRules = getEnabledRuleIds(parameters);
    List<String> disabledRules = getDisabledRuleIds(parameters);
    List<CategoryId> enabledCategories = getCategoryIds("enabledCategories", parameters);
    List<CategoryId> disabledCategories = getCategoryIds("disabledCategories", parameters);
    boolean anythingEnabled = !enabledRules.isEmpty() || !enabledCategories.isEmpty();
    boolean anythingDisabled = !disabledRules.isEmpty() || !disabledCategories.isEmpty();
    if (anythingDisabled && useEnabledOnly) {
      throw new IllegalArgumentException("You cannot specify disabled rules or categories using enabledOnly=true");
    }
    if (!anythingEnabled && useEnabledOnly) {
      throw new IllegalArgumentException("You must specify enabled rules or categories when using enabledOnly=true");
    }
    boolean useQuerySettings = anythingEnabled || anythingDisabled;
    boolean allowIncompleteResults = "true".equals(parameters.get("allowIncompleteResults"));
    return new QueryParams(enabledRules, disabledRules, enabledCategories, disabledCategories,
            useEnabledOnly, useQuerySettings, allowIncompleteResults);
  }

  /**
   * Write the response with status 200, encoding it only once.
   *
   * @return {@code "sent"} on success, otherwise {@code "notSent: "} plus the error message (for logging)
   */
  private String sendResponse(HttpExchange httpExchange, String response) {
    try {
      byte[] responseBytes = response.getBytes(ENCODING);
      httpExchange.sendResponseHeaders(HttpURLConnection.HTTP_OK, responseBytes.length);
      httpExchange.getResponseBody().write(responseBytes);
      return "sent";
    } catch (IOException exception) {
      // the client is disconnected
      return "notSent: " + exception.getMessage();
    }
  }

  /**
   * Content of {@code /proc/loadavg} for diagnostics, or {@code "(unknown)"} if the file does not
   * exist (works in Linux only(?)) or cannot be read. A failure here must not hide the timeout
   * that is being reported.
   */
  private static String readSystemLoad() {
    Path loadFile = Paths.get("/proc/loadavg");
    if (!loadFile.toFile().exists()) {
      return "(unknown)";
    }
    try {
      return Files.readAllLines(loadFile).toString();
    } catch (IOException ignored) {
      // purely informational, so fall back to the same placeholder as for a missing file
      return "(unknown)";
    }
  }

  /**
   * Make sure the mandatory request parameters are set.
   *
   * @throws IllegalArgumentException if the {@code text} parameter is missing
   */
  protected void checkParams(Map<String, String> parameters) {
    if (parameters.get("text") == null) {
      throw new IllegalArgumentException("Missing 'text' parameter");
    }
  }

  /**
   * Run the actual check. Also prints the cache hit rate whenever the cache's request count
   * is a multiple of {@link #CACHE_STATS_PRINT}.
   *
   * @param listener notified about matches by {@code JLanguageTool.check}
   */
  private List<RuleMatch> getRuleMatches(String text, Language lang,
                                         Language motherTongue, QueryParams params, RuleMatchListener listener) throws Exception {
    if (cache != null && cache.requestCount() % CACHE_STATS_PRINT == 0) {
      String hitPercentage = String.format(Locale.ENGLISH, "%.2f", cache.hitRate() * 100.0f);
      print("Cache stats: " + hitPercentage + "% hit rate");
    }
    JLanguageTool lt = getLanguageToolInstance(lang, motherTongue, params);
    return lt.check(text, listener);
  }

  /** Parse the comma-separated parameter {@code paramName} into category ids. */
  @NotNull
  private List<CategoryId> getCategoryIds(String paramName, Map<String, String> parameters) {
    List<String> stringIds = getCommaSeparatedStrings(paramName, parameters);
    List<CategoryId> ids = new ArrayList<>(stringIds.size());
    for (String stringId : stringIds) {
      ids.add(new CategoryId(stringId));
    }
    return ids;
  }

  /**
   * Split the value of the parameter {@code paramName} at commas. Items are trimmed and empty
   * items are skipped, so a missing or empty parameter as well as stray commas/blanks
   * ({@code "A, B,"}) never produce empty or padded ids.
   *
   * @return a new modifiable list, empty if the parameter is not set
   */
  @NotNull
  protected List<String> getCommaSeparatedStrings(String paramName, Map<String, String> parameters) {
    List<String> result = new ArrayList<>();
    String paramValue = parameters.get(paramName);
    if (paramValue != null) {
      for (String item : paramValue.split(",")) {
        String trimmed = item.trim();
        if (!trimmed.isEmpty()) {
          result.add(trimmed);
        }
      }
    }
    return result;
  }

  /**
   * Detect the language of the text and select the variant to be used.
   *
   * @param fallbackLanguage short code used if detection fails, {@code null} means {@code "en"}
   * @param preferredVariants variants like {@code en-GB}; one whose language part equals the detected
   *        language replaces it. If none applies, the detected language's default variant (if any) is used.
   * @throws IllegalArgumentException if a preferred variant contains no dash or is unknown
   */
  Language detectLanguageOfString(String text, String fallbackLanguage, List<String> preferredVariants) {
    Language lang = identifier.detectLanguage(text);
    if (lang == null) {
      lang = Languages.getLanguageForShortCode(fallbackLanguage != null ? fallbackLanguage : "en");
    }
    boolean variantApplied = false;
    for (String preferredVariant : preferredVariants) {
      int dashPos = preferredVariant.indexOf('-');
      if (dashPos < 0) {
        throw new IllegalArgumentException("Invalid format for 'preferredVariants', expected a dash as in 'en-GB': '" + preferredVariant + "'");
      }
      // substring instead of split("-")[0]: the latter fails with an index error for the input "-"
      String preferredVariantLang = preferredVariant.substring(0, dashPos);
      if (preferredVariantLang.equals(lang.getShortCode())) {
        lang = Languages.getLanguageForShortCode(preferredVariant);
        if (lang == null) {
          throw new IllegalArgumentException("Invalid 'preferredVariants', no such language/variant found: '" + preferredVariant + "'");
        }
        variantApplied = true;
      }
    }
    // also applies when preferred variants were given but none is for the detected language:
    if (!variantApplied && lang.getDefaultLanguageVariant() != null) {
      lang = lang.getDefaultLanguageVariant();
    }
    return lang;
  }

  /**
   * Create a JLanguageTool instance for a specific language, mother tongue, and rule configuration.
   *
   * @param lang the language to be used
   * @param motherTongue the user's mother tongue or {@code null}
   * @param params rule/category selection of the request, applied last if {@code useQuerySettings} is set
   */
  private JLanguageTool getLanguageToolInstance(Language lang, Language motherTongue, QueryParams params) throws Exception {
    JLanguageTool lt = new JLanguageTool(lang, motherTongue, cache);
    if (config.getLanguageModelDir() != null) {
      lt.activateLanguageModelRules(config.getLanguageModelDir());
    }
    if (config.getRulesConfigFile() != null) {
      configureFromRulesFile(lt, lang);
    } else {
      configureFromGUI(lt, lang);
    }
    if (params.useQuerySettings) {
      Tools.selectRules(lt, new HashSet<>(params.disabledCategories), new HashSet<>(params.enabledCategories),
              new HashSet<>(params.disabledRules), new HashSet<>(params.enabledRules), params.useEnabledOnly);
    }
    return lt;
  }

  /**
   * Configure the given instance from the server's rules configuration file.
   *
   * @throws RuntimeException if no rules configuration file is set
   */
  private void configureFromRulesFile(JLanguageTool langTool, Language lang) throws IOException {
    if (config.getRulesConfigFile() == null) {
      throw new RuntimeException("config.getRulesConfigFile() is null");
    }
    print("Using options configured in " + config.getRulesConfigFile());
    // If we are explicitly configuring from rules, ignore the useGUIConfig flag
    org.languagetool.gui.Tools.configureFromRules(langTool, new Configuration(
            config.getRulesConfigFile().getParentFile(), config.getRulesConfigFile().getName(), lang));
  }

  /** Configure the given instance from the GUI settings, but only for an internal server with that option enabled. */
  private void configureFromGUI(JLanguageTool langTool, Language lang) throws IOException {
    Configuration guiConfig = new Configuration(lang);  // not named 'config' to avoid shadowing the field
    if (internalServer && guiConfig.getUseGUIConfig()) {
      print("Using options configured in the GUI");
      org.languagetool.gui.Tools.configureFromRules(langTool, guiConfig);
    }
  }

  /** Immutable holder for the rule-related settings of a single request. */
  private static class QueryParams {
    final List<String> enabledRules;
    final List<String> disabledRules;
    final List<CategoryId> enabledCategories;
    final List<CategoryId> disabledCategories;
    final boolean useEnabledOnly;
    final boolean useQuerySettings;  // true if any rule or category was enabled/disabled by the request
    final boolean allowIncompleteResults;  // on timeout, return the matches found so far instead of failing

    QueryParams(List<String> enabledRules, List<String> disabledRules, List<CategoryId> enabledCategories, List<CategoryId> disabledCategories,
                boolean useEnabledOnly, boolean useQuerySettings, boolean allowIncompleteResults) {
      this.enabledRules = enabledRules;
      this.disabledRules = disabledRules;
      this.enabledCategories = enabledCategories;
      this.disabledCategories = disabledCategories;
      this.useEnabledOnly = useEnabledOnly;
      this.useQuerySettings = useQuerySettings;
      this.allowIncompleteResults = allowIncompleteResults;
    }
  }

}