/* LanguageTool, a natural language style checker
* Copyright (C) 2014 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.dev.eval;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.rules.IncorrectExample;
import org.languagetool.rules.Rule;
import org.languagetool.tools.StringTools;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
/**
 * Runs incorrect example sentences from grammar.xml against an
 * After the Deadline instance reachable via http.
 *
 * <p>For every active rule that is not off by default, each incorrect example
 * sentence is sent (without its {@code <marker>} tags) to the server. An example
 * counts as "found" if the server reports an error string that covers exactly the
 * text between {@code <marker>} and {@code </marker>}.
 *
 * @since 2.6
 */
class AfterTheDeadlineEvaluator {

  /** Pause after every HTTP request, so the remote server isn't flooded. */
  private static final int WAIT_TIME_MILLIS = 1000;
  private static final String MARKER_START = "<marker>";
  private static final String MARKER_END = "</marker>";

  private final String urlPrefix;

  /**
   * @param urlPrefix URL of the service up to and including the parameter name to
   *                  which the URL-encoded sentence gets appended
   */
  AfterTheDeadlineEvaluator(String urlPrefix) {
    this.urlPrefix = urlPrefix;
  }

  /**
   * Sends all incorrect examples of the language's active rules to the server
   * and prints a per-sentence result plus a summary to stdout.
   *
   * @throws IOException if the rules cannot be loaded
   * @throws InterruptedException if the thread is interrupted while waiting between requests
   */
  private void run(Language lang) throws IOException, InterruptedException {
    List<Rule> rules = getRules(lang);
    int sentenceCount = 0;
    int errorFoundCount = 0;
    System.out.println("Starting test for " + lang.getName() + " on " + urlPrefix);
    System.out.println("Wait time between HTTP requests: " + WAIT_TIME_MILLIS + "ms");
    System.out.println("Starting test on " + rules.size() + " rules");
    for (Rule rule : rules) {
      if (rule.isDefaultOff()) {
        System.out.println("Skipping rule that is off by default: " + rule.getId());
        continue;
      }
      List<IncorrectExample> incorrectExamples = rule.getIncorrectExamples();
      System.out.println("\n" + rule.getId() + ":");
      if (incorrectExamples.isEmpty()) {
        System.out.println(" (no examples)");
        continue;
      }
      for (IncorrectExample example : incorrectExamples) {
        boolean match = queryAtDServer(example);
        sentenceCount++;
        if (match) {
          errorFoundCount++;
        }
        String marker = match ? "+" : "-";
        System.out.println(" [" + marker + "] " + example.getExample().replace(MARKER_START, "<m>").replace(MARKER_END, "</m>"));
        Thread.sleep(WAIT_TIME_MILLIS);
      }
      //use this to stop: if (sentenceCount > 100) { break; }
    }
    System.out.println("\nDone.");
    System.out.println("Sentence count: " + sentenceCount);
    // avoid 0/0 = NaN if no sentence has been checked at all
    float percentage = sentenceCount == 0 ? 0 : (float) errorFoundCount / sentenceCount * 100;
    System.out.printf("Expected errors found: " + errorFoundCount + " (%.2f%%)\n", percentage);
  }

  /** Returns all active rules of a freshly created {@link JLanguageTool} for the language. */
  private List<Rule> getRules(Language lang) throws IOException {
    JLanguageTool langTool = new JLanguageTool(lang);
    return langTool.getAllActiveRules();
  }

  /**
   * Sends the example sentence (markers removed) to the server.
   *
   * @return true if the server's result contains the expected error
   * @throws RuntimeException if the request fails or its result cannot be evaluated
   */
  private boolean queryAtDServer(IncorrectExample example) {
    String sentence = removeMarker(example.getExample());
    try {
      URL url = new URL(urlPrefix + URLEncoder.encode(sentence, "UTF-8"));
      String result = getContent(url);
      return isExpectedErrorFound(example, result);
    } catch (IOException | XPathExpressionException e) {
      throw new RuntimeException(e);
    }
  }

  private String removeMarker(String sentence) {
    return sentence.replace(MARKER_START, "").replace(MARKER_END, "");
  }

  /** Fetches the URL's content as a UTF-8 string, always closing the stream. */
  private String getContent(URL url) throws IOException {
    // openStream() always provides the raw stream, while getContent() returns
    // an Object whose type depends on the content handler
    try (InputStream contentStream = url.openStream()) {
      return StringTools.streamToString(contentStream, "UTF-8");
    }
  }

  /**
   * Checks whether one of the error strings in the server's XML result (the text
   * of its {@code <string>} elements) occurs in the sentence at exactly the span
   * that is marked with {@code <marker>...</marker>} in the example. Positions
   * that don't fit are printed to stdout.
   *
   * @param incorrectExample example sentence with the error marked up
   * @param resultXml XML as returned by the server
   * @return true if an error string covers exactly the marked span
   * @throws XPathExpressionException if the XPath expression cannot be evaluated
   * @throws RuntimeException if {@code resultXml} cannot be parsed
   */
  boolean isExpectedErrorFound(IncorrectExample incorrectExample, String resultXml) throws XPathExpressionException {
    String example = incorrectExample.getExample();
    Document document = getDocument(resultXml);
    XPath xPath = XPathFactory.newInstance().newXPath();
    NodeList errorStrings = (NodeList) xPath.evaluate("//string/text()", document, XPathConstants.NODESET);
    // Expected positions refer to the sentence without markup: the start marker
    // itself is at the position where the marked text starts once it's removed,
    // and the end marker is shifted left by the length of the start marker.
    int expectedErrorStartPos = example.indexOf(MARKER_START);
    int expectedErrorEndPos = example.indexOf(MARKER_END) - MARKER_START.length();
    for (int i = 0; i < errorStrings.getLength(); i++) {
      String errorStr = errorStrings.item(i).getNodeValue();
      if (errorStr.isEmpty()) {
        continue;
      }
      List<Integer> errorStartPosList = getStartPositions(incorrectExample, errorStr);
      List<String> mismatches = new ArrayList<>();
      for (int errorStartPos : errorStartPosList) {
        int errorEndPos = errorStartPos + errorStr.length();
        if (errorStartPos == expectedErrorStartPos && errorEndPos == expectedErrorEndPos) {
          return true;
        }
        mismatches.add("Position mismatch: " + errorStartPos + "-" + errorEndPos + " != " + expectedErrorStartPos + "-" + expectedErrorEndPos);
      }
      for (String mismatch : mismatches) {
        System.out.println(" " + mismatch);
      }
    }
    return false;
  }

  /**
   * Returns the start positions of all (also overlapping) occurrences of
   * {@code searchStr} in the example sentence without its markers.
   * {@code searchStr} must not be empty.
   */
  private List<Integer> getStartPositions(IncorrectExample example, String searchStr) {
    List<Integer> posList = new ArrayList<>();
    int pos = 0;
    String sentence = removeMarker(example.getExample());
    while ((pos = sentence.indexOf(searchStr, pos)) != -1) {
      posList.add(pos);
      pos++;
    }
    return posList;
  }

  /**
   * Parses the XML string into a DOM document.
   *
   * @throws RuntimeException if the parser cannot be configured or the XML cannot be parsed
   */
  private Document getDocument(String xml) {
    try {
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      // the XML comes from a remote server, so don't resolve external entities (XXE)
      factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
      DocumentBuilder builder = factory.newDocumentBuilder();
      InputSource inputSource = new InputSource(new StringReader(xml));
      return builder.parse(inputSource);
    } catch (Exception e) {
      // keep the cause so the actual parse problem shows up in the stack trace
      throw new RuntimeException("Could not parse XML: " + xml, e);
    }
  }

  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Usage: " + AfterTheDeadlineEvaluator.class.getSimpleName() + " <langCode> <urlPrefix>");
      System.err.println(" <urlPrefix> After the Deadline instance, e.g. 'http://de.service.afterthedeadline.com/checkDocument?key=test&data='");
      System.exit(1);
    }
    AfterTheDeadlineEvaluator evaluator = new AfterTheDeadlineEvaluator(args[1]);
    evaluator.run(Languages.getLanguageForShortCode(args[0]));
  }

}