package edu.stanford.nlp.pipeline;
import junit.framework.TestCase;
import java.io.IOException;
import java.util.Arrays;
import java.util.Properties;
import java.util.function.Consumer;
/**
 * Tests for {@link edu.stanford.nlp.pipeline.JSONOutputter}.
 *
 * <p>The first group of tests exercises the low-level JSON writing helpers
 * ({@code JSONOutputter.cleanJSON} and {@code JSONOutputter.JSONWriter.objectToJSON});
 * the second group checks the JSON printed for an annotated document.
 *
 * <p>Expected strings are written with tab characters for readability and are passed
 * through {@link #indent(String)} before being compared.
 *
 * @author Gabor Angeli
 */
public class JSONOutputterTest extends TestCase {

  // -----
  // BEGIN TESTS FOR JSON WRITING
  // -----

  /**
   * Replaces every tab character in the given string with
   * {@code JSONOutputter.INDENT_CHAR}.
   *
   * @param in the expected JSON, written with tabs as the indentation unit
   * @return the same text with each tab swapped for the outputter's indent string
   */
  private static String indent(String in) {
    return in.replace("\t", JSONOutputter.INDENT_CHAR);
  }

  /**
   * Asserts that a single input character is turned into a two-character
   * sequence by {@code JSONOutputter.cleanJSON}.
   *
   * @param input    a string holding exactly one character
   * @param expected the exact two-character string expected back
   */
  private static void testEscape(String input, String expected) {
    // Guard against mistakes in the Java literals themselves: the input must
    // be one character and the expected escape must be two.
    assertEquals(1, input.length());
    assertEquals(2, expected.length());
    assertEquals(expected, JSONOutputter.cleanJSON(input));
  }

  /**
   * Asserts that a single input character comes back from
   * {@code JSONOutputter.cleanJSON} as the given single character.
   *
   * @param input    a string holding exactly one character
   * @param expected the exact one-character string expected back
   */
  private static void testNoEscape(String input, String expected) {
    // Guard against mistakes in the Java literals themselves: both the input
    // and the expected output must be exactly one character long.
    assertEquals(1, input.length());
    assertEquals(1, expected.length());
    assertEquals(expected, JSONOutputter.cleanJSON(input));
  }

  /** Checks the output of {@code cleanJSON} for control characters, quotes and backslashes. */
  public void testSanitizeJSONString() {
    testEscape("\b", "\\b");
    testEscape("\f", "\\f");
    testEscape("\n", "\\n");
    testEscape("\r", "\\r");
    testEscape("\t", "\\t");
    testNoEscape("'", "'");
    testEscape("\"", "\\\"");
    testEscape("\\", "\\\\");
    // A literal backslash followed by the letter 'b' (two characters, not a
    // backspace): only the backslash should be doubled.
    assertEquals("\\\\b", JSONOutputter.cleanJSON("\\b"));
  }

  /** Checks objects with one and with two string-valued keys. */
  public void testSimpleJSON() {
    assertEquals(indent("{\n\t\"foo\": \"bar\"\n}"),
        JSONOutputter.JSONWriter.objectToJSON( (JSONOutputter.Writer writer) -> writer.set("foo", "bar")));
    assertEquals(indent("{\n\t\"foo\": \"bar\",\n\t\"baz\": \"hazzah\"\n}"),
        JSONOutputter.JSONWriter.objectToJSON( (JSONOutputter.Writer writer) -> {
          writer.set("foo", "bar");
          writer.set("baz", "hazzah");
        }));
  }

  /** Checks that a list value is written as a JSON array. */
  public void testCollectionJSON() {
    assertEquals(indent("{\n\t\"foo\": [\n\t\t\"bar\",\n\t\t\"baz\"\n\t]\n}"),
        JSONOutputter.JSONWriter.objectToJSON( (JSONOutputter.Writer writer) -> writer.set("foo", Arrays.asList("bar", "baz"))));
  }

  /** Checks that a writer callback used as a value is written as a nested object. */
  public void testNestedJSON() {
    assertEquals(indent("{\n\t\"foo\": {\n\t\t\"bar\": \"baz\"\n\t}\n}"),
        JSONOutputter.JSONWriter.objectToJSON((JSONOutputter.Writer writer) -> writer.set("foo", (Consumer<JSONOutputter.Writer>) writer1 -> writer1.set("bar", "baz"))));
  }

  /** Checks a mix of nesting: an object holding an array that itself holds an object. */
  public void testComplexJSON() {
    assertEquals(indent("{\n\t\"1.1\": {\n\t\t\"2.1\": [\n\t\t\t\"a\",\n\t\t\t\"b\",\n\t\t\t{\n\t\t\t\t\"3.1\": \"v3.1\"\n\t\t\t}\n\t\t],\n\t\t\"2.2\": \"v2.2\"\n\t}\n}"),
        JSONOutputter.JSONWriter.objectToJSON((JSONOutputter.Writer l1) -> l1.set("1.1", (Consumer<JSONOutputter.Writer>) l2 -> {
          l2.set("2.1", Arrays.asList(
              "a",
              "b",
              (Consumer<JSONOutputter.Writer>) l3 -> l3.set("3.1", "v3.1")
          ));
          l2.set("2.2", "v2.2");
        })));
  }

  // -----
  // BEGIN TESTS FOR ANNOTATION WRITING
  // -----

  /**
   * Annotates a two-sentence text with the {@code tokenize} and {@code ssplit}
   * annotators and compares the printed JSON against the full expected document.
   *
   * @throws IOException declared because {@code JSONOutputter.print} is called here
   */
  public void testSimpleDocument() throws IOException {
    Annotation ann = new Annotation("JSON is neat. Better than XML.");
    // Configure a plain Properties instance rather than using double-brace
    // initialization, which would create an anonymous Properties subclass.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(ann);
    String actual = new JSONOutputter().print(ann);
    String expected = indent(
        "{\n" +
        "\t\"sentences\": [\n" +
        "\t\t{\n" +
        "\t\t\t\"index\": 0,\n" +
        "\t\t\t\"tokens\": [\n" +
        "\t\t\t\t{\n" +
        "\t\t\t\t\t\"index\": 1,\n" +
        "\t\t\t\t\t\"word\": \"JSON\",\n" +
        "\t\t\t\t\t\"originalText\": \"JSON\",\n" +
        "\t\t\t\t\t\"characterOffsetBegin\": 0,\n" +
        "\t\t\t\t\t\"characterOffsetEnd\": 4,\n" +
        "\t\t\t\t\t\"before\": \"\",\n" +
        "\t\t\t\t\t\"after\": \" \"\n" +
        "\t\t\t\t},\n" +
        "\t\t\t\t{\n" +
        "\t\t\t\t\t\"index\": 2,\n" +
        "\t\t\t\t\t\"word\": \"is\",\n" +
        "\t\t\t\t\t\"originalText\": \"is\",\n" +
        "\t\t\t\t\t\"characterOffsetBegin\": 5,\n" +
        "\t\t\t\t\t\"characterOffsetEnd\": 7,\n" +
        "\t\t\t\t\t\"before\": \" \",\n" +
        "\t\t\t\t\t\"after\": \" \"\n" +
        "\t\t\t\t},\n" +
        "\t\t\t\t{\n" +
        "\t\t\t\t\t\"index\": 3,\n" +
        "\t\t\t\t\t\"word\": \"neat\",\n" +
        "\t\t\t\t\t\"originalText\": \"neat\",\n" +
        "\t\t\t\t\t\"characterOffsetBegin\": 8,\n" +
        "\t\t\t\t\t\"characterOffsetEnd\": 12,\n" +
        "\t\t\t\t\t\"before\": \" \",\n" +
        "\t\t\t\t\t\"after\": \"\"\n" +
        "\t\t\t\t},\n" +
        "\t\t\t\t{\n" +
        "\t\t\t\t\t\"index\": 4,\n" +
        "\t\t\t\t\t\"word\": \".\",\n" +
        "\t\t\t\t\t\"originalText\": \".\",\n" +
        "\t\t\t\t\t\"characterOffsetBegin\": 12,\n" +
        "\t\t\t\t\t\"characterOffsetEnd\": 13,\n" +
        "\t\t\t\t\t\"before\": \"\",\n" +
        "\t\t\t\t\t\"after\": \" \"\n" +
        "\t\t\t\t}\n" +
        "\t\t\t]\n" +
        "\t\t},\n" +
        "\t\t{\n" +
        "\t\t\t\"index\": 1,\n" +
        "\t\t\t\"tokens\": [\n" +
        "\t\t\t\t{\n" +
        "\t\t\t\t\t\"index\": 1,\n" +
        "\t\t\t\t\t\"word\": \"Better\",\n" +
        "\t\t\t\t\t\"originalText\": \"Better\",\n" +
        "\t\t\t\t\t\"characterOffsetBegin\": 14,\n" +
        "\t\t\t\t\t\"characterOffsetEnd\": 20,\n" +
        "\t\t\t\t\t\"before\": \" \",\n" +
        "\t\t\t\t\t\"after\": \" \"\n" +
        "\t\t\t\t},\n" +
        "\t\t\t\t{\n" +
        "\t\t\t\t\t\"index\": 2,\n" +
        "\t\t\t\t\t\"word\": \"than\",\n" +
        "\t\t\t\t\t\"originalText\": \"than\",\n" +
        "\t\t\t\t\t\"characterOffsetBegin\": 21,\n" +
        "\t\t\t\t\t\"characterOffsetEnd\": 25,\n" +
        "\t\t\t\t\t\"before\": \" \",\n" +
        "\t\t\t\t\t\"after\": \" \"\n" +
        "\t\t\t\t},\n" +
        "\t\t\t\t{\n" +
        "\t\t\t\t\t\"index\": 3,\n" +
        "\t\t\t\t\t\"word\": \"XML\",\n" +
        "\t\t\t\t\t\"originalText\": \"XML\",\n" +
        "\t\t\t\t\t\"characterOffsetBegin\": 26,\n" +
        "\t\t\t\t\t\"characterOffsetEnd\": 29,\n" +
        "\t\t\t\t\t\"before\": \" \",\n" +
        "\t\t\t\t\t\"after\": \"\"\n" +
        "\t\t\t\t},\n" +
        "\t\t\t\t{\n" +
        "\t\t\t\t\t\"index\": 4,\n" +
        "\t\t\t\t\t\"word\": \".\",\n" +
        "\t\t\t\t\t\"originalText\": \".\",\n" +
        "\t\t\t\t\t\"characterOffsetBegin\": 29,\n" +
        "\t\t\t\t\t\"characterOffsetEnd\": 30,\n" +
        "\t\t\t\t\t\"before\": \"\",\n" +
        "\t\t\t\t\t\"after\": \"\"\n" +
        "\t\t\t\t}\n" +
        "\t\t\t]\n" +
        "\t\t}\n" +
        "\t]\n" +
        "}");
    assertEquals(expected, actual);
  }
}