/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.core; import static org.carrot2.core.test.assertions.Carrot2CoreAssertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringReader; import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.io.output.NullOutputStream; import org.carrot2.core.attribute.AttributeNames; import org.carrot2.util.CloseableUtils; import org.carrot2.util.CollectionUtils; import org.carrot2.util.tests.CarrotTestCase; import org.fest.assertions.Assertions; import org.junit.Test; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import org.carrot2.shaded.guava.common.collect.Lists; import org.carrot2.shaded.guava.common.collect.Maps; import static org.junit.Assert.*; /** * Test cases for {@link ProcessingResult}. */ public class ProcessingResultTest extends CarrotTestCase { @Test public void testSerializationDeserializationAll() throws Exception { checkSerializationDeserialization(true, true, true); } @Test public void testSerializationDeserializationDocumentsOnly() throws Exception { checkSerializationDeserialization(true, false, false); } @Test public void testSerializationDeserializationClustersOnly() throws Exception { checkSerializationDeserialization(false, true, false); } @Test public void testSerializationDeserializationAttributesOnly() throws Exception { checkSerializationDeserialization(false, false, true); } @Test public void testDocumentDeserializationFromLegacyXml() throws Exception { final String query = "apple computer"; final String title = "Apple Computer, Inc."; final String snippet = "Macintosh hardware, software, and Internet tools."; final String url = "http:// www.apple.com/"; StringBuilder xml = new StringBuilder(); xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); xml.append("<searchresult>\n"); xml.append("<query>" + query + "</query>\n"); xml.append("<document id=\"0\">"); xml.append("<title>" + title + "</title>\n"); xml.append("<snippet>" + snippet + "</snippet>\n"); xml.append("<url>" + url + "</url>\n"); xml.append("</document>\n"); xml.append("</searchresult>\n"); final ProcessingResult deserialized = ProcessingResult .deserialize(new ByteArrayInputStream(xml.toString().getBytes("UTF-8"))); assertNotNull(deserialized); assertNotNull(deserialized.getAttributes()); Document deserializedDocument = CollectionUtils.getFirst(deserialized .getDocuments()); assertEquals(title, deserializedDocument.getField(Document.TITLE)); assertEquals(snippet, deserializedDocument.getField(Document.SUMMARY)); assertEquals(url, deserializedDocument.getField(Document.CONTENT_URL)); assertNull(deserializedDocument.getField(Document.LANGUAGE)); Assertions.assertThat(deserialized.getAttributes().get(AttributeNames.QUERY)) .isEqualTo(query); } @Test public void deserializeStringDocumentIds() throws Exception { final String title = "Apple Computer, Inc."; final String id = "cafe00f0"; StringBuilder xml = new StringBuilder(); xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); xml.append("<searchresult>\n"); xml.append("<document id=\"" + id + "\">"); xml.append("<title>" + title + "</title>\n"); xml.append("</document>\n"); xml.append("</searchresult>\n"); final ProcessingResult deserialized = ProcessingResult .deserialize(new ByteArrayInputStream(xml.toString().getBytes("UTF-8"))); assertNotNull(deserialized); assertNotNull(deserialized.getAttributes()); Document deserializedDocument = CollectionUtils.getFirst(deserialized .getDocuments()); assertEquals(title, deserializedDocument.getField(Document.TITLE)); assertEquals(id, deserializedDocument.getStringId()); } @Test public void testDocumentDeserializationLanguageByIsoCode() throws Exception { final LanguageCode language = LanguageCode.POLISH; assertThat( ProcessingResult.deserialize(documentXml(language.getIsoCode())) .getDocuments().get(0).getLanguage()).isEqualTo(language); } @Test public void testDocumentDeserializationLanguageByEnumCode() throws Exception { final LanguageCode language = LanguageCode.POLISH; assertThat( ProcessingResult.deserialize(documentXml(language.name())).getDocuments() .get(0).getLanguage()).isEqualTo(language); } private InputStream documentXml(String language) throws Exception { final StringBuilder xml = new StringBuilder(); xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); xml.append("<searchresult>\n"); xml.append("<document id=\"0\" language=\"" + language + "\">"); xml.append("</document>\n"); xml.append("</searchresult>\n"); return new ByteArrayInputStream(xml.toString().getBytes("UTF8")); } @Test public void testClusterDeserializationFromLegacyXml() throws Exception { final String query = "apple computer"; final String title = "Apple Computer, Inc."; final String snippet = "Macintosh hardware, software, and Internet tools."; final String url = "http:// www.apple.com/"; final int documentCount = 3; StringBuilder xml = new StringBuilder(); xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); xml.append("<searchresult>"); xml.append("<query>" + query + "</query>\n"); for (int i = 0; i < documentCount; i++) { xml.append("<document id=\"" + i + "\">"); xml.append("<title>" + title + i + "</title>\n"); xml.append("<snippet>" + snippet + i + "</snippet>\n"); xml.append("<url>" + url + i + "</url>\n"); xml.append("</document>\n"); } xml.append("<group score=\"1.0\">"); xml.append("<title>"); xml.append("<phrase>Data Mining Techniques</phrase>"); xml.append("<phrase>Lectures</phrase>"); xml.append("</title>"); xml.append("<group>"); xml.append("<title>"); xml.append("<phrase>Research</phrase>"); xml.append("</title>"); xml.append("<document refid=\"0\"/>"); xml.append("<document refid=\"1\"/>"); xml.append("</group>"); xml.append("</group>"); xml.append("<group score=\"0.55\">"); xml.append("<title>"); xml.append("<phrase>Software</phrase>"); xml.append("</title>"); xml.append("<document refid=\"1\"/>"); xml.append("<document refid=\"2\"/>"); xml.append("</group>"); xml.append("</searchresult>\n"); final ProcessingResult deserialized = ProcessingResult .deserialize(new ByteArrayInputStream(xml.toString().getBytes("UTF-8"))); assertNotNull(deserialized); assertNotNull(deserialized.getAttributes()); // Check documents assertThatDocuments(deserialized.getDocuments()).hasSize(documentCount); int index = 0; final List<Document> documents = deserialized.getDocuments(); for (Document document : documents) { assertEquals(title + index, document.getField(Document.TITLE)); assertEquals(snippet + index, document.getField(Document.SUMMARY)); assertEquals(url + index, document.getField(Document.CONTENT_URL)); index++; } // Check clusters final List<Cluster> clusters = deserialized.getClusters(); final Cluster clusterA = new Cluster(); clusterA.addPhrases("Data Mining Techniques", "Lectures"); clusterA.setAttribute(Cluster.SCORE, 1.0); final Cluster clusterAA = new Cluster(); clusterAA.addPhrases("Research"); clusterAA.addDocuments(documents.get(0), documents.get(1)); clusterA.addSubclusters(clusterAA); final Cluster clusterB = new Cluster(); clusterB.addPhrases("Software"); clusterB.setAttribute(Cluster.SCORE, 0.55); clusterB.addDocuments(documents.get(1), documents.get(2)); assertThatClusters(clusters).isEquivalentTo( Lists.newArrayList(clusterA, clusterB)); Assertions.assertThat(deserialized.getAttributes().get(AttributeNames.QUERY)) .isEqualTo(query); } @Test public void testJsonSerializationAll() throws IOException { final ProcessingResult result = prepareProcessingResult(); final JsonNode root = getJsonRootNode(result, null, true, true, true); checkJsonQuery(root); checkJsonClusters(result, root); checkJsonDocuments(result, root); Assertions.assertThat(root.get("results")).isNotNull(); } @Test public void testJsonSerializationWithCallback() throws IOException { final String callback = "callback"; final ProcessingResult result = prepareProcessingResult(); final StringWriter json = new StringWriter(); result.serializeJson(json, callback, true, true); final String jsonString = json.toString(); Assertions.assertThat(jsonString).startsWith(callback + "(").endsWith(");"); final String data = jsonString.substring(callback.length() + 1, jsonString.length() - 2); final JsonNode root = getJsonRootNode(data); checkJsonQuery(root); checkJsonClusters(result, root); checkJsonDocuments(result, root); Assertions.assertThat(root.get("results")).isNotNull(); } @Test public void testJsonSerializationDocumentsOnly() throws IOException { final ProcessingResult result = prepareProcessingResult(); final JsonNode root = getJsonRootNode(result, null, true, false, false); checkJsonQuery(root); checkJsonDocuments(result, root); Assertions.assertThat(root.get("clusters")).isNull(); Assertions.assertThat(root.get("results")).isNull(); } @Test public void testJsonSerializationClustersOnly() throws IOException { final ProcessingResult result = prepareProcessingResult(); final JsonNode root = getJsonRootNode(result, null, false, true, false); checkJsonQuery(root); checkJsonClusters(result, root); Assertions.assertThat(root.get("documents")).isNull(); Assertions.assertThat(root.get("results")).isNull(); } @Test public void testJsonSerializationAttributesOnly() throws IOException { final ProcessingResult result = prepareProcessingResult(); final JsonNode root = getJsonRootNode(result, null, false, false, true); checkJsonQuery(root); Assertions.assertThat(root.get("documents")).isNull(); Assertions.assertThat(root.get("clusters")).isNull(); Assertions.assertThat(root.get("results")).isNotNull(); } @Test public void testNoFalseJunkGroupAttribute() throws Exception { Cluster a, b, c; final HashMap<String, Object> attrs = Maps.newHashMap(); attrs.put(AttributeNames.CLUSTERS, Arrays.asList( a = new Cluster("a"), b = new Cluster("b"), c = new Cluster("c"))); b.setOtherTopics(false); c.setOtherTopics(true); ProcessingResult pr = new ProcessingResult(attrs); pr = ProcessingResult.deserialize(pr.serialize()); assertEquals("a", (a = pr.getClusters().get(0)).getLabel()); assertEquals("b", (b = pr.getClusters().get(1)).getLabel()); assertEquals("c", (c = pr.getClusters().get(2)).getLabel()); assertThat((Object) a.getAttribute(Cluster.OTHER_TOPICS)).isNull(); assertThat((Object) b.getAttribute(Cluster.OTHER_TOPICS)).isNull(); assertThat((Object) c.getAttribute(Cluster.OTHER_TOPICS)).isEqualTo(Boolean.TRUE); } private void checkJsonQuery(final JsonNode root) { Assertions.assertThat(root.get("query").textValue()).isEqualTo("query"); } private void checkJsonClusters(final ProcessingResult result, final JsonNode root) { final JsonNode clusters = root.get("clusters"); Assertions.assertThat(clusters).isNotNull(); final ArrayList<JsonNode> clusterNodes = Lists.newArrayList(clusters.elements()); Assertions.assertThat(clusterNodes).hasSize(result.getClusters().size()); } private void checkJsonDocuments(final ProcessingResult result, final JsonNode root) { final JsonNode documents = root.get("documents"); Assertions.assertThat(documents).isNotNull(); final ArrayList<JsonNode> documentNodes = Lists.newArrayList(documents.elements()); Assertions.assertThat(documentNodes).hasSize(result.getDocuments().size()); } private JsonNode getJsonRootNode(final ProcessingResult result, String callback, boolean saveDocuments, boolean saveClusters, boolean saveAttributes) throws IOException, JsonParseException { return getJsonRootNode(getJsonString(result, callback, saveDocuments, saveClusters, saveAttributes)); } private String getJsonString(final ProcessingResult result, String callback, boolean saveDocuments, boolean saveClusters, boolean saveAttributes) throws IOException { final StringWriter json = new StringWriter() { @Override public void close() throws IOException { throw new IOException("Should not be calling close."); } }; result.serializeJson(json, callback, false, saveDocuments, saveClusters, saveAttributes); return json.toString(); } private JsonNode getJsonRootNode(final String jsonString) throws IOException, JsonParseException { final JsonParser jsonParser = new JsonFactory().createParser(new StringReader(jsonString)); final ObjectMapper mapper = new ObjectMapper(); final JsonNode root = mapper.readTree(jsonParser); return root; } private void checkSerializationDeserialization(boolean documentsDeserialized, boolean clustersDeserialized, boolean attributesDeserialized) throws Exception { final ProcessingResult sourceProcessingResult = prepareProcessingResult(); final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); sourceProcessingResult.serialize(new NullOutputStream()); sourceProcessingResult.serialize(outputStream, documentsDeserialized, clustersDeserialized, attributesDeserialized); CloseableUtils.close(outputStream); final ProcessingResult deserialized = ProcessingResult .deserialize(new ByteArrayInputStream(outputStream.toByteArray())); assertNotNull(deserialized); assertNotNull(deserialized.getAttributes()); if (documentsDeserialized) { assertThatDocuments(deserialized.getDocuments()).isEquivalentTo( sourceProcessingResult.getDocuments()); } else { Assertions.assertThat(deserialized.getDocuments()).isNull(); } if (clustersDeserialized) { assertThatClusters(deserialized.getClusters()).isEquivalentTo( sourceProcessingResult.getClusters(), documentsDeserialized); } else { Assertions.assertThat(deserialized.getClusters()).isNull(); } if (attributesDeserialized) { Assertions.assertThat((Object) deserialized.getAttribute(AttributeNames.RESULTS)) .isEqualTo(sourceProcessingResult.getAttribute(AttributeNames.RESULTS)); } else { Assertions.assertThat((Object) deserialized.getAttribute(AttributeNames.RESULTS)) .isNull(); } Assertions.assertThat(deserialized.getAttributes().get(AttributeNames.QUERY)) .isEqualTo(sourceProcessingResult.getAttributes().get(AttributeNames.QUERY)); } private ProcessingResult prepareProcessingResult() { final List<Document> documents = Lists.newArrayList(new Document("Test title 1", "Test snippet 1", "http://test1.com"), new Document("Test title 2", "Test snippet 2", "http://test2.com/test"), new Document("Test title 3", "Test snippet 3. Some more words and <b>html</b>", "http://test2.com"), new Document("Other", "Other", "Other")); final Map<String, Object> attributes = Maps.newHashMap(); attributes.put(AttributeNames.DOCUMENTS, documents); final Document document = documents.get(0); document.setSources(Lists.newArrayList("s1", "s2")); document.setField("testString", "test"); document.setField("testInteger", 10); document.setField("testDouble", 10.3); document.setField("testBoolean", true); document.setLanguage(LanguageCode.POLISH); document.setSources(Lists.newArrayList("s1", "s2")); Document.assignDocumentIds(documents); final Cluster clusterA = new Cluster(); clusterA.addPhrases("Label 1", "Label 2"); clusterA.setAttribute(Cluster.SCORE, 1.0); clusterA.setAttribute("testString", "test"); clusterA.setAttribute("testInteger", 10); clusterA.setAttribute("testDouble", 10.3); clusterA.setAttribute("testBoolean", true); final Cluster clusterAA = new Cluster(); clusterAA.addPhrases("Label 3 zażółć gęślą jaźń"); clusterAA.addDocuments(documents.get(0), documents.get(1)); clusterA.addSubclusters(clusterAA); final Cluster clusterB = new Cluster(); clusterB.addPhrases("Label 4"); clusterB.setAttribute(Cluster.SCORE, 0.55); clusterB.addDocuments(documents.get(1), documents.get(2)); final Cluster clusterO = new Cluster(); clusterO.setOtherTopics(true); clusterO.addPhrases(Cluster.OTHER_TOPICS_LABEL); clusterO.addDocuments(documents.get(3)); final List<Cluster> clusters = Lists.newArrayList(clusterA, clusterB, clusterO); attributes.put(AttributeNames.CLUSTERS, clusters); attributes.put(AttributeNames.QUERY, "query"); attributes.put(AttributeNames.RESULTS, 120); return new ProcessingResult(attributes); } }