package org.icij.extract.solr; import org.icij.extract.document.Document; import org.icij.extract.document.DocumentFactory; import org.icij.extract.document.PathIdentifier; import org.icij.extract.parser.ParsingReader; import org.icij.extract.spewer.FieldNames; import org.icij.extract.spewer.SolrSpewer; import org.icij.extract.test.*; import java.util.Map; import java.util.HashMap; import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.file.Paths; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.security.NoSuchAlgorithmException; import org.apache.tika.exception.TikaException; import org.apache.solr.common.SolrDocument; import org.apache.solr.client.solrj.SolrServerException; import org.junit.Test; import org.junit.Assert; import org.junit.After; public class SolrSpewerTest extends SolrJettyTestBase { private final DocumentFactory factory = new DocumentFactory().withIdentifier(new PathIdentifier()); @After public void tearDown() throws Exception { client.deleteByQuery("*:*"); client.commit(true, true); client.optimize(true, true); } @Test public void testWrite() throws IOException, TikaException, NoSuchAlgorithmException, SolrServerException { final SolrSpewer spewer = new SolrSpewer(client, new FieldNames()); final Charset charset = StandardCharsets.UTF_8; final String buffer = "test"; final Document document = factory.create(Paths.get("test-file.txt")); final ParsingReader reader = new ParsingReader(new ByteArrayInputStream(buffer.getBytes(charset))); spewer.write(document, reader); client.commit(true, true); SolrDocument response = client.getById("0"); Assert.assertNull(response); response = client.getById(document.getId()); Assert.assertEquals(document.getPath().toString(), response.get("path")); Assert.assertEquals(buffer + "\n", response.get("content")); } @Test public void testWriteMetadata() throws IOException, TikaException, NoSuchAlgorithmException, SolrServerException, InterruptedException { final SolrSpewer spewer = new SolrSpewer(client, new FieldNames()); final Charset charset = StandardCharsets.UTF_8; final String buffer = "test"; final Document document = factory.create(Paths.get("test/file.txt")); final ParsingReader reader = new ParsingReader(new ByteArrayInputStream(buffer.getBytes(charset))); spewer.outputMetadata(true); final String length = Integer.toString(buffer.getBytes(charset).length); document.getMetadata().set("Content-Length", length); document.getMetadata().set("Content-Type", "text/plain; charset=UTF-8"); spewer.write(document, reader); client.commit(true, true); client.optimize(true, true); final SolrDocument response = client.getById(document.getId()); Assert.assertEquals(document.getPath().toString(), response.getFieldValue("path")); Assert.assertEquals(length, response.getFieldValue("metadata_content_length")); Assert.assertEquals("text/plain", response.getFieldValue("metadata_base_type")); Assert.assertEquals("text/plain; charset=UTF-8", response.getFieldValue("metadata_content_type")); Assert.assertEquals("test", response.getFieldValue("metadata_parent_path")); } @Test public void testWriteTags() throws IOException, TikaException, NoSuchAlgorithmException, SolrServerException, InterruptedException { final SolrSpewer spewer = new SolrSpewer(client, new FieldNames()); final Charset charset = StandardCharsets.UTF_8; final String buffer = "test"; final Document document = factory.create(Paths.get("test/file.txt")); final ParsingReader reader = new ParsingReader(new ByteArrayInputStream(buffer.getBytes(charset))); final Map<String, String> tags = new HashMap<>(); tags.put("batch", "1"); spewer.outputMetadata(true); spewer.setTags(tags); spewer.write(document, reader); client.commit(true, true); client.optimize(true, true); final SolrDocument response = client.getById(document.getId()); Assert.assertEquals(document.getPath().toString(), response.getFieldValue("path")); Assert.assertEquals("1", response.getFieldValue("batch")); } }