package org.icij.extract.spewer;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.icij.extract.document.Document;
import org.icij.extract.document.DocumentFactory;
import org.icij.extract.document.PathIdentifier;
import org.junit.Test;
import org.junit.Assert;

import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;

/**
 * Tests for the behaviour that {@link Spewer} provides to its subclasses: the output encoding
 * accessors, the metadata-output default, and the metadata fields written for date values.
 */
public class SpewerTest {

    private final DocumentFactory factory = new DocumentFactory().withIdentifier(new PathIdentifier());

    /**
     * Minimal {@link Spewer} that discards document content and records every metadata
     * name/value pair it is asked to write in an in-memory map, so tests can inspect it.
     *
     * <p>Declared {@code static} because it never touches the enclosing test instance.
     */
    private static class SpewerStub extends Spewer {

        private static final long serialVersionUID = 6023532612678893344L;

        /** Metadata captured by {@link #writeMetadata(Document)}, keyed by output field name. */
        final Map<String, String> metadata = new HashMap<>();

        SpewerStub() {
            super(new FieldNames());
        }

        /** Intentionally a no-op: these tests never look at the document content. */
        @Override
        public void write(final Document document, final Reader reader) throws IOException {
        }

        /**
         * Runs the document's metadata through a {@link MetadataTransformer} and stores each
         * emitted pair in {@link #metadata}.
         *
         * @param document the document whose metadata is recorded
         * @throws IOException declared by the overridden method
         */
        @Override
        public void writeMetadata(final Document document) throws IOException {
            final Metadata documentMetadata = document.getMetadata();

            // For array values every element is put under the same name, so the last one wins.
            new MetadataTransformer(documentMetadata, fields).transform(
                    this.metadata::put,
                    (name, values) -> Stream.of(values).forEach(value -> this.metadata.put(name, value)));
        }

        /** Forgets everything recorded so far. */
        @Override
        public void close() throws IOException {
            metadata.clear();
        }
    }

    @Test
    public void testDefaultOutputEncodingIsUTF8() {
        Assert.assertEquals(StandardCharsets.UTF_8, new SpewerStub().getOutputEncoding());
    }

    @Test
    public void testSetOutputEncoding() {
        final Spewer spewer = new SpewerStub();

        spewer.setOutputEncoding(StandardCharsets.US_ASCII);
        Assert.assertEquals(StandardCharsets.US_ASCII, spewer.getOutputEncoding());
    }

    @Test
    public void testDefaultIsToOutputMetadata() {
        Assert.assertTrue(new SpewerStub().outputMetadata());
    }

    /**
     * Checks that a creation date is written twice: verbatim under the plain metadata field
     * and, under the ISO date field, as the corresponding entry of {@code isoDates}.
     */
    @Test
    public void testWritesISO8601Dates() throws IOException {
        final SpewerStub spewer = new SpewerStub();
        final Document document = factory.create("test.txt");
        final Metadata metadata = document.getMetadata();
        final FieldNames fields = spewer.getFields();

        // dates[i] is the raw input; isoDates[i] is the value expected in the ISO date field.
        final String[] dates = {"2011-12-03+01:00", "2015-06-03"};
        final String[] isoDates = {"2011-12-03T12:00:00Z", "2015-06-03T12:00:00Z"};

        for (int i = 0; i < dates.length; i++) {
            metadata.set(Office.CREATION_DATE, dates[i]);
            spewer.writeMetadata(document);

            Assert.assertEquals(dates[i],
                    spewer.metadata.get(fields.forMetadata(Office.CREATION_DATE.getName())));
            Assert.assertEquals(isoDates[i],
                    spewer.metadata.get(fields.forMetadataISODate(Office.CREATION_DATE.getName())));

            // Reset the store of written metadata on each iteration.
            spewer.close();
        }
    }
}