//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.contentextractors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.factory.UimaContextFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.junit.Test;
import uk.gov.dstl.baleen.types.metadata.Metadata;
import uk.gov.dstl.baleen.uima.BaleenContentExtractor;
import uk.gov.dstl.baleen.uima.testing.JCasSingleton;
/**
 * Tests for {@link CsvContentExtractor}, driven by the {@code test.csv} resource that is
 * resolved relative to this class.
 */
public class CsvContentExtractorTest {

    /** Name of the CSV fixture, looked up via {@link Class#getResource(String)}. */
    private static final String TEST_FILE = "test.csv";

    /**
     * Processes the fixture with content column {@code "2"} and checks the resulting document
     * text and {@link Metadata} annotations.
     *
     * @throws Exception if the extractor cannot be set up or the fixture cannot be processed
     */
    @Test
    public void test() throws Exception {
        UimaContext context = UimaContextFactory.createUimaContext();
        JCas jCas = JCasSingleton.getJCasInstance();

        BaleenContentExtractor contentExtractor = new CsvContentExtractor();
        File f = getTestFile();
        contentExtractor.initialize(context, createConfig("2"));

        try (InputStream is = new FileInputStream(f)) {
            contentExtractor.processStream(is, f.getPath(), jCas);
        } finally {
            // Always release the extractor, even if processing fails unexpectedly
            contentExtractor.destroy();
        }

        assertEquals("Hello world, my name is John Smith", jCas.getDocumentText());

        Collection<Metadata> metadata = JCasUtil.select(jCas, Metadata.class);
        assertEquals(6, metadata.size());

        // Index the annotations by key so individual values can be checked
        Map<String, String> metadataMap = new HashMap<>();
        for (Metadata md : metadata) {
            metadataMap.put(md.getKey(), md.getValue());
        }

        assertMetadata(metadataMap, "id", "43");
        assertMetadata(metadataMap, "test1", "Foo");
        assertMetadata(metadataMap, "column4", "Bar");
        assertMetadata(metadataMap, "test3", "Baz");
        assertMetadata(metadataMap, "column6", "12345");
    }

    /**
     * Configures content column {@code "20"} and expects {@code processStream} to throw an
     * {@link IOException} for the fixture.
     *
     * @throws Exception if the extractor cannot be set up or the fixture cannot be opened
     */
    @Test
    public void testNotEnoughCols() throws Exception {
        UimaContext context = UimaContextFactory.createUimaContext();
        JCas jCas = JCasSingleton.getJCasInstance();

        BaleenContentExtractor contentExtractor = new CsvContentExtractor();
        File f = getTestFile();
        contentExtractor.initialize(context, createConfig("20"));

        // The stream is opened outside the inner try so that a failure to open (or close) the
        // fixture is reported as a test error rather than being mistaken for the expected one.
        try (InputStream is = new FileInputStream(f)) {
            try {
                contentExtractor.processStream(is, f.getPath(), jCas);
                fail("Expected error not thrown");
            } catch (IOException expected) {
                // This error is expected
            }
        } finally {
            contentExtractor.destroy();
        }
    }

    /**
     * Builds the extractor configuration shared by the tests.
     *
     * @param contentColumn value to use for {@link CsvContentExtractor#PARAM_CONTENT_COLUMN}
     * @return a mutable configuration map
     */
    private static Map<String, Object> createConfig(String contentColumn) {
        Map<String, Object> config = new HashMap<>();
        config.put(CsvContentExtractor.PARAM_SEPARATOR, ",");
        config.put(CsvContentExtractor.PARAM_CONTENT_COLUMN, contentColumn);
        config.put(CsvContentExtractor.PARAM_COLUMNS, Arrays.asList("id", "test1", "", "test3"));
        return config;
    }

    /**
     * Locates the CSV fixture on disk.
     *
     * <p>The resource URL is converted through {@code toURI()} rather than {@code getPath()},
     * because the latter returns a URL-encoded string (for example {@code %20} for a space)
     * that does not name a real file when the checkout path contains such characters.
     *
     * @return the fixture as a {@link File}
     * @throws Exception if the resource URL cannot be converted to a URI
     */
    private File getTestFile() throws Exception {
        java.net.URL resource = getClass().getResource(TEST_FILE);
        assertTrue("Test resource " + TEST_FILE + " not found", resource != null);
        return new File(resource.toURI());
    }

    /**
     * Asserts that {@code metadataMap} contains {@code key} mapped to {@code expected}.
     *
     * @param metadataMap metadata values indexed by key
     * @param key the metadata key that must be present
     * @param expected the value the key must map to
     */
    private static void assertMetadata(Map<String, String> metadataMap, String key, String expected) {
        assertTrue(metadataMap.containsKey(key));
        assertEquals(expected, metadataMap.get(key));
    }
}