//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.consumers.csv.internals;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import uk.gov.dstl.baleen.consumers.csv.AbstractCsvConsumer;
import uk.gov.dstl.baleen.types.language.Sentence;
import uk.gov.dstl.baleen.types.language.WordToken;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.types.semantic.Event;
import uk.gov.dstl.baleen.uima.utils.UimaTypesUtils;
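/**
* Write events to CSV, one row per event.
* <p>
* Format is:
* <ul>
* <li>source (the source URI of the document)
* <li>sentence (text of the first sentence covering the event)
* <li>type (the event types, comma separated)
* <li>Words (the event's tokens, space separated)
* <li>Entities (one column per entity)
* <li>Arguments... (one column per argument)
* </ul>
*/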
public class CsvEvent extends AbstractCsvConsumer {

  /** Empty row returned by {@code extracted} to signal "nothing to write" for an event. */
  private static final String[] NO_ROW = new String[0];

  /**
   * Runs the superclass initialisation and then passes the column headings to
   * {@code write}.
   *
   * @param aContext the UIMA context, handed straight to the superclass
   * @throws ResourceInitializationException if the superclass initialisation fails
   */
  @Override
  public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
    super.doInitialize(aContext);
    write("source", "sentence", "type", "words", "Entities then arguments...");
  }

  /**
   * Builds a row for every {@link Event} in the document and writes each non-empty row.
   *
   * @param jCas the document being processed
   */
  @Override
  protected void write(JCas jCas) {
    final String source = getDocumentAnnotation(jCas).getSourceUri();
    final Map<Event, Collection<Sentence>> coveringSentence =
        JCasUtil.indexCovering(jCas, Event.class, Sentence.class);

    for (final Event event : JCasUtil.select(jCas, Event.class)) {
      final String[] row = extracted(source, coveringSentence, event);
      // An empty array means the event had no covering sentence and is skipped.
      if (row.length > 0) {
        write(row);
      }
    }
  }

  /**
   * Converts a single event into its CSV columns: source, sentence, type, words, then one
   * column per entity followed by one column per argument.
   *
   * @param source the document source, written as the first column
   * @param coveringSentence index from each event to the sentences that cover it
   * @param e the event to convert
   * @return the columns for the event, or an empty array if no covering sentence is found
   */
  private String[] extracted(final String source,
      final Map<Event, Collection<Sentence>> coveringSentence, Event e) {
    final Collection<Sentence> sentences = coveringSentence.get(e);
    if (sentences == null || sentences.isEmpty()) {
      // This shouldn't happen, unless you have no sentence annotation
      return NO_ROW;
    }

    final List<String> row = new ArrayList<>();
    row.add(source);
    // Only the first covering sentence is reported.
    row.add(sentences.iterator().next().getCoveredText());
    row.add(joinEventTypes(e));
    row.add(joinTokenText(e));

    // Entities and arguments are variable in number, so they go last: one column each.
    if (e.getEntities() != null) {
      for (final Object entity : e.getEntities().toArray()) {
        row.add(normalize(((Entity) entity).getCoveredText()));
      }
    }
    if (e.getArguments() != null) {
      row.addAll(Arrays.stream(e.getArguments().toArray())
          .map(this::normalize)
          .collect(Collectors.toList()));
    }

    return row.toArray(new String[0]);
  }

  /** Returns the event's types joined with commas, or an empty string if it has none set. */
  private String joinEventTypes(final Event e) {
    if (e.getEventType() == null) {
      return "";
    }
    return String.join(",", UimaTypesUtils.toArray(e.getEventType()));
  }

  /**
   * Returns the normalized covered text of the event's tokens joined with single spaces, or
   * an empty string if the event has no token array.
   */
  private String joinTokenText(final Event e) {
    if (e.getTokens() == null) {
      return "";
    }
    return Arrays.stream(e.getTokens().toArray())
        .map(token -> ((WordToken) token).getCoveredText())
        .map(this::normalize)
        .collect(Collectors.joining(" "));
  }
}