//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.consumers.json;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import org.apache.commons.io.FilenameUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CommonArrayFS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.Sofa;
import org.apache.uima.resource.ResourceInitializationException;
import uk.gov.dstl.baleen.consumers.utils.SourceUtils;
import uk.gov.dstl.baleen.types.BaleenAnnotation;
import uk.gov.dstl.baleen.uima.BaleenConsumer;
/**
* Simple implementation of a JSON annotation writer.
*
* <p>
* Subclasses only need to implement
* {@link AbstractJsonConsumer#selectAnnotations(JCas)}
* </p>
*
* @param <T>
* the type of annotation to emit.
*/
public abstract class AbstractJsonConsumer<T extends BaleenAnnotation> extends BaleenConsumer {
/** The Constant PARAM_OUTPUT_DIRECTORY. */
public static final String PARAM_OUTPUT_DIRECTORY = "outputDirectory";
/** The output directory. */
@ConfigurationParameter(name = PARAM_OUTPUT_DIRECTORY, defaultValue = "jsonOutput")
private String outputDirectory = "jsonOutput";
/** The object mapper. */
private final ObjectMapper objectMapper;
/**
* Instantiates a new abstract json consumer.
*/
public AbstractJsonConsumer() {
objectMapper = new ObjectMapper();
objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
}
@Override
public void doInitialize(final UimaContext aContext) throws ResourceInitializationException {
super.doInitialize(aContext);
getMonitor().info("Will be writing to " + Paths.get(outputDirectory).toAbsolutePath().toString());
}
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
String documentSourceName = SourceUtils.getDocumentSourceBaseName(jCas, getSupport());
JsonFactory factory = objectMapper.getFactory();
try (Writer writer = createOutputWriter(documentSourceName);
JsonGenerator generator = factory.createGenerator(writer).useDefaultPrettyPrinter()) {
generator.writeStartObject();
writeSofa(generator, jCas);
writeAnnotations(generator, selectAnnotations(jCas));
generator.writeEndObject();
} catch (IOException e) {
getMonitor().warn("Failed to write JSON for " + documentSourceName, e);
}
}
/**
* Write the sofa.
*
* @param generator
* the json generator
* @param jCas
* the JCas
* @throws IOException
* Signals that an I/O exception has occurred.
*/
private void writeSofa(JsonGenerator generator, JCas jCas) throws IOException {
Sofa sofa = jCas.getSofa();
generator.writeFieldName("sofa");
writeFS(generator, sofa);
}
/**
* Creates the output writer for JSON data.
*
* @param documentSourceName
* the document source name
* @return the writer
* @throws IOException
* Signals that an I/O exception has occurred.
*/
private Writer createOutputWriter(final String documentSourceName) throws IOException {
Path directoryPath = Paths.get(outputDirectory);
if (!directoryPath.toFile().exists()) {
Files.createDirectories(directoryPath);
}
String baseName = FilenameUtils.getBaseName(documentSourceName);
Path outputFilePath = directoryPath.resolve(baseName + ".json");
if (outputFilePath.toFile().exists()) {
getMonitor().warn("Overwriting existing output file {}", outputFilePath.toString());
}
return Files.newBufferedWriter(outputFilePath, StandardCharsets.UTF_8);
}
/**
* Write annotations array to the file.
*
* @param generator
* the generator
* @param selectedAnnotations
* the selected annotations
* @throws IOException
* Signals that an I/O exception has occurred.
*/
private void writeAnnotations(JsonGenerator generator, Iterable<? extends FeatureStructure> selectedAnnotations)
throws IOException {
generator.writeFieldName("annotations");
generator.writeStartArray();
for (FeatureStructure baleenAnnotation : selectedAnnotations) {
writeFS(generator, baleenAnnotation);
}
generator.writeEndArray();
}
/**
* Write an annotation to the file.
*
* @param generator
* the generator
* @param annotation
* the annotation
* @throws IOException
* Signals that an I/O exception has occurred.
*/
private void writeFS(JsonGenerator generator, FeatureStructure annotation) throws IOException {
generator.writeStartObject();
Type type = annotation.getType();
generator.writeStringField("type", type.getName());
List<Feature> features = type.getFeatures();
if (annotation instanceof AnnotationFS) {
AnnotationFS annotationFS = (AnnotationFS) annotation;
if (!(annotationFS.getEnd() == 0 && annotationFS.getBegin() == 0)) {
generator.writeStringField("coveredText", annotationFS.getCoveredText());
}
}
if (!features.isEmpty()) {
writeFS(generator, annotation, features);
}
generator.writeEndObject();
}
/**
* Write annotation with features (including non-primitives)
*
* @param generator
* the generator
* @param annotation
* the annotation
* @param features
* the features
* @throws IOException
* Signals that an I/O exception has occurred.
*/
private void writeFS(JsonGenerator generator, FeatureStructure annotation, List<Feature> features)
throws IOException {
generator.writeObjectFieldStart("fields");
for (Feature feature : features) {
if (feature.getRange().isPrimitive()) {
writePrimitive(generator, annotation, feature);
} else if (feature.getRange().isArray()) {
writeArray(generator, annotation, feature);
} else {
if ("uima.cas.AnnotationBase:sofa".equals(feature.getName())) {
continue;
}
FeatureStructure featureValue = annotation.getFeatureValue(feature);
if (featureValue != null) {
generator.writeFieldName(feature.getShortName());
writeFS(generator, featureValue);
}
}
}
generator.writeEndObject();
}
/**
* Write feature array.
*
* @param generator
* the generator
* @param annotation
* the annotation
* @param feature
* the feature
* @throws IOException
* Signals that an I/O exception has occurred.
*/
private void writeArray(JsonGenerator generator, FeatureStructure annotation, Feature feature) throws IOException {
FeatureStructure value = annotation.getFeatureValue(feature);
if (value instanceof FSArray) {
FSArray fsArray = (FSArray) value;
generator.writeFieldName(feature.getShortName());
FeatureStructure[] array = fsArray.toArray();
if (array.length > 0) {
generator.writeStartArray();
for (FeatureStructure featureStructure : array) {
writeFS(generator, featureStructure);
}
generator.writeEndArray();
}
} else if (value instanceof CommonArrayFS) {
CommonArrayFS fsArray = (CommonArrayFS) value;
generator.writeFieldName(feature.getShortName());
String[] array = fsArray.toStringArray();
if (array.length > 0) {
generator.writeStartArray();
for (String featureStructure : array) {
generator.writeString(featureStructure);
}
generator.writeEndArray();
}
}
}
/**
* Write primitive.
*
* @param generator
* the generator
* @param annotation
* the annotation
* @param feature
* the feature
* @throws IOException
* Signals that an I/O exception has occurred.
*/
private void writePrimitive(JsonGenerator generator, FeatureStructure annotation, Feature feature)
throws IOException {
generator.writeFieldName(feature.getShortName());
writePrimitiveValue(generator, annotation, feature);
}
/**
* Write primitive value.
*
* @param generator
* the generator
* @param annotation
* the annotation
* @param feature
* the feature
* @throws IOException
* Signals that an I/O exception has occurred.
*/
private void writePrimitiveValue(JsonGenerator generator, FeatureStructure annotation, Feature feature)
throws IOException {
String range = feature.getRange().getName();
switch (range) {
case CAS.TYPE_NAME_INTEGER:
generator.writeNumber(annotation.getIntValue(feature));
break;
case CAS.TYPE_NAME_FLOAT:
generator.writeNumber(annotation.getFloatValue(feature));
break;
case CAS.TYPE_NAME_STRING:
generator.writeString(annotation.getStringValue(feature));
break;
case CAS.TYPE_NAME_BOOLEAN:
generator.writeBoolean(annotation.getBooleanValue(feature));
break;
case CAS.TYPE_NAME_BYTE:
generator.writeNumber(annotation.getByteValue(feature));
break;
case CAS.TYPE_NAME_SHORT:
generator.writeNumber(annotation.getShortValue(feature));
break;
case CAS.TYPE_NAME_LONG:
generator.writeNumber(annotation.getLongValue(feature));
break;
case CAS.TYPE_NAME_DOUBLE:
generator.writeNumber(annotation.getDoubleValue(feature));
break;
default:
getMonitor().warn("Unexpected primitive type: " + range);
break;
}
}
/**
* Select annotations - subclasses must implement this.
*
* @param jCas
* the JCas
* @return the iterable
*/
protected abstract Iterable<T> selectAnnotations(JCas jCas);
}