//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.consumers.template;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import org.apache.commons.io.FilenameUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
/**
 * Writes Records, and the TemplateFields covered by them, to YAML or JSON
 * files.
 *
 * <p>
 * This consumer takes extracted records and writes them, one file per source
 * document, to the configured <code>outputDirectory</code> (created on demand
 * if it does not exist).
 * </p>
 * <p>
 * The output format defaults to YAML, but can be changed to JSON by setting the
 * configuration parameter <code>outputFormat</code> to <code>json</code>
 * (matched case-insensitively; all other values will result in YAML output).
 * </p>
 * <p>
 * Output files are named after the base name of the document source (directory
 * and extension stripped), so two documents sharing a base name will write to
 * the same file, the later one overwriting the earlier.
 * </p>
 *
 * @see AbstractTemplateRecordConsumer
 */
public class FileTemplateRecordConsumer extends AbstractTemplateRecordConsumer {

    /** The Constant PARAM_OUTPUT_DIRECTORY. */
    public static final String PARAM_OUTPUT_DIRECTORY = "outputDirectory";

    /** The output directory. */
    @ConfigurationParameter(name = PARAM_OUTPUT_DIRECTORY, defaultValue = "records")
    private String outputDirectory = "records";

    /** The Constant PARAM_OUTPUT_FORMAT. */
    public static final String PARAM_OUTPUT_FORMAT = "outputFormat";

    /** The output format. */
    @ConfigurationParameter(name = PARAM_OUTPUT_FORMAT, defaultValue = "yaml")
    private String outputFormat = "yaml";

    /** The extension to use for output files (without the leading dot). */
    private String outputFileExtension;

    /** The object mapper, used for serialising Records. */
    private ObjectMapper objectMapper;

    /**
     * Selects the serialiser and file extension according to the configured
     * output format.
     *
     * JSON output is indented; any format other than <code>json</code>
     * (compared ignoring case) falls back to YAML. Null-valued properties are
     * omitted from the output in both formats.
     *
     * @param aContext
     *            the UIMA context
     * @throws ResourceInitializationException
     *             if initialisation of the parent consumer fails
     */
    @Override
    public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
        super.doInitialize(aContext);

        // Literal-first comparison is null-safe; ignoring case means "JSON"
        // is not silently treated as a request for YAML.
        if ("json".equalsIgnoreCase(outputFormat)) {
            objectMapper = new ObjectMapper();
            objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
            outputFileExtension = "json";
        } else {
            objectMapper = new ObjectMapper(new YAMLFactory());
            outputFileExtension = "yaml";
        }
        objectMapper.setSerializationInclusion(Include.NON_NULL);
    }

    /**
     * Writes the given records for the document to a new file derived from the
     * documentSourceName.
     *
     * The file extension for the output format configured (.yaml or .json) is
     * automatically appended. If the file exists, it is overwritten.
     *
     * @param jcas
     *            the JCas
     * @param documentSourceName
     *            the document source name
     * @param records
     *            the records
     * @throws AnalysisEngineProcessException
     *             if the output file cannot be created or written; the
     *             underlying {@link IOException} is preserved as the cause
     */
    @Override
    protected void writeRecords(JCas jcas, String documentSourceName, Map<String, Collection<ExtractedRecord>> records)
            throws AnalysisEngineProcessException {
        // try-with-resources guarantees the file handle is released even if
        // serialisation fails part-way through.
        try (Writer w = createOutputWriter(documentSourceName)) {
            objectMapper.writeValue(w, records);
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Creates an output writer for a new file in the configured output
     * directory, with appropriate name and extension.
     * <p>
     * Note: this overwrites existing files (warning if it does so).
     * </p>
     *
     * @param documentSourceName
     *            the document source name
     * @return a UTF-8 writer for the output file; the caller must close it
     * @throws IOException
     *             if the output directory cannot be created (including when
     *             the configured path exists but is not a directory) or the
     *             file cannot be opened for writing
     */
    private Writer createOutputWriter(final String documentSourceName) throws IOException {
        Path directoryPath = Paths.get(outputDirectory);

        // Test for "is a directory" rather than "exists": if the path exists
        // as a regular file, createDirectories fails here rather than at the
        // later file open. The guard is kept (instead of calling
        // createDirectories unconditionally) so that a symbolic link to an
        // existing directory is accepted as-is.
        if (!Files.isDirectory(directoryPath)) {
            Files.createDirectories(directoryPath);
        }

        // Only the base name is used, so any directory components in the
        // source name cannot redirect output outside the output directory.
        String baseName = FilenameUtils.getBaseName(documentSourceName);
        Path outputFilePath = directoryPath.resolve(baseName + "." + outputFileExtension);

        if (Files.exists(outputFilePath)) {
            getMonitor().warn("Overwriting existing output file {}", outputFilePath);
        }
        return Files.newBufferedWriter(outputFilePath, StandardCharsets.UTF_8);
    }
}