//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.consumers.csv; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.util.regex.Pattern; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import com.opencsv.CSVWriter; import uk.gov.dstl.baleen.uima.BaleenConsumer; /** * Base class for outputting CSV files. */ public abstract class AbstractCsvConsumer extends BaleenConsumer { private static final Pattern NORMALIZE_PATTERN = Pattern.compile("\\s+"); /** * The filename to use * * @baleen.config output.csv */ public static final String KEY_FILENAME = "filename"; @ConfigurationParameter(name = KEY_FILENAME, defaultValue = "output.csv") private String filename; private CSVWriter writer; /** * Instantiates a new abstract csv consumer. */ protected AbstractCsvConsumer() { super(); } @Override public void doInitialize(UimaContext aContext) throws ResourceInitializationException { super.doInitialize(aContext); try { // Attempt to create the path if it doesn't exist new File(filename).getParentFile().mkdirs(); writer = new CSVWriter(new OutputStreamWriter(new FileOutputStream(filename, false), StandardCharsets.UTF_8), '\t', CSVWriter.NO_QUOTE_CHARACTER); } catch (final IOException e) { throw new ResourceInitializationException(e); } } @Override protected final void doProcess(JCas jCas) throws AnalysisEngineProcessException { write(jCas); try { writer.flush(); } catch (final IOException e) { getMonitor().warn("Unable to flush file", e); } } /** * Write the JCas to CSV. * * @param jCas * the j cas */ protected abstract void write(JCas jCas); @Override protected void doDestroy() { try { if (writer != null) { try { writer.flush(); writer.close(); } catch (final IOException e) { getMonitor().warn("Failed to close csv writer", e); } } } finally { writer = null; } super.doDestroy(); } /** * Called by implementors to write a row. * * @param row * the row */ protected void write(String... row) { writer.writeNext(row); } /** * Normalize the text (called by implementors). * * @param text * the text * @return the string */ protected String normalize(String text) { return NORMALIZE_PATTERN.matcher(text).replaceAll(" ").trim(); } }