//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.consumers; import java.io.BufferedWriter; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.nio.charset.StandardCharsets; import org.apache.commons.io.output.FileWriterWithEncoding; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.DocumentAnnotation; import org.apache.uima.resource.ResourceInitializationException; import uk.gov.dstl.baleen.types.semantic.Entity; import uk.gov.dstl.baleen.uima.BaleenConsumer; /** * Create a TSV file listing the files processed and how many entities were found in each file * * * @baleen.javadoc */ public class EntityCount extends BaleenConsumer { /** * The file to write results to * * @baleen.config entityCount.tsv */ public static final String PARAM_OUTPUT_FILE = "outputFile"; @ConfigurationParameter(name = PARAM_OUTPUT_FILE, defaultValue="entityCount.tsv") private String outputFile = "entityCount.tsv"; private File output; @Override public void doInitialize(UimaContext aContext) throws ResourceInitializationException { output = new File(outputFile); try{ if(!output.exists() && !output.createNewFile()){ throw new IOException("Can not create output file"); } if(!output.canWrite()){ throw new IOException("Can not write to output file"); } }catch(IOException ioe){ throw new ResourceInitializationException(ioe); } } @Override public void doProcess(JCas jCas) throws AnalysisEngineProcessException { DocumentAnnotation da = getDocumentAnnotation(jCas); try( PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriterWithEncoding(output, StandardCharsets.UTF_8, true))) ) { int count = JCasUtil.select(jCas, Entity.class).size(); pw.println(da.getSourceUri()+"\t"+count); } catch (IOException e) { getMonitor().warn("Unable to write to output", e); } } @Override public void doDestroy() { output = null; } }