/** * */ package com.maalaang.omtwitter.uima.consumer; import java.io.BufferedWriter; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; import org.apache.uima.collection.CasConsumer_ImplBase; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.resource.ResourceProcessException; import org.apache.uima.util.Level; import org.apache.uima.util.Logger; import com.maalaang.omtwitter.ml.SvmFeatureVector; import com.maalaang.omtwitter.uima.ml.SvmTargetExtractor; import com.maalaang.omtwitter.uima.ml.SvmFVFactory; /** * @author Sangwon Park * */ public class SvmTrainingDataWriteConsumer extends CasConsumer_ImplBase { private static final String PARAM_SVM_FV_FACTORY_CLASS_NAME = "svmFVFactoryClassName"; private static final String PARAM_SVM_TARGET_EXTRACTOR_CLASS_NAME = "svmTargetExtractorClassName"; private static final String PARAM_SVM_TRAINING_DATA_FILE = "svmTrainingDataFile"; private Logger logger = null; private SvmFVFactory fvFactory = null; private SvmTargetExtractor targetExtractor = null; private BufferedWriter bw = null; /* (non-Javadoc) * @see org.apache.uima.collection.CasConsumer_ImplBase#initialize() */ @Override public void initialize() throws ResourceInitializationException { super.initialize(); logger = getLogger(); try { Class<?> fvFactoryClass = Class.forName((String)getConfigParameterValue(PARAM_SVM_FV_FACTORY_CLASS_NAME)); fvFactory = (SvmFVFactory) fvFactoryClass.newInstance(); Class<?> targetExtractorClass = Class.forName((String)getConfigParameterValue(PARAM_SVM_TARGET_EXTRACTOR_CLASS_NAME)); targetExtractor = (SvmTargetExtractor) targetExtractorClass.newInstance(); bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream((String)getConfigParameterValue(PARAM_SVM_TRAINING_DATA_FILE)), "UTF-8")); } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage()); throw new ResourceInitializationException(e); } } public void processCas(CAS aCAS) throws ResourceProcessException { JCas jcas = null; try { jcas = aCAS.getJCas(); } catch (CASException e) { logger.log(Level.SEVERE, e.getMessage()); throw new ResourceProcessException(e); } SvmFeatureVector fv = fvFactory.createFeatureVectorFromJCas(jcas); int target = targetExtractor.extractTargetFromJCas(jcas); if (target == 0) { return; } try { bw.write(String.valueOf(target)); bw.write(' '); bw.write(fv.toString()); bw.write('\n'); bw.flush(); } catch (IOException e) { logger.log(Level.SEVERE, e.getMessage()); throw new ResourceProcessException(e); } } /* (non-Javadoc) * @see org.apache.uima.collection.CasConsumer_ImplBase#destroy() */ @Override public void destroy() { try { bw.close(); bw = null; } catch (IOException e) { logger.log(Level.WARNING, e.getMessage()); } super.destroy(); } }