/** * */ package com.maalaang.comtwitter.uima.annotator; import java.io.FileInputStream; import java.io.InputStream; import java.io.ObjectInputStream; import org.apache.uima.UimaContext; import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.FSIterator; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.util.Level; import org.apache.uima.util.Logger; import cc.mallet.fst.CRF; import cc.mallet.pipe.Pipe; import cc.mallet.types.ArraySequence; import cc.mallet.types.Instance; import com.maalaang.omtwitter.uima.type.TokenAnnotation; /** * @author Sangwon Park * */ public class CrfClassificationAnnotator extends JCasAnnotator_ImplBase { private final String PARAM_CRF_MODEL_FILE = "crfModelFile"; private CRF crf = null; private Pipe pipe = null; private Logger logger = null; public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); logger = aContext.getLogger(); try { String crfModel = (String)aContext.getConfigParameterValue(PARAM_CRF_MODEL_FILE); InputStream is = getClass().getClassLoader().getResourceAsStream(crfModel); if (is == null) { is = new FileInputStream(crfModel); } ObjectInputStream ois = new ObjectInputStream(is); crf = (CRF)ois.readObject(); pipe = crf.getInputPipe(); pipe.setTargetProcessing(false); ois.close(); // fis.close(); } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage()); throw new ResourceInitializationException(e); } } public void process(JCas aJCas) throws AnalysisEngineProcessException { FSIterator<Annotation> it = aJCas.getAnnotationIndex(TokenAnnotation.type).iterator(); int size = aJCas.getAnnotationIndex(TokenAnnotation.type).size(); String[][] data = new String[2][size]; int i = 0; while (it.hasNext()) { TokenAnnotation tokenAnnotation = (TokenAnnotation)it.next(); data[0][i] = tokenAnnotation.getCoveredText(); data[1][i] = tokenAnnotation.getPosTag(); i++; } Instance inst = new Instance(data, null, null, null); @SuppressWarnings("unchecked") ArraySequence<String> label = (ArraySequence<String>) crf.label(inst).getTarget(); it.moveToFirst(); i = 0; while (it.hasNext()) { TokenAnnotation tokenAnnotation = (TokenAnnotation)it.next(); String strLabel = label.get(i); tokenAnnotation.setEntityLabel(strLabel); i++; } } }