/** * */ package com.maalaang.omtwitter.uima.consumer; import java.io.BufferedWriter; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; import org.apache.uima.cas.FSIterator; import org.apache.uima.collection.CasConsumer_ImplBase; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.resource.ResourceProcessException; import org.apache.uima.util.Level; import org.apache.uima.util.Logger; import com.maalaang.omtwitter.model.OMTweet; import com.maalaang.omtwitter.uima.type.TokenAnnotation; import com.maalaang.omtwitter.uima.type.TweetAnnotation; /** * @author Sangwon Park * */ public class OMTwitterResultWriteConsumer extends CasConsumer_ImplBase { private final static String PARAM_RESULT_FILE = "resultFile"; private final static String PARAM_SKIP_TWEET_WITH_NO_ENTITY = "skipTweetWithNoEntity"; private final static String PARAM_ENTITY_NONE_LABEL = "entityNoneLabel"; private final static String PARAM_PRINT_RESULT = "printResult"; private final static int ENTITY_MAX = 128; private BufferedWriter bw = null; private Logger logger = null; private boolean skipTweetWithNoEntity = false; private String entityNoneLabel = null; private int[][] entityIdxList = null; private String[] entityLabelList = null; private boolean printResult = false; private int typeIdxSize = 0; private Set<String> entitySet = null; private final static String targetEntityType = "mobile_device"; private Map<String,Integer> typeIdxMap = null; private Map<String,Integer[][]> targetEntityCntMap = null; @Override public void initialize() throws ResourceInitializationException { super.initialize(); logger = getLogger(); try { bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream((String) getConfigParameterValue(PARAM_RESULT_FILE)), "UTF-8")); } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage()); throw new ResourceInitializationException(e); } skipTweetWithNoEntity = (Boolean) getConfigParameterValue(PARAM_SKIP_TWEET_WITH_NO_ENTITY); entityNoneLabel = (String) getConfigParameterValue(PARAM_ENTITY_NONE_LABEL); entityIdxList = new int[ENTITY_MAX][2]; entityLabelList = new String[ENTITY_MAX]; printResult = (Boolean) getConfigParameterValue(PARAM_PRINT_RESULT); typeIdxMap = new HashMap<String,Integer>(); targetEntityCntMap = new HashMap<String,Integer[][]>(); typeIdxMap.put("battery", typeIdxSize++); typeIdxMap.put("camera", typeIdxSize++); typeIdxMap.put("carrier", typeIdxSize++); typeIdxMap.put("connectivity", typeIdxSize++); typeIdxMap.put("display", typeIdxSize++); typeIdxMap.put("generation", typeIdxSize++); typeIdxMap.put("input", typeIdxSize++); typeIdxMap.put("manufacturer", typeIdxSize++); typeIdxMap.put("memory", typeIdxSize++); typeIdxMap.put("networks", typeIdxSize++); typeIdxMap.put("os", typeIdxSize++); typeIdxMap.put("power", typeIdxSize++); typeIdxMap.put("processor", typeIdxSize++); typeIdxMap.put("size", typeIdxSize++); typeIdxMap.put("storage", typeIdxSize++); typeIdxMap.put("type", typeIdxSize++); typeIdxMap.put("weight", typeIdxSize++); entitySet = new HashSet<String>(); } /* (non-Javadoc) * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS) */ public void processCas(CAS aCAS) throws ResourceProcessException { JCas jcas; try { jcas = aCAS.getJCas(); } catch (CASException e) { logger.log(Level.SEVERE, e.getMessage()); throw new ResourceProcessException(e); } boolean in = false; String entityLabel = null; String prevLabel = null; int idx = 0; boolean isEntity = false; entitySet.clear(); FSIterator<Annotation> tokenAnnIt = jcas.getAnnotationIndex(TokenAnnotation.type).iterator(); while (tokenAnnIt.hasNext()) { TokenAnnotation tokenAnn = (TokenAnnotation) tokenAnnIt.next(); String label = tokenAnn.getEntityLabel(); if (entityNoneLabel.equals(label)) { if (isEntity) { idx++; isEntity = false; } } else if (label.endsWith("_B")) { if (isEntity) { idx++; } isEntity = true; entityIdxList[idx][0] = tokenAnn.getBegin(); entityIdxList[idx][1] = tokenAnn.getEnd(); entityLabelList[idx] = label.substring(0, label.length() - 2); } else { entityIdxList[idx][1] = tokenAnn.getEnd(); } } try { if (idx > 0 || !skipTweetWithNoEntity) { TweetAnnotation tweetAnn = (TweetAnnotation) jcas.getAnnotationIndex(TweetAnnotation.type).iterator().next(); StringBuffer sb = new StringBuffer(); String text = tweetAnn.getCoveredText(); sb.append('\n'); sb.append(text); sb.append('\n'); int polarityIndex = polarityToIndex(tweetAnn.getPolarity()); for (int i = 0; i < idx; i++) { String entity = text.substring(entityIdxList[i][0], entityIdxList[i][1]); sb.append('\t'); sb.append(entity); sb.append(" -> "); sb.append(entityLabelList[i]); sb.append('\n'); if (entityLabelList[i].equals(targetEntityType)) { // entitySet.add(entity); String q = tweetAnn.getQuery().toLowerCase(); if (q.indexOf("galaxy") >= 0) { entitySet.add("Samsung Galaxy S III"); } else if (q.indexOf("ipad") >= 0) { entitySet.add("iPad Mini"); } } } sb.append('\t'); sb.append(tweetAnn.getPolarity()); sb.append("\n"); String res = sb.toString(); bw.write(res); bw.flush(); if (printResult) { logger.log(Level.INFO, res); } for (int i = 0; i < idx; i++) { try { if (!entityLabelList[i].equals(entityNoneLabel) && !entityLabelList[i].equals(targetEntityType)) { for (String entity : entitySet) { Integer[][] cnt = targetEntityCntMap.get(entity); if (cnt == null) { cnt = new Integer[typeIdxSize][3]; for (int j = 0; j < typeIdxSize; j++) { cnt[j][0] = 0; cnt[j][1] = 0; cnt[j][2] = 0; } targetEntityCntMap.put(entity, cnt); } int typeIdx = typeIdxMap.get(entityLabelList[i]); cnt[typeIdx][polarityIndex]++; } } } catch (Exception e) { logger.log(Level.SEVERE, "error on entity counting - " + entityLabelList[i]); } } } } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage()); throw new ResourceProcessException(e); } } @Override public void destroy() { Set<Entry<String,Integer[][]>> set1 = targetEntityCntMap.entrySet(); Set<Entry<String,Integer>> set2 = typeIdxMap.entrySet(); String res = null; printResult = true; try { for (int j = 0; j < 3; j++) { int polarityIdx = j; for (Entry<String,Integer[][]> e1 : set1) { StringBuffer sb = new StringBuffer(); sb.append("\n# "); sb.append(e1.getKey()); sb.append(" :: "); sb.append(polarityString(polarityIdx)); sb.append('\n'); Integer[][] cnt = e1.getValue(); for (Entry<String,Integer> e2 : set2) { int num = cnt[e2.getValue()][polarityIdx]; sb.append(String.format("\t%-15s%4d ", e2.getKey(), num)); for (int k = 0; k < num; k++) { sb.append('|'); } sb.append('\n'); } sb.append('\n'); res = sb.toString(); bw.write(res); bw.flush(); if (printResult) { logger.log(Level.INFO, res); } } } for (Entry<String,Integer[][]> e1 : set1) { Integer[][] cnt = e1.getValue(); long totalSum = 0; for (Entry<String,Integer> e2 : set2) { int idx = e2.getValue(); totalSum += cnt[idx][0]; totalSum += cnt[idx][1]; totalSum += cnt[idx][2]; } StringBuffer sb = new StringBuffer(); sb.append("\n# "); sb.append(e1.getKey()); sb.append('\n'); sb.append(String.format("\t%-15s%13s%13s%13s%13s", "property", "positive", "negative", "neutral", "sum")); sb.append('\n'); for (Entry<String,Integer> e2 : set2) { int idx = e2.getValue(); int posCnt = cnt[idx][0]; int negCnt = cnt[idx][1]; int neuCnt = cnt[idx][2]; int sum = posCnt + negCnt + neuCnt; if (sum != 0) { sb.append(String.format("\t%-15s%6.2f%%(%4d)%6.2f%%(%4d)%6.2f%%(%4d)%6.2f%%(%4d)", e2.getKey(), (double)posCnt/(double)sum, posCnt, (double)negCnt/(double)sum, negCnt, (double)neuCnt/(double)sum, neuCnt, (double)sum/(double)totalSum, sum)); } else { sb.append(String.format("\t%-15s%6.2f%%(%4d)%6.2f%%(%4d)%6.2f%%(%4d)%6.2f%%(%4d)", e2.getKey(), 0.0, 0, 0.0, 0, 0.0, 0, 0.0, 0)); } sb.append('\n'); } sb.append('\n'); res = sb.toString(); bw.write(res); bw.flush(); if (printResult) { logger.log(Level.INFO, res); } } bw.close(); } catch (IOException e) { logger.log(Level.SEVERE, e.getMessage()); } catch (Exception e) { e.printStackTrace(); logger.log(Level.SEVERE, e.getMessage()); } super.destroy(); } private int polarityToIndex(String polarity) { if (polarity.equals(OMTweet.POLARITY_STR_POSITIVE)) { return 0; } else if (polarity.equals(OMTweet.POLARITY_STR_NEGATIVE)) { return 1; } else if (polarity.equals(OMTweet.POLARITY_STR_NEUTRAL)) { return 2; } return -1; } private String polarityString(int idx) { switch (idx) { case 0: return "POS"; case 1: return "NEG"; case 2: return "NEU"; } return null; } }