/** * */ package com.personalityextractor.evaluation; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.util.ArrayList; import java.util.List; import com.personalityextractor.entity.extractor.EntityExtractFactory; import com.personalityextractor.entity.extractor.EntityExtractFactory.Extracter; import com.personalityextractor.entity.extractor.IEntityExtractor; /** * @author akishore * */ public class EntityExtractionEvaluation { private ArrayList<Record> records; public EntityExtractionEvaluation(String filePath) throws Exception { records = new ArrayList<EntityExtractionEvaluation.Record>(); readDataSet(filePath); } private void readDataSet(String path) throws Exception { BufferedReader rdr = new BufferedReader(new FileReader(new File(path))); String line; while((line = rdr.readLine()) != null) { Record r = parseLine(line); if(r != null || r.text != null) { records.add(r); } } } private Record parseLine(String line) { String text = line.replaceAll("(<E>)|(</E>)", ""); Record r = new Record(); r.text = text; int flag = 0; int sIndex = -1; int eIndex = -1; for(int i=0; i<line.length(); i++) { char c = line.charAt(i); if(c == '<' && flag == 0) { flag = 1; } else if(c == 'E' && flag == 1) { flag = 2; } else if(c == '>' && flag == 2) { flag = 3; } else if(c == '/' && flag == 1) { flag = 4; } else if(c == 'E' && flag == 4) { flag = 5; } else if(c == '>' && flag == 5) { flag = 6; } else { flag = 0; } if(flag == 3 && sIndex == -1) { sIndex = i + 1; } else if(flag == 6) { eIndex = i - 3; r.entities.add(line.substring(sIndex, eIndex)); sIndex = -1; eIndex = -1; } } return r; } public EvalMetrics evaluate(IEntityExtractor extractor) { EvalMetrics metrics = new EvalMetrics(); for(Record r : records) { List<String> extractedEntities = extractor.extract(r.text); for(String exEntity : extractedEntities) { if(r.entities.size() > 0) { for(String entity : r.entities) { if(exEntity.equalsIgnoreCase(entity)) { metrics.incrTP(); } else { metrics.incrFP(); } } } else { metrics.incrFP(); } } } return metrics; } public static void printUsage() { System.out.println("java com....EntityExtEval <annotated dataset>"); System.exit(0); } private class Record { public String text; public ArrayList<String> entities = new ArrayList<String>(); } public static void main(String[] args) { if(args.length < 1) { printUsage(); } try { EntityExtractionEvaluation eval = new EntityExtractionEvaluation(args[0]); for(Extracter e : Extracter.values()) { EvalMetrics metrics = eval.evaluate(EntityExtractFactory.produceExtractor(e)); System.out.println(e.name() + ":\t" + metrics.calculateError()); } } catch (Exception e) { e.printStackTrace(); } } }