/**
 * Copyright (C) 2012 cogroo <cogroo@cogroo.org>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cogroo.uima.eval;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.AnalysisComponent;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;

import cogroo.uima.GoldenGrammarError;
import cogroo.uima.GoldenSentence;
import cogroo.uima.GrammarError;
import cogroo.uima.eval.HtmlWriter.Detail;

import com.google.common.base.Objects;

/**
 * UIMA annotator that compares the {@link GrammarError} annotations found in
 * a CAS (the predictions) with its {@link GoldenGrammarError} annotations
 * (the targets), sentence by sentence as delimited by {@link GoldenSentence}.
 * <p>
 * For every processed CAS it appends one tab-separated line per
 * target/prediction pairing to the details report and accumulates counters in
 * two {@link Stats} instances (one per category, one per rule). When the
 * collection is complete it writes the summary to the F-measure text report
 * and renders the HTML report.
 */
public class GrammarCheckerEvaluator extends JCasAnnotator_ImplBase {

  /** Name of the parameter holding the directory of the HTML report. */
  public static final String PARAM_HTMLREPORTPATH = "HTMLReportPath";
  /** Name of the parameter holding the corpus name (report file prefix). */
  public static final String PARAM_CORPUSNAME = "CorpusName";
  /** Name of the parameter holding the directory of the text reports. */
  public static final String PARAM_TEXTREPORTPATH = "TextReportPath";

  private BufferedWriter mReportF;
  private BufferedWriter mReportDetails;
  private Stats mFMeasure = new Stats();
  private Stats mFMeasureRules = new Stats();

  private HtmlWriter mHtmlWriter;

  /**
   * Reads the report locations from the configuration, creates both report
   * directories and opens the report writers.
   *
   * @param aContext
   *          the UIMA context providing the configuration parameters
   * @throws ResourceInitializationException
   *           if a required configuration parameter is absent
   * @throws RuntimeException
   *           if a report file cannot be opened or the header cannot be
   *           written
   * @see AnalysisComponent#initialize(UimaContext)
   */
  @Override
  public void initialize(UimaContext aContext)
      throws ResourceInitializationException {
    super.initialize(aContext);

    String htmlReportPath = requiredParameter(aContext, PARAM_HTMLREPORTPATH);
    String corpusName = requiredParameter(aContext, PARAM_CORPUSNAME);
    String textReportPath = requiredParameter(aContext, PARAM_TEXTREPORTPATH);

    // Both target directories must exist before the writers are opened. A
    // failed mkdirs() surfaces below as an IOException when opening the file.
    new File(htmlReportPath).mkdirs();
    new File(textReportPath).mkdirs();

    String pathToReportFMeasure = textReportPath + "/" + corpusName
        + "-FMeasure.txt";
    String pathToReportDetails = textReportPath + "/" + corpusName
        + "-Details.txt";
    String pathToHtmlFMeasure = htmlReportPath + "/" + corpusName
        + "-FMeasure.html";

    try {
      mReportDetails = openUtf8Writer(pathToReportDetails);
      mReportF = openUtf8Writer(pathToReportFMeasure);

      mHtmlWriter = new HtmlWriter(pathToHtmlFMeasure, "Analysis", corpusName);

      mReportDetails
          .append("Type\tID\tTarget Err\tTarget Cat\tPred Err\tPred Cat\tRule\tRule Group\tSuggestion\tSentence\n");
    } catch (IOException e) {
      // Do not leave half-opened files behind when initialization fails.
      closeQuietly(mReportDetails);
      closeQuietly(mReportF);
      mReportDetails = null;
      mReportF = null;
      throw new RuntimeException("Couldn't init file", e);
    }
  }

  /**
   * Returns the trimmed value of a mandatory String configuration parameter.
   *
   * @param aContext
   *          the context to read from
   * @param name
   *          the parameter name
   * @return the trimmed parameter value
   * @throws ResourceInitializationException
   *           if the parameter has no value
   */
  private static String requiredParameter(UimaContext aContext, String name)
      throws ResourceInitializationException {
    Object value = aContext.getConfigParameterValue(name);
    if (value == null) {
      throw new ResourceInitializationException(new IllegalArgumentException(
          "Missing configuration parameter: " + name));
    }
    return ((String) value).trim();
  }

  /** Opens a buffered UTF-8 writer on the given path. */
  private static BufferedWriter openUtf8Writer(String path) throws IOException {
    return new BufferedWriter(new OutputStreamWriter(
        new FileOutputStream(path), "UTF-8"));
  }

  /** Closes the writer, ignoring a null argument and any I/O failure. */
  private static void closeQuietly(BufferedWriter writer) {
    if (writer != null) {
      try {
        writer.close();
      } catch (IOException ignored) {
        // Only used on paths where another failure is already being reported.
      }
    }
  }

  /** Overflow-safe three-way comparison of two ints. */
  private static int compareInts(int a, int b) {
    if (a < b) {
      return -1;
    }
    return a > b ? 1 : 0;
  }

  /**
   * Collects the target errors, predicted errors and sentences of the CAS,
   * aligns the targets with the predictions, updates the statistics and
   * writes the pairings to the details report.
   *
   * @param aJCas
   *          the CAS to evaluate
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException {

    List<Error> targetGrammarErrors = new ArrayList<Error>();
    List<Error> predictedGrammarErrors = new ArrayList<Error>();
    List<Error> sentences = new ArrayList<Error>();

    String docText = aJCas.getDocumentText();

    AnnotationIndex<Annotation> goldenGrammarErrorIndex = aJCas
        .getAnnotationIndex(GoldenGrammarError.type);
    for (Annotation annotation : goldenGrammarErrorIndex) {
      GoldenGrammarError a = (GoldenGrammarError) annotation;
      Error s = new Error(a.getBegin(), a.getEnd(), a.getCategory(), "TTT");
      targetGrammarErrors.add(s);
      mFMeasure.addTarget();
      mFMeasure.addTarget(a.getCategory());
    }

    AnnotationIndex<Annotation> grammarErrorIndex = aJCas
        .getAnnotationIndex(GrammarError.type);
    for (Annotation annotation : grammarErrorIndex) {
      GrammarError a = (GrammarError) annotation;
      String cat = a.getCategory();
      if (cat == null) {
        cat = "";
      }
      // The type of a predicted error is encoded as "<category>#<ruleId>";
      // removeRules and removeCat split it again.
      cat += "#" + a.getRuleId();
      Error s = new Error(a.getBegin(), a.getEnd(), cat, a.getReplace());
      s.setRuleId(a.getRuleId());
      predictedGrammarErrors.add(s);
    }

    AnnotationIndex<Annotation> sentIndex = aJCas
        .getAnnotationIndex(GoldenSentence.type);
    for (Annotation annotation : sentIndex) {
      GoldenSentence a = (GoldenSentence) annotation;
      // Sentences reuse the Error holder; its type carries the sentence id.
      Error s = new Error(a.getBegin(), a.getEnd(), a.getId(), "SSS");
      sentences.add(s);
      mFMeasure.addSentence();
    }

    normalizeTargetErrors(targetGrammarErrors, predictedGrammarErrors);

    List<Error> noRules = removeRules(predictedGrammarErrors);

    mFMeasure.updateScores(targetGrammarErrors, noRules);

    writeToReport(sentences, targetGrammarErrors, noRules,
        removeCat(predictedGrammarErrors), docText);
  }

  /**
   * Copies the predicted errors, keeping only the part of each type that
   * precedes the first '#'.
   *
   * @param predictedGrammarErrors
   *          errors whose type has the form "category#rule"
   * @return new errors with the same span, suggestion and rule id, typed by
   *         category only
   */
  private List<Error> removeRules(List<Error> predictedGrammarErrors) {
    List<Error> out = new ArrayList<Error>();
    for (Error error : predictedGrammarErrors) {
      String cat = error.getType().substring(0, error.getType().indexOf("#"));
      Error e = new Error(error.getStart(), error.getEnd(), cat,
          error.getSuggestion());
      e.setRuleId(error.getRuleId());
      out.add(e);
    }
    return out;
  }

  /**
   * Walks the sentences in order and pairs the pending target and predicted
   * errors that start inside each one, classifying every pairing as TP, FP*
   * (same span, not equal), FP or FN. The resulting entries are written to
   * the details report in their sorted order.
   *
   * @param sentences
   *          sentence spans, typed by sentence id
   * @param targetGrammarErrors
   *          expected errors
   * @param predictedGrammarErrors
   *          predicted errors, typed by category
   * @param rules
   *          rule of each predicted error, parallel to
   *          {@code predictedGrammarErrors}
   * @param docText
   *          the document text
   * @throws RuntimeException
   *           if an error remains that starts in none of the sentences
   */
  private void writeToReport(List<Error> sentences,
      List<Error> targetGrammarErrors, List<Error> predictedGrammarErrors,
      List<String> rules, String docText) {

    Queue<Error> t = new LinkedList<Error>(targetGrammarErrors);
    Queue<Error> p = new LinkedList<Error>(predictedGrammarErrors);
    Queue<String> r = new LinkedList<String>(rules);

    SortedSet<ReportEntry> entries = new TreeSet<GrammarCheckerEvaluator.ReportEntry>();

    for (Error s : sentences) {
      while ((t.size() > 0 && s.contains(t.peek().getStart()))
          || (p.size() > 0 && s.contains(p.peek().getStart()))) {
        if (t.size() > 0 && p.size() > 0 && t.peek().equals(p.peek())) {
          addTP(p.peek().getType(), r.peek());
          System.out.println(docText.substring(s.getStart(), s.getEnd()));
          entries.add(new ReportEntry("TP", s, t.poll(), p.poll(), r.poll(),
              docText));
          mFMeasure.addTP();
        } else if (t.size() > 0 && p.size() > 0
            && t.peek().getStart() == p.peek().getStart()
            && t.peek().getEnd() == p.peek().getEnd()) {
          // Same span but not equal: counted as a false positive and the
          // target is consumed together with the prediction.
          addFP(p.peek().getType(), r.peek());
          entries.add(new ReportEntry("FP*", s, t.poll(), p.poll(), r.poll(),
              docText));
          mFMeasure.addFP();
        } else if (t.size() > 0 && p.size() > 0
            && s.contains(t.peek().getStart())
            && s.contains(p.peek().getStart())) {
          // Both start in this sentence: consume whichever starts first.
          if (t.peek().getStart() <= p.peek().getStart()) {
            entries
                .add(new ReportEntry("FN", s, t.poll(), null, null, docText));
          } else {
            addFP(p.peek().getType(), r.peek());
            entries.add(new ReportEntry("FP", s, null, p.poll(), r.poll(),
                docText));
            mFMeasure.addFP();
          }
        } else if (t.size() > 0 && s.contains(t.peek().getStart())) {
          entries.add(new ReportEntry("FN", s, t.poll(), null, null, docText));
        } else if (p.size() > 0 && s.contains(p.peek().getStart())) {
          addFP(p.peek().getType(), r.peek());
          entries.add(new ReportEntry("FP", s, null, p.poll(), r.poll(),
              docText));
          mFMeasure.addFP();
        } else {
          throw new RuntimeException("Shouldn't get here.");
        }
      }
    }

    for (ReportEntry reportEntry : entries) {
      writeToReport(reportEntry);
    }

    if (t.size() > 0 || p.size() > 0) {
      throw new RuntimeException("Couldn't add some errors to report.");
    }
  }

  /**
   * Registers a false positive for the category and for the rule.
   *
   * @param type
   *          the category of the predicted error
   * @param rule
   *          the rule that produced it
   */
  private void addFP(String type, String rule) {
    mFMeasure.addFP(type);
    if (rule == null) {
      // Diagnostic marker kept from the original code ("aqui" = "here").
      System.out.println("aqui");
    }
    mFMeasureRules.addFP(rule);
  }

  /**
   * Registers a true positive for the category and for the rule.
   *
   * @param type
   *          the category of the predicted error
   * @param rule
   *          the rule that produced it
   */
  private void addTP(String type, String rule) {
    mFMeasure.addTP(type);
    if (rule == null) {
      // Diagnostic marker kept from the original code ("aqui" = "here").
      System.out.println("aqui");
    }
    mFMeasureRules.addTP(rule);
  }

  /**
   * Extracts, for each predicted error, the part of its type that follows
   * the first '#'.
   *
   * @param predictedGrammarErrors
   *          errors whose type has the form "category#rule"
   * @return the rule parts, in the same order as the input
   */
  private List<String> removeCat(List<Error> predictedGrammarErrors) {
    List<String> ret = new ArrayList<String>(predictedGrammarErrors.size());
    for (Error s : predictedGrammarErrors) {
      ret.add(s.getType().substring(s.getType().indexOf("#") + 1));
    }
    return ret;
  }

  /**
   * One line of the details report: the classification of a pairing, the
   * sentence it belongs to, and the target and/or predicted error involved.
   * Entries order by the type of the sentence holder, then by the smallest
   * start offset among their parts, then by hash code.
   */
  class ReportEntry implements Comparable<ReportEntry> {

    public final String type;
    public final Error selectedError;
    public final Error targetError;
    public final Error predictedError;
    public final String rule;
    public final String docText;

    public ReportEntry(String type, Error selected, Error target,
        Error predicted, String rule, String docText) {
      super();
      this.type = type;
      this.selectedError = selected;
      this.targetError = target;
      this.predictedError = predicted;
      this.rule = rule;
      this.docText = docText;
    }

    public int compareTo(ReportEntry other) {
      if (other == this) {
        return 0;
      }
      int val = selectedError.getType().compareTo(
          other.selectedError.getType());
      if (val != 0) {
        return val;
      }
      int minThis = getMinError(selectedError, targetError, predictedError);
      // Must be computed from the OTHER entry's parts; using this entry's
      // fields on both sides would make this step a no-op.
      int minOther = getMinError(other.selectedError, other.targetError,
          other.predictedError);
      if (minThis != minOther) {
        return compareInts(minThis, minOther);
      }
      // Tie-break on hash code; compared without subtraction because the
      // difference of two arbitrary ints can overflow and flip the sign.
      return compareInts(this.hashCode(), other.hashCode());
    }

    @Override
    public int hashCode() {
      return Objects.hashCode(type, selectedError, targetError,
          predictedError, rule, docText);
    }

    /**
     * Returns the smallest start offset among the non-null arguments, or
     * {@link Integer#MAX_VALUE} when all of them are null.
     */
    private int getMinError(Error selectedError2, Error targetError2,
        Error predictedError2) {
      int min = Integer.MAX_VALUE;
      if (selectedError2 != null && selectedError2.getStart() < min) {
        min = selectedError2.getStart();
      }
      if (targetError2 != null && targetError2.getStart() < min) {
        min = targetError2.getStart();
      }
      if (predictedError2 != null && predictedError2.getStart() < min) {
        min = predictedError2.getStart();
      }
      return min;
    }
  }

  /**
   * Appends one tab-separated line for the entry to the details report. The
   * columns follow the header written by
   * {@link #initialize(UimaContext)}.
   *
   * @param re
   *          the entry to write
   * @throws RuntimeException
   *           if the report cannot be written
   */
  private void writeToReport(ReportEntry re) {
    String type = re.type;
    Error s = re.selectedError;
    Error t = re.targetError;
    Error p = re.predictedError;
    String r = re.rule;
    String docText = re.docText;

    StringBuilder line = new StringBuilder();
    line.append(type + "\t");
    line.append(s.getType() + "\t");
    line.append(getDetail(t, docText) + "\t");
    line.append(getDetail(p, docText) + "\t");
    if (r != null) {
      line.append(r + "\t");
      line.append(getGroup(r) + "\t");
    } else {
      // Keep the rule and rule-group columns, but leave them empty.
      line.append("\t\t");
    }
    // Tabs and line breaks inside the sentence would break the row layout.
    String text = s.getCoveredText(docText).toString()
        .replaceAll("[\t\n]", " ");
    if (p != null) {
      line.append(p.getSuggestion()).append("\t");
    } else {
      line.append("\t");
    }
    line.append(text);

    try {
      mReportDetails.append(line + "\n");
    } catch (IOException e) {
      throw new RuntimeException("Error writting to report.", e);
    }
  }

  /** Returns the group of the given rule as reported by {@link RuleGroups}. */
  private String getGroup(String r) {
    return RuleGroups.getGroup(r);
  }

  /**
   * Builds the two report columns of an error: its covered text (with tabs
   * and line breaks replaced by spaces) and its type.
   *
   * @param p
   *          the error, or null to produce two empty columns
   * @param docText
   *          the document text
   * @return the two columns separated by a tab
   */
  private String getDetail(Error p, String docText) {
    StringBuilder d = new StringBuilder();
    if (p != null) {
      d.append(p.getCoveredText(docText).toString().replaceAll("[\t\n]", " ")
          + "\t");
      d.append(p.getType());
    } else {
      d.append("\t");
    }
    return d.toString();
  }

  /**
   * Aligns each target error with the first intersecting predicted error
   * whose type contains the target type (or consists only of "#" followed
   * by digits): the target takes the span of the prediction, and a
   * prediction whose type lists several categories (contains "|") is
   * retyped as "targetType#rule". Both lists are modified in place.
   *
   * @param targetGrammarErrors
   *          expected errors
   * @param predictedGrammarErrors
   *          predicted errors whose type has the form "category#rule"
   */
  private void normalizeTargetErrors(List<Error> targetGrammarErrors,
      List<Error> predictedGrammarErrors) {
    // usually the target is larger than predicted...
    for (int i = 0; i < targetGrammarErrors.size(); i++) {
      Error t = targetGrammarErrors.get(i);
      boolean cont = true;
      for (int j = 0; j < predictedGrammarErrors.size() && cont; j++) {
        Error p = predictedGrammarErrors.get(j);
        if (t.intersects(p)) {
          if (p.getType().contains(t.getType())
              || p.getType().matches("^#\\d+")) {
            targetGrammarErrors.set(i,
                new Error(p.getStart(), p.getEnd(), t.getType()));
            if (p.getType().contains("|")) {
              String rule = p.getType().substring(
                  p.getType().indexOf("#") + 1);
              String cat = t.getType() + "#" + rule;
              // NOTE(review): this replacement is built without the
              // suggestion and rule id of p, unlike removeRules, which copies
              // both. Confirm whether that loss is intended.
              predictedGrammarErrors.set(j,
                  new Error(p.getStart(), p.getEnd(), cat));
            }
            cont = false;
          }
        }
      }
    }
  }

  /**
   * Writes the summary to the F-measure report, renders the HTML report and
   * closes both text report writers. Both writers are always closed, even if
   * writing or the first close fails; the first failure is then rethrown.
   *
   * @throws RuntimeException
   *           if a report cannot be written or a writer cannot be closed
   */
  @Override
  public void collectionProcessComplete()
      throws AnalysisEngineProcessException {
    super.collectionProcessComplete();

    RuntimeException failure = null;

    try {
      String summary = mFMeasure.toString();
      mReportF.write(summary);

      List<Detail> details = new ArrayList<HtmlWriter.Detail>();
      details.add(new Detail("summaryCat",
          "Verdadeiros positivos, falsos positivos e target por categoria",
          mFMeasure.toFP_TP_Target_Table()));

      details.add(new Detail("summaryRules",
          "Verdadeiros positivos e falsos positivos por regra", mFMeasureRules
              .toFP_TP_Table()));

      mHtmlWriter.addData(summary, details, mFMeasure.getData(),
          mFMeasure.getSentences());
      mHtmlWriter.render();
    } catch (Exception e) {
      failure = new RuntimeException("Couldn't write to file", e);
    }

    // Close each writer independently so one failure cannot leak the other.
    if (mReportDetails != null) {
      try {
        mReportDetails.close();
      } catch (IOException e) {
        if (failure == null) {
          failure = new RuntimeException("Couldn't close file", e);
        }
      }
    }
    if (mReportF != null) {
      try {
        mReportF.close();
      } catch (IOException e) {
        if (failure == null) {
          failure = new RuntimeException("Couldn't close file", e);
        }
      }
    }

    if (failure != null) {
      throw failure;
    }
  }
}