/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package edu.toronto.cs.xcurator.eval;
import edu.toronto.cs.xcurator.utils.IOUtils;
import edu.toronto.cs.xml2rdf.xml.XMLUtils;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Static helpers that score the entities and attributes found in a mapping
 * file against ground-truth lists and print precision, recall and F1.
 *
 * @author amir
 */
public class EvalUtil {

    /** Selects the entity elements under the mapping root; local-name() sidesteps namespaces. */
    private static final String ENTITY_XPATH = "/*[local-name()='mapping']/*[local-name()='entity']";

    /** Selects the attribute elements nested under every entity element. */
    private static final String ATTRIBUTE_XPATH = ENTITY_XPATH + "/*[local-name()='attribute']";

    /** Formatter for the printed scores (at most four fraction digits). DecimalFormat is not synchronized. */
    private static final DecimalFormat df = new DecimalFormat("###.####");

    /**
     * Collects the names of all entities declared in a mapping file.
     *
     * @param inputfile location of the mapping XML, as accepted by {@code XMLUtils.parse}
     * @return the distinct values of the {@code type} attribute of each entity
     *         element, with a leading {@code class:} marker removed
     */
    public static Set<String> getEntities(String inputfile) throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        return collectNames(inputfile, ENTITY_XPATH, "type", "class:");
    }

    /**
     * Collects the names of all attributes declared in a mapping file.
     *
     * @param inputfile location of the mapping XML, as accepted by {@code XMLUtils.parse}
     * @return the distinct values of the {@code name} attribute of each attribute
     *         element, with a leading {@code property:} marker removed
     */
    public static Set<String> getAttributes(String inputfile) throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        return collectNames(inputfile, ATTRIBUTE_XPATH, "name", "property:");
    }

    /**
     * Parses the mapping file, evaluates the XPath expression and gathers one
     * XML attribute value per matched node, stripped of the given prefix.
     */
    private static Set<String> collectNames(String inputfile, String expression, String attributeName, String prefix)
            throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        Set<String> names = new HashSet<>();
        // NOTE(review): the meaning of the -1 argument is defined by XMLUtils.parse, not visible here.
        Document doc = XMLUtils.parse(inputfile, -1);
        XPath xpath = XPathFactory.newInstance().newXPath();
        NodeList nodes = (NodeList) xpath.evaluate(expression, doc, XPathConstants.NODESET);
        for (int i = 0; i < nodes.getLength(); i++) {
            String value = nodes.item(i).getAttributes().getNamedItem(attributeName).getNodeValue();
            // Strip the marker only when it is a real prefix, so a name that merely
            // contains the same text further in is left intact.
            names.add(value.startsWith(prefix) ? value.substring(prefix.length()) : value);
        }
        return names;
    }

    /**
     * Compares a result set with the ground truth.
     *
     * @param result  the names that were produced
     * @param ground  the names that were expected
     * @param verbose when true, prints both sets, their intersection and the sizes
     * @return an {@code Accuracy} built from precision (|intersection| / |result|)
     *         and recall (|intersection| / |ground|); a ratio whose denominator is
     *         zero is reported as 0.0 rather than NaN
     */
    public static Accuracy evaluate(Set<String> result, Set<String> ground, boolean verbose) {
        Set<String> intersection = new HashSet<>(result);
        intersection.retainAll(ground);
        if (verbose) {
            System.out.println("result:\n " + result);
            System.out.println("size: " + result.size());
            System.out.println();
            System.out.println("ground:\n " + ground);
            System.out.println("size: " + ground.size());
            System.out.println();
            System.out.println("intersection:\n " + intersection);
            System.out.println("size: " + intersection.size());
            System.out.println();
        }
        double pr = ratio(intersection.size(), result.size());
        double re = ratio(intersection.size(), ground.size());
        return new Accuracy(pr, re);
    }

    /** Divides as doubles, yielding 0.0 when the denominator is zero. */
    private static double ratio(int numerator, int denominator) {
        return denominator == 0 ? 0.0 : (double) numerator / denominator;
    }

    /**
     * Evaluates one result set against its ground truth and prints precision,
     * recall and F1, first labelled and then again on one tab-separated line.
     */
    private static void printAccuracyStats(Set<String> attributeSet, Set<String> grAttributesSet, boolean verbose) {
        Accuracy acAttr = evaluate(attributeSet, grAttributesSet, verbose);
        final String P = df.format(acAttr.precision());
        System.out.println("Prec:" + "\t" + P);
        final String R = df.format(acAttr.recall());
        System.out.println("Recall:" + "\t" + R);
        final String F1 = df.format(acAttr.fscore(1.0));
        System.out.println("F1:" + "\t" + F1);
        System.out.println(P + "\t" + R + "\t" + F1);
    }

    /**
     * Reads a ground-truth file with one name per line.
     *
     * <p>A line holds either a single field (the name) or two tab-separated
     * fields, of which the second is taken as the name. Blank lines are skipped
     * so that they do not add an empty name to the set.
     *
     * @param filename file to read via {@code IOUtils.readFileLineByLine}
     * @return the distinct names found in the file
     * @throws RuntimeException if a line has more than two tab-separated fields
     */
    public static Set<String> readAttrEntFile(String filename) {
        List<String> lines = IOUtils.readFileLineByLine(filename);
        Set<String> set = new HashSet<>();
        for (String l : lines) {
            if (l.trim().isEmpty()) {
                continue;
            }
            String[] split = l.split("\\t");
            if (split.length == 2) {
                set.add(split[1]);
            } else if (split.length == 1) {
                set.add(split[0]);
            } else {
                throw new RuntimeException(filename + " malformat.");
            }
        }
        return set;
    }

    /**
     * Prints accuracy statistics for the entities and the attributes of a
     * mapping file, each measured against its own ground-truth file.
     *
     * @param mappingFile   mapping XML to evaluate
     * @param entityFile    ground-truth entity names, read by {@link #readAttrEntFile(String)}
     * @param attributeFile ground-truth attribute names, read by {@link #readAttrEntFile(String)}
     * @param verbose       when true, also prints the compared sets and a breakdown
     *                      of names that are missing or were put in the wrong category
     */
    public static void genAccuracyforEntitiesAndAtrributes(String mappingFile, String entityFile, String attributeFile, boolean verbose) throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        Set<String> entitySet = getEntities(mappingFile);
        Set<String> attributeSet = getAttributes(mappingFile);
        Set<String> grEntitySet = readAttrEntFile(entityFile);
        Set<String> grAttributesSet = readAttrEntFile(attributeFile);

        System.out.println("mapping file:" + mappingFile);
        System.out.println("ENTITIES:");
        printAccuracyStats(entitySet, grEntitySet, verbose);
        System.out.println("ATTRIBUTES:");
        printAccuracyStats(attributeSet, grAttributesSet, verbose);
        System.out.println();

        if (verbose) {
            System.out.println("MISSED:");
            // Entities and attributes are pooled on both sides for the two "only in" reports.
            Set<String> grUnion = new HashSet<>(grEntitySet);
            grUnion.addAll(grAttributesSet);
            Set<String> union = new HashSet<>(entitySet);
            union.addAll(attributeSet);

            Set<String> onlyInResult = new HashSet<>(union);
            onlyInResult.removeAll(grUnion);
            System.out.println("Only In Result (both entities & attributes):\n" + onlyInResult);
            System.out.println("size: " + onlyInResult.size());

            Set<String> onlyInGr = new HashSet<>(grUnion);
            onlyInGr.removeAll(union);
            System.out.println();
            System.out.println("Only In GroundTruth (both entities & attributes):\n" + onlyInGr);
            System.out.println("size: " + onlyInGr.size());
            System.out.println();

            // Ground-truth attributes that the mapping reported as entities.
            Set<String> wrongAttributes = new HashSet<>(grAttributesSet);
            wrongAttributes.retainAll(entitySet);
            System.out.println();
            System.out.println("Attributes that recognized as entity:\n" + wrongAttributes);
            System.out.println("size: " + wrongAttributes.size());
            System.out.println();

            // Ground-truth entities that the mapping reported as attributes.
            Set<String> wrongEntities = new HashSet<>(grEntitySet);
            wrongEntities.retainAll(attributeSet);
            System.out.println();
            System.out.println("Entities that recognized as attribute:\n" + wrongEntities);
            System.out.println("size: " + wrongEntities.size());
            System.out.println();
        }
    }
}