/*
 *    Copyright (c) 2013, University of Toronto.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License"); you may
 *    not use this file except in compliance with the License. You may obtain
 *    a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 *    License for the specific language governing permissions and limitations
 *    under the License.
 */
package edu.toronto.cs.xml2rdf.analysis;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;

import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import edu.toronto.cs.xml2rdf.xml.XMLUtils;

/**
 * Collects simple counts over an XML document whose root element contains one
 * child element per entity.
 *
 * <p>For every entity element the class counts its {@code property},
 * {@code relation} and {@code ontology-link} child elements (ontology links
 * nested directly inside a {@code property} are counted as well) and tracks
 * the per-entity minimum and maximum of each count.
 */
public class Statistics {

  private final Document doc;

  // Stored for callers' convenience; not read by any computation in this class.
  private final String prefix;

  // Totals across all entities, plus per-entity extremes.
  private int entityCount = 0;

  private int propertyCount = 0;
  private int propertyMax = 0;
  private int propertyMin = Integer.MAX_VALUE;

  private int relCount = 0;
  private int relMax = 0;
  private int relMin = Integer.MAX_VALUE;

  private int linkCount = 0;
  private int linkMax = 0;
  private int linkMin = Integer.MAX_VALUE;

  private int promotedCount = 0;
  private int mergedCount = 0;
  private int linkedCount = 0;

  /**
   * Parses the document at {@code schemaPath}.
   *
   * @param schemaPath path of the XML file to analyse
   * @param prefix     prefix associated with the document (kept, but unused here)
   * @throws FileNotFoundException        if {@code schemaPath} cannot be opened
   * @throws SAXException                 propagated from {@code XMLUtils.parse}
   * @throws IOException                  if reading or closing the file fails
   * @throws ParserConfigurationException propagated from {@code XMLUtils.parse}
   */
  public Statistics(String schemaPath, String prefix)
      throws FileNotFoundException, SAXException, IOException,
      ParserConfigurationException {
    FileInputStream in = new FileInputStream(schemaPath);
    try {
      // NOTE(review): -1 is passed through unchanged from the original code;
      // presumably it means "no limit" -- confirm against XMLUtils.parse.
      this.doc = XMLUtils.parse(in, -1);
    } finally {
      // The stream was previously never closed; release the file handle even
      // when parsing fails. Closing an already-closed stream is a no-op.
      in.close();
    }
    this.prefix = prefix;
  }

  /**
   * Walks every child element of the document root, accumulates the counters
   * and prints one line to {@code out}:
   * {@code <average relations per entity> TAB <min relations> TAB <max relations>}.
   *
   * <p>Counters are reset on entry, so calling this method repeatedly on the
   * same instance prints the same line each time. When the root has no child
   * elements the average is {@code NaN} and the minimum stays at
   * {@link Integer#MAX_VALUE}.
   *
   * @param out stream that receives the single result line
   */
  public void generateStatistics(PrintStream out) {
    resetCounters();

    Element mappingElement = doc.getDocumentElement();
    NodeList entityNodes = mappingElement.getChildNodes();

    // Visiting order is irrelevant: only sums, minima and maxima are kept.
    for (int i = 0; i < entityNodes.getLength(); i++) {
      Node candidate = entityNodes.item(i);
      if (candidate instanceof Element) {
        entityCount++;
        updateStatsForElement((Element) candidate);
      }
    }

    // Only the relation statistics are printed. The remaining figures are
    // still computed and can be enabled by uncommenting the lines below.
    // out.println(/*"prop:\t" +*/ propertyCount/(double)entityCount + "\t" + propertyMin + "\t" + propertyMax);
    out.println(/*"rel:\t" + */relCount / (double) entityCount + "\t" + relMin + "\t" + relMax);
    // out.println(/*"link:\t" + */linkCount/(double)entityCount + "\t" + linkMin + "\t" + linkMax);
    // out.println("merged:\t" + mergedCount);
    // out.println("promoted:\t" + promotedCount);
    // out.println("linked count:\t" + linkedCount);
    // out.println("entity count:\t" + entityCount);
  }

  /** Restores every counter to its initial value so a run starts from scratch. */
  private void resetCounters() {
    entityCount = 0;

    propertyCount = 0;
    propertyMax = 0;
    propertyMin = Integer.MAX_VALUE;

    relCount = 0;
    relMax = 0;
    relMin = Integer.MAX_VALUE;

    linkCount = 0;
    linkMax = 0;
    linkMin = Integer.MAX_VALUE;

    promotedCount = 0;
    mergedCount = 0;
    linkedCount = 0;
  }

  /**
   * Folds one entity element into the running totals and the per-entity
   * minima/maxima.
   *
   * @param entityElement a child element of the document root
   */
  private void updateStatsForElement(Element entityElement) {
    // getAttribute returns "" (never null) when the attribute is absent.
    if (entityElement.getAttribute("type").contains("_or_")) {
      mergedCount++;
    }

    // Per-entity tallies; named differently from the fields to avoid shadowing.
    int entityProperties = 0;
    int entityLinks = 0;
    int entityRelations = 0;
    boolean entityLinked = false;

    NodeList children = entityElement.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
      Node childNode = children.item(i);
      if (!(childNode instanceof Element)) {
        continue;
      }

      Element entityChild = (Element) childNode;
      String childName = entityChild.getNodeName();

      if ("property".equals(childName)) {
        entityProperties++;
        int nestedLinks = countOntologyLinks(entityChild);
        entityLinks += nestedLinks;
        if (nestedLinks > 0) {
          // A property counts once as "linked" however many links it holds.
          linkedCount++;
        }
      } else if ("ontology-link".equals(childName)) {
        entityLinks++;
        entityLinked = true;
      } else if ("relation".equals(childName)) {
        entityRelations++;
      }
    }

    propertyCount += entityProperties;
    linkCount += entityLinks;
    relCount += entityRelations;

    relMin = Math.min(entityRelations, relMin);
    relMax = Math.max(entityRelations, relMax);

    propertyMax = Math.max(entityProperties, propertyMax);
    propertyMin = Math.min(entityProperties, propertyMin);

    linkMax = Math.max(entityLinks, linkMax);
    linkMin = Math.min(entityLinks, linkMin);

    if (entityRelations == 0 && entityLinks > 0 && entityProperties == 1) {
      promotedCount++;
    }

    if (entityLinked) {
      linkedCount++;
    }
  }

  /** Returns how many direct child elements of {@code property} are named {@code ontology-link}. */
  private static int countOntologyLinks(Element property) {
    int links = 0;
    NodeList propertyChildren = property.getChildNodes();
    for (int j = 0; j < propertyChildren.getLength(); j++) {
      Node propertyChildNode = propertyChildren.item(j);
      if (propertyChildNode instanceof Element
          && "ontology-link".equals(propertyChildNode.getNodeName())) {
        links++;
      }
    }
    return links;
  }

  /**
   * Prints the statistics line for each schema file to standard output.
   *
   * <p>When arguments are given, each one is treated as the path of a schema
   * file. With no arguments the original hard-coded set of files is used.
   *
   * @param args optional schema file paths
   */
  public static void main(String[] args) throws FileNotFoundException,
      SAXException, IOException, ParserConfigurationException {
    final String defaultPrefix = "http://www.linkedct.org/0.1#";

    if (args != null && args.length > 0) {
      for (String schemaPath : args) {
        new Statistics(schemaPath, defaultPrefix).generateStatistics(System.out);
      }
      return;
    }

    for (int i : new int[]{5, 10, 20, 40, 50, 100, 125, 250, 500}) {
      new Statistics("/home/soheil/workspaces/workspace-xml2rdf/xml2rdf-java/output." + i + ".xml",
          defaultPrefix)
          .generateStatistics(System.out);
    }
  }
}