/*******************************************************************************
 * Copyright 2012 University of Southern California
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * 	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * This code was developed by the Information Integration Group as part
 * of the Karma project at the Information Sciences Institute of the
 * University of Southern California.  For more information, publications,
 * and related projects, please see: http://www.isi.edu/integration
 ******************************************************************************/

package edu.isi.karma.modeling.research;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jgrapht.graph.DirectedWeightedMultigraph;

import edu.isi.karma.modeling.alignment.GraphUtil;
import edu.isi.karma.rep.alignment.ColumnNode;
import edu.isi.karma.rep.alignment.DataPropertyLink;
import edu.isi.karma.rep.alignment.InternalNode;
import edu.isi.karma.rep.alignment.Label;
import edu.isi.karma.rep.alignment.Link;
import edu.isi.karma.rep.alignment.LiteralNode;
import edu.isi.karma.rep.alignment.Node;
import edu.isi.karma.rep.alignment.ObjectPropertyLink;

/**
 * Reads service model description files from a directory and converts the
 * N3-like statements found between {@code <N3>} and {@code </N3>} markers
 * into directed weighted multigraphs of {@link Node}s and {@link Link}s.
 */
public class ModelReader {

	// public static String varPrefix = "var:";

	/** Objects whose (expanded) text starts with this prefix become {@link ColumnNode}s. */
	public static String attPrefix = "att:";

	/** Predicate URI that marks a statement as a type assertion. */
	private static final String typePredicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

	/** Prefix -> namespace table used by {@link #getUri(String)}; (re)built by {@link #initPrefixNsMapping()}. */
	private static HashMap<String, String> prefixNsMapping;

	/** Selects the model files to read; applied with {@code find()}, i.e. it may match anywhere in the name. */
	private static final Pattern FILE_NAME_PATTERN =
			Pattern.compile("s[0-9](|[0-9])-.*\\.txt", Pattern.CASE_INSENSITIVE);
	// Pattern fileNamePattern = Pattern.compile("s1-.*\\.txt", Pattern.CASE_INSENSITIVE);

	/** Recognizes the service description line; group 2 is the text between ':' and '('. */
	private static final Pattern SERVICE_NAME_PATTERN =
			Pattern.compile("S[0-9](|[0-9]):(.*)\\(", Pattern.CASE_INSENSITIVE);

	/** Extension removed from the file name to obtain the prefixed service name. */
	private static final String MODEL_FILE_EXTENSION = ".txt";

	/** A single subject/predicate/object triple as it appears in a model file. */
	static class Statement {

		private final String subject;
		private final String predicate;
		private final String object;

		public Statement(String subject, String predicate, String object) {
			this.subject = subject;
			this.predicate = predicate;
			this.object = object;
		}

		public String getSubject() {
			return subject;
		}

		public String getPredicate() {
			return predicate;
		}

		public String getObject() {
			return object;
		}

		/** Writes the three components of this statement to standard output on one line. */
		public void print() {
			System.out.print("subject=" + this.subject);
			System.out.print(", predicate=" + this.predicate);
			System.out.println(", object=" + this.object);
		}
	}

	/**
	 * Imports all service models from {@code Params.INPUT_DIR}, prints each one,
	 * exports it to Graphviz and serializes its model graph under
	 * {@code Params.JGRAPHT_DIR}.
	 *
	 * @param args ignored
	 * @throws Exception if exporting or serializing a model fails; an
	 *         {@link IOException} is caught and its stack trace printed instead
	 */
	public static void main(String[] args) throws Exception {

		List<ServiceModel> serviceModels = null;

		try {

			serviceModels = importServiceModels(Params.INPUT_DIR);
			if (serviceModels != null) {
				for (ServiceModel sm : serviceModels) {
					sm.print();
					sm.exportModelToGraphviz(Params.GRAPHVIS_DIR);
					GraphUtil.serialize(sm.getModel(),
							Params.JGRAPHT_DIR + sm.getServiceNameWithPrefix() + ".main.jgraph");
				}
			}

		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Rebuilds the prefix -> namespace table. The entries are inserted in order,
	 * so where a prefix appears in both experiment sections (e.g. "skos") the
	 * later value is the one that remains in the map.
	 */
	private static void initPrefixNsMapping() {

		prefixNsMapping = new HashMap<String, String>();

		// experiment 1
		prefixNsMapping.put("geo", "http://www.w3.org/2003/01/geo/wgs84_pos#");
		prefixNsMapping.put("gn", "http://www.geonames.org/ontology#");
		prefixNsMapping.put("schema", "http://schema.org/");
		prefixNsMapping.put("dbpprop", "http://dbpedia.org/property/");
		prefixNsMapping.put("dbpedia-owl", "http://dbpedia.org/ontology/");
		prefixNsMapping.put("skos", "http://www.w3.org/2004/02/skos/core#");
		prefixNsMapping.put("tzont", "http://www.w3.org/2006/timezone#");
		prefixNsMapping.put("qudt", "http://qudt.org/1.1/schema/qudt#");
		prefixNsMapping.put("yago", "http://dbpedia.org/class/yago/");
		prefixNsMapping.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
		prefixNsMapping.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
		prefixNsMapping.put("foaf", "http://xmlns.com/foaf/0.1/");
		prefixNsMapping.put("km", "http://isi.edu/integration/karma/dev#");

		// experiment 2 - museum data
		prefixNsMapping.put("status", "http://metadataregistry.org/uri/RegStatus/");
		prefixNsMapping.put("owl2xml", "http://www.w3.org/2006/12/owl2-xml#");
		prefixNsMapping.put("schema", "http://schema.org/");
		prefixNsMapping.put("aac-ont", "http://www.americanartcollaborative.org/ontology/");
		prefixNsMapping.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
		prefixNsMapping.put("reg", "http://metadataregistry.org/uri/profile/RegAp/");
		prefixNsMapping.put("foaf", "http://xmlns.com/foaf/0.1/");
		prefixNsMapping.put("dcterms", "http://purl.org/dc/terms/");
		prefixNsMapping.put("xsd", "http://www.w3.org/2001/XMLSchema#");
		prefixNsMapping.put("DOLCE-Lite", "http://www.loa-cnr.it/ontologies/DOLCE-Lite.owl#");
		prefixNsMapping.put("dcmitype", "http://purl.org/dc/dcmitype/");
		prefixNsMapping.put("wgs84_pos", "http://www.w3.org/2003/01/geo/wgs84_pos#");
		prefixNsMapping.put("FRBRentitiesRDA", "http://rdvocab.info/uri/schema/FRBRentitiesRDA/");
		prefixNsMapping.put("saam-ont", "http://americanart.si.edu/ontology/");
		prefixNsMapping.put("wot", "http://xmlns.com/wot/0.1/");
		prefixNsMapping.put("edm", "http://www.europeana.eu/schemas/edm/");
		prefixNsMapping.put("dc", "http://purl.org/dc/elements/1.1/");
		prefixNsMapping.put("ElementsGr2", "http://rdvocab.info/ElementsGr2/");
		prefixNsMapping.put("skos", "http://www.w3.org/2008/05/skos#");
		prefixNsMapping.put("crm", "http://www.cidoc-crm.org/rdfs/cidoc-crm#");
		prefixNsMapping.put("vs", "http://www.w3.org/2003/06/sw-vocab-status/ns#");
		prefixNsMapping.put("frbr_core", "http://purl.org/vocab/frbr/core#");
		prefixNsMapping.put("owl", "http://www.w3.org/2002/07/owl#");
		prefixNsMapping.put("ore", "http://www.openarchives.org/ore/terms/");
		prefixNsMapping.put("abc", "http://metadata.net/harmony/abc#");
		prefixNsMapping.put("dcam", "http://purl.org/dc/dcam/");
		prefixNsMapping.put("rdfg", "http://www.w3.org/2004/03/trix/rdfg-1/");
		prefixNsMapping.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
		prefixNsMapping.put("rr", "http://www.w3.org/ns/r2rml#");
		prefixNsMapping.put("km-dev", "http://isi.edu/integration/karma/dev#");
	}

	/**
	 * Reads every file in {@code importDir} whose name matches
	 * {@link #FILE_NAME_PATTERN} and builds one {@link ServiceModel} per file.
	 * Models receive the ids "s1", "s2", ... in the order the files are returned
	 * by {@link File#listFiles()}. Within a file, the line matching
	 * {@link #SERVICE_NAME_PATTERN} supplies the description and names, and each
	 * {@code <N3>} ... {@code </N3>} section is turned into one model graph.
	 *
	 * @param importDir directory containing the model description files
	 * @return the imported models; empty if the directory cannot be listed or
	 *         contains no matching file
	 * @throws IOException if a matching file cannot be opened or read
	 */
	public static List<ServiceModel> importServiceModels(String importDir) throws IOException {

		initPrefixNsMapping();

		List<ServiceModel> serviceModels = new ArrayList<ServiceModel>();

		File[] modelExamples = new File(importDir).listFiles();
		if (modelExamples == null) {
			return serviceModels;
		}

		int count = 1;
		for (File f : modelExamples) {

			if (!FILE_NAME_PATTERN.matcher(f.getName()).find()) {
				continue;
			}

			ServiceModel serviceModel = new ServiceModel("s" + String.valueOf(count));

			LineNumberReader lr = new LineNumberReader(new FileReader(f));
			try {
				String curLine;
				while ((curLine = lr.readLine()) != null) {

					Matcher matcher = SERVICE_NAME_PATTERN.matcher(curLine);
					if (matcher.find()) {
						serviceModel.setServiceDescription(curLine.trim());
						serviceModel.setServiceNameWithPrefix(stripModelFileExtension(f.getName()));
						serviceModel.setServiceName(matcher.group(2).trim());
					}

					if (!curLine.trim().startsWith("<N3>")) {
						continue;
					}

					List<Statement> statements = readStatements(lr);
					DirectedWeightedMultigraph<Node, Link> graph =
							buildGraphsFromStatements2(serviceModel.getId(), statements);
					if (graph != null) {
						serviceModel.addModel(graph);
					}
				}
			} finally {
				// Release the file handle even when reading or graph building fails.
				lr.close();
			}

			serviceModels.add(serviceModel);
			count++;
		}

		return serviceModels;
	}

	/**
	 * Removes a trailing ".txt" (in any letter case) from a file name. Unlike
	 * {@code replaceAll(".txt", "")}, the dot is taken literally and only the
	 * end of the name is touched.
	 */
	private static String stripModelFileExtension(String fileName) {
		int stemLength = fileName.length() - MODEL_FILE_EXTENSION.length();
		if (stemLength >= 0
				&& fileName.regionMatches(true, stemLength, MODEL_FILE_EXTENSION, 0, MODEL_FILE_EXTENSION.length())) {
			return fileName.substring(0, stemLength);
		}
		return fileName;
	}

	/**
	 * Reads statement lines from {@code lr} until a line starting with
	 * {@code </N3>} or the end of the input. Lines starting with '#' are skipped;
	 * lines with fewer than three whitespace-separated tokens are reported on
	 * standard output and skipped. Only the first three tokens of a line are used.
	 */
	private static List<Statement> readStatements(LineNumberReader lr) throws IOException {

		List<Statement> statements = new ArrayList<Statement>();

		String curLine;
		while ((curLine = lr.readLine()) != null) {
			String trimmed = curLine.trim();
			if (trimmed.startsWith("</N3>")) {
				break;
			}
			if (trimmed.startsWith("#")) {
				continue;
			}

			String[] parts = trimmed.split("\\s+");
			if (parts.length < 3) {
				System.out.println("Cannot extract statement from \"" + curLine + " \"");
				continue;
			}

			statements.add(new Statement(parts[0].trim(), parts[1].trim(), parts[2].trim()));
		}

		return statements;
	}

//	private static DirectedWeightedMultigraph<Node, Link> buildGraphsFromStatements(List<Statement> statements) {
//
//		DirectedWeightedMultigraph<Node, Link> graph =
//				new DirectedWeightedMultigraph<Node, Link>(Link.class);
//
//		if (statements == null || statements.size() == 0)
//			return null;
//
//		HashMap<String, Node> uri2Nodes = new HashMap<String, Node>();
//
//		for (Statement st : statements) {
//
//			Node subj = uri2Nodes.get(st.getSubject());
//			if (subj == null) {
//				subj = new Node(st.getSubject(), null, null);
//				uri2Nodes.put(st.getSubject(), subj);
//				graph.addNode(subj);
//			}
//
//			Node obj = uri2Nodes.get(st.getObject());
//			if (obj == null) {
//				obj = new Node(st.getObject(), null, null);
//				uri2Nodes.put(st.getObject(), obj);
//				graph.addNode(obj);
//			}
//
//			Link e = new Link(st.getPredicate(), null, null);
//			graph.addEdge(subj, obj, e);
//
//		}
//
//		return graph;
//	}

	/**
	 * Expands a {@code prefix:name} string using the prefix table. The text is
	 * split at the first ':'; if the (trimmed) prefix is a known key the result
	 * is its namespace followed by the trimmed name, otherwise the input is
	 * returned unchanged.
	 */
	private static String getUri(String prefixedUri) {

		int colon = prefixedUri.indexOf(":");
		if (colon == -1) {
			return prefixedUri;
		}

		String prefix = prefixedUri.substring(0, colon).trim();
		String name = prefixedUri.substring(colon + 1).trim();

		String namespace = prefixNsMapping.get(prefix);
		return namespace == null ? prefixedUri : namespace + name;
	}

	/**
	 * Builds a graph from the given statements in two passes: first every type
	 * statement creates an {@link InternalNode} for its object, remembered under
	 * the subject URI; then every other statement becomes an edge from the
	 * subject's node to the object's node.
	 *
	 * @param serviceId id of the owning service model (currently unused)
	 * @param statements statements of one N3 section
	 * @return the graph, or {@code null} if {@code statements} is null or empty
	 */
	private static DirectedWeightedMultigraph<Node, Link> buildGraphsFromStatements2(
			String serviceId, List<Statement> statements) {

		if (statements == null || statements.size() == 0) {
			return null;
		}

		DirectedWeightedMultigraph<Node, Link> graph =
				new DirectedWeightedMultigraph<Node, Link>(Link.class);

		// Assumption: there is only one rdf:type for each URI
		HashMap<String, Node> uri2Classes = new HashMap<String, Node>();
		for (Statement st : statements) {

			String subjStr = getUri(st.getSubject());
			String predicateStr = getUri(st.getPredicate());
			String objStr = getUri(st.getObject());

			if (predicateStr.equalsIgnoreCase(typePredicate)) {
				Node classNode = new InternalNode(objStr, new Label(objStr));
				uri2Classes.put(subjStr, classNode);
				graph.addVertex(classNode);
			}
		}

		// int countOfLiterals = 0;
		for (Statement st : statements) {

			String subjStr = getUri(st.getSubject());
			String predicateStr = getUri(st.getPredicate());
			String objStr = getUri(st.getObject());

			if (predicateStr.equalsIgnoreCase(typePredicate)) {
				continue;
			}

			// Subjects without a type statement get a node named after the subject itself.
			Node subj = uri2Classes.get(subjStr);
			if (subj == null) {
				subj = new InternalNode(subjStr, new Label(subjStr));
				graph.addVertex(subj);
			}

			Node obj = uri2Classes.get(objStr);
			if (obj == null) {
				if (objStr.startsWith(attPrefix)) {
					obj = new ColumnNode(objStr, null, null, "");
				} else if (objStr.indexOf(":") == -1 && objStr.indexOf("\"") != -1) {
					// Quoted text without a ':' is treated as a literal value.
					// String literalId = "lit:" + serviceId + "_l" + String.valueOf(countOfLiterals);
					obj = new LiteralNode(objStr, objStr, null);
					// countOfLiterals ++;
				} else {
					obj = new InternalNode(objStr, new Label(objStr));
				}
				graph.addVertex(obj);
			}

			// The kind of link follows the kind of target node.
			Link e;
			if (obj instanceof InternalNode) {
				e = new ObjectPropertyLink(predicateStr, new Label(predicateStr));
			} else {
				e = new DataPropertyLink(predicateStr, new Label(predicateStr));
			}
			graph.addEdge(subj, obj, e);
		}

		return graph;
	}

}