/******************************************************************************* * Copyright 2012 University of Southern California * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This code was developed by the Information Integration Group as part * of the Karma project at the Information Sciences Institute of the * University of Southern California. For more information, publications, * and related projects, please see: http://www.isi.edu/integration ******************************************************************************/ package edu.isi.karma.modeling.alignment.learner; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import edu.isi.karma.modeling.ModelingParams; import edu.isi.karma.modeling.alignment.GraphUtil; import edu.isi.karma.modeling.alignment.GraphVizLabelType; import edu.isi.karma.modeling.alignment.GraphVizUtil; import edu.isi.karma.modeling.alignment.LinkIdFactory; import edu.isi.karma.modeling.alignment.SemanticModel; import edu.isi.karma.modeling.ontology.OntologyManager; import edu.isi.karma.modeling.research.Params; import edu.isi.karma.rep.alignment.ColumnNode; import edu.isi.karma.rep.alignment.DefaultLink; import edu.isi.karma.rep.alignment.InternalNode; import edu.isi.karma.rep.alignment.Label; import edu.isi.karma.rep.alignment.LabeledLink; import edu.isi.karma.rep.alignment.LinkStatus; import edu.isi.karma.rep.alignment.Node; import edu.isi.karma.rep.alignment.NodeSupportingModelsComparator; import edu.isi.karma.util.RandomGUID; import edu.isi.karma.webserver.ContextParametersRegistry; import edu.isi.karma.webserver.ServletContextParameterMap; public class ModelLearningGraphCompact extends ModelLearningGraph { private static Logger logger = LoggerFactory.getLogger(ModelLearningGraphCompact.class); public ModelLearningGraphCompact(OntologyManager ontologyManager) throws IOException { super(ontologyManager, ModelLearningGraphType.Compact); } public ModelLearningGraphCompact(OntologyManager ontologyManager, boolean emptyInstance) { super(ontologyManager, emptyInstance, ModelLearningGraphType.Compact); } private void addInternalNodes(SemanticModel model, Set<InternalNode> addedNodes) { if (model == null || model.getGraph() == null) return; if (addedNodes == null) addedNodes = new HashSet<>(); HashMap<String, Integer> uriCount = new HashMap<>(); for (Node n : model.getGraph().vertexSet()) { if (n instanceof InternalNode) { Integer count = uriCount.get(n.getUri()); if (count == null) uriCount.put(n.getUri(), 1); else uriCount.put(n.getUri(), count.intValue() + 1); } } for (Map.Entry<String, Integer> stringIntegerEntry : uriCount.entrySet()) { int modelNodeCount = stringIntegerEntry.getValue(); Set<Node> matchedNodes = this.graphBuilder.getUriToNodesMap().get(stringIntegerEntry.getKey()); int graphNodeCount = matchedNodes == null ? 0 : matchedNodes.size(); for (int i = 0; i < modelNodeCount - graphNodeCount; i++) { String id = this.nodeIdFactory.getNodeId(stringIntegerEntry.getKey()); Node n = new InternalNode(id, new Label(stringIntegerEntry.getKey())); if (this.graphBuilder.addNode(n)) addedNodes.add((InternalNode)n); } } } private HashMap<Node,Node> getInternalNodeMapping(SemanticModel model) { HashMap<Node,Node> internalNodeMapping = new HashMap<>(); HashMap<String, List<Node>> uriMatchedNodes = new HashMap<>(); String uri; List<Node> sortedNodes = new ArrayList<>(); for (Node n : model.getGraph().vertexSet()) { sortedNodes.add(n); } Collections.sort(sortedNodes); for (Node n : sortedNodes) { if (n instanceof InternalNode) { uri = n.getUri(); List<Node> sortedMatchedNodes = uriMatchedNodes.get(uri); if (sortedMatchedNodes == null) { sortedMatchedNodes = new ArrayList<>(); Set<Node> matchedNodes = this.graphBuilder.getUriToNodesMap().get(uri); if (matchedNodes != null) sortedMatchedNodes.addAll(matchedNodes); Collections.sort(sortedMatchedNodes, new NodeSupportingModelsComparator()); uriMatchedNodes.put(uri, sortedMatchedNodes); } } } for (Node n : sortedNodes) { if (n instanceof InternalNode) { List<Node> sortedMatchedNodes = uriMatchedNodes.get(n.getUri()); internalNodeMapping.put(n, sortedMatchedNodes.get(0)); sortedMatchedNodes.remove(0); } } return internalNodeMapping; } private HashMap<Node,Node> getColumnNodeMapping(SemanticModel model, HashMap<Node,Node> internalNodeMapping) { if (model == null || model.getGraph() == null) return null; if (internalNodeMapping == null || internalNodeMapping.isEmpty()) return null; HashMap<String,List<Node>> dataPropertyColumnNodes = new HashMap<>(); HashMap<Node,Node> columnNodeMapping = new HashMap<>(); for (Node n : model.getGraph().vertexSet()) { if (n instanceof ColumnNode) { ColumnNode c = (ColumnNode)n; Set<LabeledLink> domainLinks = GraphUtil.getDomainLinksInLabeledGraph(model.getGraph(), (ColumnNode)n); if (domainLinks == null || domainLinks.isEmpty()) continue; for (LabeledLink l : domainLinks) { if (l.getSource() == null) continue; Node domain = l.getSource(); Node mappedNode = internalNodeMapping.get(domain); String linkUri = l.getUri(); String key = mappedNode.getId() + linkUri; List<Node> sortedMatchedNodes; if (dataPropertyColumnNodes.containsKey(key)) { sortedMatchedNodes = dataPropertyColumnNodes.get(key); } else { Set<Node> matchedColumnNodes = this.graphBuilder.getNodeDataProperties().get(key); sortedMatchedNodes = new ArrayList<>(); if (matchedColumnNodes != null) sortedMatchedNodes.addAll(matchedColumnNodes); Collections.sort(sortedMatchedNodes, new NodeSupportingModelsComparator()); dataPropertyColumnNodes.put(key, sortedMatchedNodes); } if (sortedMatchedNodes.isEmpty()) { ColumnNode newNode = new ColumnNode(new RandomGUID().toString(), c.getHNodeId(), c.getColumnName(), c.getRdfLiteralType(), c.getLanguage()); if (this.graphBuilder.addNode(newNode)) { columnNodeMapping.put(n, newNode); } } else { columnNodeMapping.put(n, sortedMatchedNodes.get(0)); sortedMatchedNodes.remove(0); } } } } return columnNodeMapping; } private void addLinks(SemanticModel model, HashMap<Node, Node> internalNodeMapping, HashMap<Node, Node> columnNodeMapping, PatternWeightSystem weightSystem) { if (model == null) return; String modelId = model.getId(); Node source, target; Node n1, n2; HashMap<Node,Node> mapping = new HashMap<>(); if (internalNodeMapping != null) mapping.putAll(internalNodeMapping); if (columnNodeMapping != null) mapping.putAll(columnNodeMapping); for (LabeledLink e : model.getGraph().edgeSet()) { source = e.getSource(); target = e.getTarget(); n1 = mapping.get(source); if (n1 == null) { // logger.warn("the mappings does not include the source node " + source.getId()); continue; } n2 = mapping.get(target); if (n2 == null) { // logger.warn("the mappings does not include the target node " + target.getId()); continue; } String id = LinkIdFactory.getLinkId(e.getUri(), n1.getId(), n2.getId()); LabeledLink l = this.graphBuilder.getIdToLinkMap().get(id); if (l != null) { int numOfPatterns = l.getModelIds().size(); if (weightSystem == PatternWeightSystem.OriginalWeights) { double currentW = l.getWeight(); double newW = model.getGraph().getEdgeWeight(e); if (newW < currentW) this.graphBuilder.changeLinkWeight(l, newW); } else if (weightSystem == PatternWeightSystem.JWSPaperFormula) { if (n2 instanceof InternalNode) { // wl - x/(n+1) // wl = 1 // x = (numOfPatterns + 1) // n = totalNumberOfPatterns this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT - ((double) (numOfPatterns + 1) / (double) (this.totalNumberOfKnownModels + 1) )); // this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT - (0.00001 * numOfPatterns) ); } else { this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT); } } else { this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT); } l.getModelIds().add(modelId); n1.getModelIds().add(modelId); n2.getModelIds().add(modelId); } else { // System.out.println("added links: " + i); // i++; LabeledLink link = e.copy(id); if (link == null) { logger.error("cannot instanciate a link from the type: " + e.getType().toString()); continue; } link.setStatus(LinkStatus.Normal); // all the links in learning graph are normal if (link.getModelIds() != null) link.getModelIds().clear(); link.getModelIds().add(modelId); if (weightSystem == PatternWeightSystem.OriginalWeights) { if (!this.graphBuilder.addLink(n1, n2, link, model.getGraph().getEdgeWeight(e))) continue; } else { if (!this.graphBuilder.addLink(n1, n2, link, ModelingParams.PATTERN_LINK_WEIGHT)) continue; } n1.getModelIds().add(modelId); n2.getModelIds().add(modelId); } } DefaultLink[] graphLinks = this.graphBuilder.getGraph().edgeSet().toArray(new DefaultLink[0]); for (DefaultLink e : graphLinks) { source = e.getSource(); target = e.getTarget(); if (source instanceof InternalNode && target instanceof InternalNode && e instanceof LabeledLink) { LabeledLink l = (LabeledLink)e; Set<Node> nodesWithSourceUri = this.graphBuilder.getUriToNodesMap().get(source.getUri()); Set<Node> nodesWithTargetUri = this.graphBuilder.getUriToNodesMap().get(target.getUri()); if (nodesWithSourceUri == null || nodesWithTargetUri == null) continue; for (Node nn1 : nodesWithSourceUri) { for (Node nn2 : nodesWithTargetUri) { if (nn1.equals(source) && nn2.equals(target)) continue; if (nn1.equals(nn2)) continue; String id = LinkIdFactory.getLinkId(l.getUri(), nn1.getId(), nn2.getId()); LabeledLink newLink = l.copy(id); newLink.setModelIds(null); this.graphBuilder.addLink(nn1, nn2, newLink, ModelingParams.PATTERN_LINK_WEIGHT); } } } } } @Override public Set<InternalNode> addModel(SemanticModel model, PatternWeightSystem weightSystem) { // adding the patterns to the graph if (model == null) return null; // String modelId = model.getId(); // if (this.graphBuilder.getModelIds().contains(modelId)) { // // FIXME // // we need to somehow update the graph, but I don't know how to do that yet. // // so, we rebuild the whole graph from scratch. // logger.info("the graph already includes the model and needs to be updated, we re-initialize the graph from the repository!"); // initializeFromJsonRepository(); // return null; // } this.totalNumberOfKnownModels ++; // add the model nodes that are not in the graph Set<InternalNode> addedInternalNodes = new HashSet<>(); this.addInternalNodes(model, addedInternalNodes); HashMap<Node, Node> internalNodeMapping = this.getInternalNodeMapping(model); HashMap<Node, Node> columnNodeMapping = this.getColumnNodeMapping(model, internalNodeMapping); this.addLinks(model, internalNodeMapping, columnNodeMapping, weightSystem); this.lastUpdateTime = System.currentTimeMillis(); return addedInternalNodes; } public Set<InternalNode> addLodPattern(SemanticModel model) { Set<InternalNode> addedNodes = new HashSet<>(); if (model == null) return addedNodes; String modelId = model.getId(); Node source, target; Node n1, n2; String sourceUri, targetUri, linkUri; String id, key; Set<String> mappedLinks = new HashSet<>(); Set<String> mappedNodes = new HashSet<>(); HashMap<Node,Node> mapping = new HashMap<>(); HashMap<String, List<Node>> uriMatchedNodes = new HashMap<>(); List<LabeledLink> sortedLinks = new ArrayList<>(); List<LabeledLink> idSortedLinks = new ArrayList<>(); idSortedLinks.addAll(model.getGraph().edgeSet()); Collections.sort(idSortedLinks); for (LabeledLink l : idSortedLinks) { key = l.getSource().getUri() + l.getUri() + l.getTarget().getUri(); if (this.graphBuilder.getPatternLinks().get(key) != null) { sortedLinks.add(l); } } for (LabeledLink l : idSortedLinks) { if (!sortedLinks.contains(l)) { sortedLinks.add(l); } } sortedLinks.addAll(model.getGraph().edgeSet()); // System.out.println("new pattern ..."); for (LabeledLink e : sortedLinks) { source = e.getSource(); target = e.getTarget(); // if (model.getId().equals("p4-022E14EC-57EC-9F3B-CBCD-F64FDFE95609")) { // System.out.println(GraphUtil.labeledGraphToString(model.getGraph())); // System.out.println(e.getId()); // } // if (source.getId().contains("E42_Identifier") && // target.getId().contains("E55_Type")) { // System.out.println(GraphUtil.labeledGraphToString(model.getGraph())); // System.out.println("debug"); // } sourceUri = source.getUri(); targetUri = target.getUri(); linkUri = e.getUri(); n1 = mapping.get(source); n2 = mapping.get(target); key = sourceUri + linkUri + targetUri; // if (target.getId().contains("E52_Time-Span2")) { // System.out.println(GraphUtil.labeledGraphToString(model.getGraph())); // System.out.println("debug"); // } List<LabeledLink> matchedLinks = null; if (n1 == null && n2 == null) { matchedLinks = this.graphBuilder.getPatternLinks().get(key); if (matchedLinks != null && !matchedLinks.isEmpty()) { Collections.sort(matchedLinks); for (LabeledLink l : matchedLinks) { if (!mappedLinks.contains(l.getId())) { mappedLinks.add(l.getId()); n1 = l.getSource(); n2 = l.getTarget(); mapping.put(source,n1); mapping.put(target, n2); mappedNodes.add(n1.getId()); mappedNodes.add(n2.getId()); break; } } } } else if (n1 == null) { // target is already mapped matchedLinks = this.graphBuilder.getPatternLinks().get(key); if (matchedLinks != null && !matchedLinks.isEmpty()) { for (LabeledLink l : matchedLinks) { if (!mappedLinks.contains(l.getId()) && !mappedNodes.contains(l.getSource().getId()) && l.getTarget().getId().equalsIgnoreCase(n2.getId())) { mappedLinks.add(l.getId()); n1 = l.getSource(); mapping.put(source,n1); mappedNodes.add(n1.getId()); break; } } } } else if (n2 == null) { matchedLinks = this.graphBuilder.getPatternLinks().get(key); if (matchedLinks != null && !matchedLinks.isEmpty()) { for (LabeledLink l : matchedLinks) { if (!mappedLinks.contains(l.getId()) && !mappedNodes.contains(l.getTarget().getId()) && l.getSource().getId().equalsIgnoreCase(n1.getId())) { mappedLinks.add(l.getId()); n2 = l.getTarget(); mapping.put(target,n2); mappedNodes.add(n2.getId()); break; } } } } if (n1 == null) { List<Node> sortedMatchedNodes = uriMatchedNodes.get(sourceUri); if (sortedMatchedNodes == null) { sortedMatchedNodes = new LinkedList<>(); Set<Node> matchedNodes = this.graphBuilder.getUriToNodesMap().get(sourceUri); if (matchedNodes != null && !matchedNodes.isEmpty()) { sortedMatchedNodes.addAll(matchedNodes); Collections.sort(sortedMatchedNodes, new NodeSupportingModelsComparator()); while (!sortedMatchedNodes.isEmpty()) { if (mappedNodes.contains(sortedMatchedNodes.get(0).getId())) { sortedMatchedNodes.remove(0); continue; } Set<DefaultLink> outLinks = this.getGraphBuilder().getGraph().outgoingEdgesOf(sortedMatchedNodes.get(0)); boolean okLink = true; if (outLinks != null) { for (DefaultLink dl : outLinks) { if (dl instanceof LabeledLink && dl.getUri().equalsIgnoreCase(linkUri)) { sortedMatchedNodes.remove(0); okLink = false; break; } } if (!okLink) continue; } break; } if (!sortedMatchedNodes.isEmpty()) { n1 = sortedMatchedNodes.get(0); mappedNodes.add(n1.getId()); mapping.put(source, n1); sortedMatchedNodes.remove(0); } uriMatchedNodes.put(sourceUri, sortedMatchedNodes); } } if (n1 == null) { id = this.nodeIdFactory.getNodeId(sourceUri); n1 = new InternalNode(id, new Label(sourceUri)); if (this.graphBuilder.addNode(n1)) { mapping.put(source, n1); mappedNodes.add(n1.getId()); addedNodes.add((InternalNode)n1); } else { System.out.println("Error in adding the node " + id + " to the graph."); } } } if (n2 == null) { // if(model.getId().equals("p4-06B7640A-8E23-B427-4B46-B1C9C194BDD7")) // { // System.out.println(GraphUtil.labeledGraphToString(model.getGraph())); // } List<Node> sortedMatchedNodes = uriMatchedNodes.get(targetUri); if (sortedMatchedNodes == null) { sortedMatchedNodes = new LinkedList<>(); Set<Node> matchedNodes = this.graphBuilder.getUriToNodesMap().get(targetUri); if (matchedNodes != null && !matchedNodes.isEmpty()) { sortedMatchedNodes.addAll(matchedNodes); Collections.sort(sortedMatchedNodes, new NodeSupportingModelsComparator()); while (!sortedMatchedNodes.isEmpty()) { if (mappedNodes.contains(sortedMatchedNodes.get(0).getId())) { sortedMatchedNodes.remove(0); continue; } Set<DefaultLink> inLinks = this.getGraphBuilder().getGraph().incomingEdgesOf(sortedMatchedNodes.get(0)); boolean okLink = true; if (inLinks != null) { for (DefaultLink dl : inLinks) { if (dl instanceof LabeledLink && dl.getUri().equalsIgnoreCase(linkUri)) { sortedMatchedNodes.remove(0); okLink = false; break; } } if (!okLink) continue; } break; } if (!sortedMatchedNodes.isEmpty()) { n2 = sortedMatchedNodes.get(0); mappedNodes.add(n2.getId()); mapping.put(target, n2); sortedMatchedNodes.remove(0); } uriMatchedNodes.put(targetUri, sortedMatchedNodes); } } if (n2 == null) { id = this.nodeIdFactory.getNodeId(targetUri); // if (id.contains("E55_Type10")) // System.out.println("break"); n2 = new InternalNode(id, new Label(targetUri)); if (this.graphBuilder.addNode(n2)) { mapping.put(target, n2); addedNodes.add((InternalNode)n2); mappedNodes.add(n2.getId()); } else { System.out.println("Error in adding the node " + id + " to the graph."); } } } // if (n1 == null || n2 == null) { // System.out.println(GraphUtil.labeledGraphToString(model.getGraph())); // System.out.println("debug"); // } id = LinkIdFactory.getLinkId(e.getUri(), n1.getId(), n2.getId()); LabeledLink l = this.graphBuilder.getIdToLinkMap().get(id); if (l != null) { this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT); l.getModelIds().add(modelId); n1.getModelIds().add(modelId); n2.getModelIds().add(modelId); } else { LabeledLink link = e.copy(id); if (link == null) { logger.error("cannot instanciate a link from the type: " + e.getType().toString()); continue; } link.setStatus(LinkStatus.Normal); // all the links in learning graph are normal if (link.getModelIds() != null) link.getModelIds().clear(); link.getModelIds().add(modelId); if (!this.graphBuilder.addLink(n1, n2, link, ModelingParams.PATTERN_LINK_WEIGHT)) continue; this.graphBuilder.savePatternLink(link); n1.getModelIds().add(modelId); n2.getModelIds().add(modelId); } } return addedNodes; } public static void main(String[] args) throws Exception { ServletContextParameterMap contextParameters = ContextParametersRegistry.getInstance().getDefault(); OntologyManager ontologyManager = new OntologyManager(contextParameters.getId()); File ff = new File(Params.ONTOLOGY_DIR); File[] files = ff.listFiles(); for (File f : files) { ontologyManager.doImport(f, "UTF-8"); } ontologyManager.updateCache(); List<SemanticModel> semanticModels = ModelReader.importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT); String graphPath = Params.GRAPHS_DIR; String graphName = graphPath + "graph.json"; String graphVizName = graphPath + "graph.dot"; ModelLearningGraph ml = ModelLearningGraph.getEmptyInstance(ontologyManager, ModelLearningGraphType.Compact); int i = 0; Set<InternalNode> addedNodes = new HashSet<>(); Set<InternalNode> temp; for (SemanticModel sm : semanticModels) { i++; if (i == 4) continue; System.out.println(sm.getId()); temp = ml.addModel(sm, PatternWeightSystem.JWSPaperFormula); if (temp != null) addedNodes.addAll(temp); } ml.updateGraphUsingOntology(addedNodes); try { GraphUtil.exportJson(ml.getGraphBuilder().getGraph(), graphName, true, true); GraphVizUtil.exportJGraphToGraphviz(ml.getGraphBuilder().getGraph(), "main graph", true, GraphVizLabelType.LocalId, GraphVizLabelType.LocalUri, false, false, graphVizName); } catch (Exception e) { e.printStackTrace(); } } }