/******************************************************************************* * Copyright 2012 University of Southern California * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This code was developed by the Information Integration Group as part * of the Karma project at the Information Sciences Institute of the * University of Southern California. For more information, publications, * and related projects, please see: http://www.isi.edu/integration ******************************************************************************/ package edu.isi.karma.modeling.alignment.learner; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import edu.isi.karma.modeling.ModelingParams; import edu.isi.karma.modeling.alignment.GraphUtil; import edu.isi.karma.modeling.alignment.GraphVizLabelType; import edu.isi.karma.modeling.alignment.GraphVizUtil; import edu.isi.karma.modeling.alignment.LinkIdFactory; import edu.isi.karma.modeling.alignment.SemanticModel; import edu.isi.karma.modeling.ontology.OntologyManager; import edu.isi.karma.modeling.research.Params; import edu.isi.karma.rep.alignment.ColumnNode; import edu.isi.karma.rep.alignment.DataPropertyLink; import edu.isi.karma.rep.alignment.InternalNode; import edu.isi.karma.rep.alignment.Label; import edu.isi.karma.rep.alignment.LabeledLink; import edu.isi.karma.rep.alignment.LinkStatus; import edu.isi.karma.rep.alignment.Node; import edu.isi.karma.util.RandomGUID; import edu.isi.karma.webserver.ContextParametersRegistry; import edu.isi.karma.webserver.ServletContextParameterMap; public class ModelLearningGraphCompact_Old extends ModelLearningGraph { private static Logger logger = LoggerFactory.getLogger(ModelLearningGraphCompact_Old.class); private static int MAX_MAPPING_SIZE = 1000; public ModelLearningGraphCompact_Old(OntologyManager ontologyManager) throws IOException { super(ontologyManager, ModelLearningGraphType.Compact); } public ModelLearningGraphCompact_Old(OntologyManager ontologyManager, boolean emptyInstance) { super(ontologyManager, emptyInstance, ModelLearningGraphType.Compact); } // protected static ModelLearningGraphCompact getInstance(OntologyManager ontologyManager) { // return (ModelLearningGraphCompact)ModelLearningGraph.getInstance(ontologyManager, ModelLearningGraphType.Compact); // } // // protected static ModelLearningGraphCompact getEmptyInstance(OntologyManager ontologyManager) { // return (ModelLearningGraphCompact)ModelLearningGraph.getEmptyInstance(ontologyManager, ModelLearningGraphType.Compact); // } private HashMap<Node,Set<Node>> addInternalNodes(SemanticModel model, Set<InternalNode> addedNodes) { if (model == null || model.getGraph() == null) return null; HashMap<Node,Set<Node>> internalNodeMatches = new HashMap<>(); if (addedNodes == null) addedNodes = new HashSet<>(); HashMap<String, Integer> uriCount = new HashMap<>(); for (Node n : model.getGraph().vertexSet()) { if (n instanceof InternalNode) { Integer count = uriCount.get(n.getUri()); if (count == null) uriCount.put(n.getUri(), 1); else uriCount.put(n.getUri(), count.intValue() + 1); } } for (Map.Entry<String, Integer> stringIntegerEntry : uriCount.entrySet()) { int modelNodeCount = stringIntegerEntry.getValue(); Set<Node> matchedNodes = this.graphBuilder.getUriToNodesMap().get(stringIntegerEntry.getKey()); int graphNodeCount = matchedNodes == null ? 0 : matchedNodes.size(); for (int i = 0; i < modelNodeCount - graphNodeCount; i++) { String id = this.nodeIdFactory.getNodeId(stringIntegerEntry.getKey()); Node n = new InternalNode(id, new Label(stringIntegerEntry.getKey())); if (this.graphBuilder.addNode(n)) addedNodes.add((InternalNode)n); } } for (Node n : model.getGraph().vertexSet()) { if (n instanceof InternalNode) { Set<Node> matchedNodes = this.graphBuilder.getUriToNodesMap().get(n.getUri()); internalNodeMatches.put(n, matchedNodes); } } return internalNodeMatches; } private HashMap<Node,Set<Node>> addColumnNodes(SemanticModel model, HashMap<Node,Set<LabeledLink>> modelNodeDomains, HashMap<Node,Set<LabeledLink>> graphNodeDomains) { if (model == null || model.getGraph() == null) return null; if (modelNodeDomains == null) modelNodeDomains = new HashMap<>(); if (graphNodeDomains == null) graphNodeDomains = new HashMap<>(); HashMap<Node,Set<Node>> columnNodeMatches = new HashMap<>(); HashMap<String, Integer> dataPropertyCount = new HashMap<>(); // key = domainUri + propertyUri for (Node n : model.getGraph().vertexSet()) { if (n instanceof ColumnNode) { Set<LabeledLink> domainLinks = GraphUtil.getDomainLinksInLabeledGraph(model.getGraph(), (ColumnNode)n); if (domainLinks == null || domainLinks.isEmpty()) continue; for (LabeledLink l : domainLinks) { if (l.getSource() == null) continue; Node domain = l.getSource(); String linkUri = l.getUri(); String key = domain.getId() + linkUri; Integer count = dataPropertyCount.get(key); if (count == null) dataPropertyCount.put(key, 1); else dataPropertyCount.put(key, count.intValue() + 1); } modelNodeDomains.put(n, domainLinks); } } for (Node n : model.getGraph().vertexSet()) { Set<Node> matches = new HashSet<>(); if (n instanceof ColumnNode) { Set<LabeledLink> domainLinks = modelNodeDomains.get(n); if (domainLinks == null || domainLinks.isEmpty()) continue; for (LabeledLink l : domainLinks) { if (l.getSource() == null) continue; Node domain = l.getSource(); LabeledLink incomingLink = l; Set<Node> matchedNodes = this.graphBuilder.getUriToNodesMap().get(domain.getUri()); if (matchedNodes == null || matchedNodes.isEmpty()) { logger.error("no match found for the node " + domain.getUri() + " in the graph"); return null; } for (Node m : matchedNodes) { String graphKey = m.getId() + incomingLink.getUri(); Set<Node> dataPropertyColumnNodes = this.graphBuilder.getNodeDataProperties().get(graphKey); Integer graphDataPropertyCount = this.graphBuilder.getNodeDataPropertyCount().get(graphKey); if (graphDataPropertyCount == null) graphDataPropertyCount = 0; if (dataPropertyColumnNodes != null) { for (Node cn : dataPropertyColumnNodes) { if (cn instanceof ColumnNode) { matches.add(cn); graphNodeDomains.put(cn, GraphUtil.getDomainLinksInDefaultGraph(this.graphBuilder.getGraph(), (ColumnNode)cn)); } } } String modelKey = domain.getId() + incomingLink.getUri(); int modelDataPropertyCount = dataPropertyCount.get(modelKey); for (int i = 0; i < modelDataPropertyCount - graphDataPropertyCount; i++) { Node newNode = null; if (n instanceof ColumnNode) { ColumnNode c = (ColumnNode)n; newNode = new ColumnNode(new RandomGUID().toString(), c.getHNodeId(), c.getColumnName(), c.getRdfLiteralType(), c.getLanguage()); } if (newNode == null) { return null; } if (this.graphBuilder.addNode(newNode)) { String linkId = LinkIdFactory.getLinkId(incomingLink.getUri(), m.getId(), newNode.getId()); DataPropertyLink link = new DataPropertyLink(linkId, new Label(incomingLink.getLabel())); this.graphBuilder.addLink(m, newNode, link, ModelingParams.PATTERN_LINK_WEIGHT); matches.add(newNode); graphNodeDomains.put(newNode, GraphUtil.getDomainLinksInDefaultGraph(this.graphBuilder.getGraph(), (ColumnNode)newNode)); } } } } columnNodeMatches.put(n, matches); } } return columnNodeMatches; } private List<HashMap<Node,Node>> updateMapping(List<HashMap<Node,Node>> mappings, Node node, int size, HashMap<Node, Set<Node>> internalNodeMatches, HashMap<Node, Set<Node>> columnNodeMatches, HashMap<Node,Set<LabeledLink>> modelNodeDomains, HashMap<Node,Set<LabeledLink>> graphNodeDomains) { // System.out.println("node: " + node.getId()); List<HashMap<Node,Node>> newMappings = new LinkedList<>(); Set<Node> matchedNodes = null; if (node instanceof InternalNode && internalNodeMatches != null) matchedNodes = internalNodeMatches.get(node); else if (node instanceof ColumnNode && columnNodeMatches != null) matchedNodes = columnNodeMatches.get(node); if (matchedNodes == null) { return null;//mappings; } if (matchedNodes == null || matchedNodes.isEmpty()) { logger.error("no match found for the node " + node.getId() + " in the graph"); return null; } if (mappings.isEmpty()) { for (Node n : matchedNodes) { HashMap<Node,Node> nodeMap = new HashMap<>(); nodeMap.put(node, n); newMappings.add(nodeMap); // System.out.println("\t\t" + n.getId()); } } else { for (int i = 0; i < mappings.size(); i++) { HashMap<Node,Node> nodeMap = mappings.get(i); Set<Node> correspondingMatches = new HashSet<>(); for (Node n : matchedNodes) { if (n instanceof ColumnNode) { Set<LabeledLink> modelDomainLinks = modelNodeDomains.get(node); if (modelDomainLinks != null) { for (LabeledLink l : modelDomainLinks) { if (l.getSource() != null) { if (nodeMap.containsKey(l.getSource())) { correspondingMatches.add(nodeMap.get(l.getSource())); } } } } Set<LabeledLink> graphDomainLinks = graphNodeDomains.get(n); Set<Node> domainNodes = new HashSet<>(); if (graphDomainLinks != null) { for (LabeledLink l : graphDomainLinks) { if (l.getSource() != null) { domainNodes.add(l.getSource()); } } } boolean found = false; if (domainNodes != null) { for (Node domain : domainNodes) { if (correspondingMatches.contains(domain)) found = true; } } if (!found) continue; } HashMap<Node,Node> newMapping = new HashMap<>(nodeMap); newMapping.put(node, n); if (new HashSet<>(newMapping.values()).size() != size) continue; // for (Node nnn : newMapping.values()) { // System.out.println("\t\t" + nnn.getId()); // } newMappings.add(newMapping); } } } return newMappings; } private List<HashMap<Node,Node>> findMappings(SemanticModel model, HashMap<Node, Set<Node>> internalNodeMatches, HashMap<Node, Set<Node>> columnNodeMatches, HashMap<Node,Set<LabeledLink>> modelNodeDomains, HashMap<Node,Set<LabeledLink>> graphNodeDomains) { if (model == null || model.getGraph() == null) return null; List<HashMap<Node,Node>> mappings = new LinkedList<>(); // logger.info("max mapping size: " + MAX_MAPPING_SIZE); int size = 0; for (Node node : model.getGraph().vertexSet()) { if (node instanceof InternalNode) { size ++; mappings = updateMapping(mappings, node, size, internalNodeMatches, columnNodeMatches, modelNodeDomains, graphNodeDomains); // System.out.println(mappings.size()); if (mappings != null && mappings.size() >= MAX_MAPPING_SIZE) mappings = mappings.subList(0, MAX_MAPPING_SIZE); // System.out.println(mappings.size()); } } for (Node node : model.getGraph().vertexSet()) { if (node instanceof ColumnNode) { size ++; mappings = updateMapping(mappings, node, size, internalNodeMatches, columnNodeMatches, modelNodeDomains, graphNodeDomains); // System.out.println(mappings.size()); if (mappings != null && mappings.size() >= MAX_MAPPING_SIZE) mappings = mappings.subList(0, MAX_MAPPING_SIZE); // System.out.println(mappings.size()); } } return mappings; } private String generateLinkModelId(String originalModelId, int index) { String separator = "/"; String modelId = originalModelId + separator + index; return modelId; } @Override public Set<InternalNode> addModel(SemanticModel model, PatternWeightSystem weightSystem) { // adding the patterns to the graph if (model == null) return null; String modelId = model.getId(); if (this.graphBuilder.getModelIds().contains(modelId)) { // FIXME // we need to somehow update the graph, but I don't know how to do that yet. // so, we rebuild the whole graph from scratch. logger.info("the graph already includes the model and needs to be updated, we re-initialize the graph from the repository!"); initializeFromJsonRepository(); return null; } // add the model nodes that are not in the graph Set<InternalNode> addedInternalNodes = new HashSet<>(); HashMap<Node, Set<Node>> internalNodeMatches = addInternalNodes(model, addedInternalNodes); // if (modelId.equalsIgnoreCase("s21-s-met.json")) // for (Entry<Node, Set<Node>> entry : internalNodeMatches.entrySet()) { // System.out.println(entry.getKey().getId() + "--> size: " + entry.getValue().size()); // for (Node n : entry.getValue()) { // System.out.println("\t" + n.getId()); // } // } HashMap<Node,Set<LabeledLink>> modelNodeDomains = new HashMap<>(); HashMap<Node,Set<LabeledLink>> graphNodeDomains = new HashMap<>(); HashMap<Node, Set<Node>> columnNodeMatches = addColumnNodes(model, modelNodeDomains, graphNodeDomains); // if (modelId.equalsIgnoreCase("s21-s-met.json")) // for (Entry<Node, Set<Node>> entry : columnNodeMatches.entrySet()) { // System.out.println(((ColumnNode)entry.getKey()).getColumnName() + "--> size: " + entry.getValue().size()); // for (Node n : entry.getValue()) { // System.out.println("\t" + ((ColumnNode)n).getColumnName()); // } // } // find possible mappings between models nodes and the graph nodes List<HashMap<Node,Node>> mappings = findMappings(model, internalNodeMatches, columnNodeMatches, modelNodeDomains, graphNodeDomains); if (mappings == null) { return null; } logger.debug(model.getId() + " --> number of mappings: " + mappings.size()); Node source, target; Node n1, n2; int index = 1; // int i = 0; for (HashMap<Node,Node> mapping : mappings) { String indexedModelId = generateLinkModelId(modelId, index++); // modelId for (LabeledLink e : model.getGraph().edgeSet()) { source = e.getSource(); target = e.getTarget(); n1 = mapping.get(source); if (n1 == null) { // logger.warn("the mappings does not include the source node " + source.getId()); continue; } n2 = mapping.get(target); if (n2 == null) { // logger.warn("the mappings does not include the target node " + target.getId()); continue; } String id = LinkIdFactory.getLinkId(e.getUri(), n1.getId(), n2.getId()); LabeledLink l = this.graphBuilder.getIdToLinkMap().get(id); if (l != null) { int numOfPatterns = l.getModelIds().size(); // this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT); // this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT / (double) (numOfPatterns + 1) ); if (weightSystem == PatternWeightSystem.OriginalWeights) { double currentW = l.getWeight(); double newW = model.getGraph().getEdgeWeight(e); if (newW < currentW) this.graphBuilder.changeLinkWeight(l, newW); } else if (weightSystem == PatternWeightSystem.JWSPaperFormula) { if (n2 instanceof InternalNode) { // wl - x/(n+1) // wl = 1 // x = (numOfPatterns + 1) // n = totalNumberOfPatterns this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT - ((double) (numOfPatterns + 1) / (double) (this.totalNumberOfKnownModels + 1) )); // this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT - (0.00001 * numOfPatterns) ); } else { this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT); } } else { this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT); } l.getModelIds().add(indexedModelId); n1.getModelIds().add(indexedModelId); n2.getModelIds().add(indexedModelId); } else { // System.out.println("added links: " + i); // i++; LabeledLink link = e.copy(id); if (link == null) { logger.error("cannot instanciate a link from the type: " + e.getType().toString()); continue; } link.setStatus(LinkStatus.Normal); // all the links in learning graph are normal if (link.getModelIds() != null) link.getModelIds().clear(); link.getModelIds().add(indexedModelId); if (weightSystem == PatternWeightSystem.OriginalWeights) { if (!this.graphBuilder.addLink(n1, n2, link, model.getGraph().getEdgeWeight(e))) continue; } else { if (!this.graphBuilder.addLink(n1, n2, link, ModelingParams.PATTERN_LINK_WEIGHT)) continue; } n1.getModelIds().add(indexedModelId); n2.getModelIds().add(indexedModelId); } } } this.lastUpdateTime = System.currentTimeMillis(); return addedInternalNodes; } public static void main(String[] args) throws Exception { ServletContextParameterMap contextParameters = ContextParametersRegistry.getInstance().getDefault(); OntologyManager ontologyManager = new OntologyManager(contextParameters.getId()); File ff = new File(Params.ONTOLOGY_DIR); File[] files = ff.listFiles(); for (File f : files) { ontologyManager.doImport(f, "UTF-8"); } ontologyManager.updateCache(); List<SemanticModel> semanticModels = ModelReader.importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT); String graphPath = Params.GRAPHS_DIR; String graphName = graphPath + "graph.json"; String graphVizName = graphPath + "graph.dot"; ModelLearningGraph ml = ModelLearningGraph.getEmptyInstance(ontologyManager, ModelLearningGraphType.Compact); int i = 0; Set<InternalNode> addedNodes = new HashSet<>(); Set<InternalNode> temp; for (SemanticModel sm : semanticModels) { i++; if (i == 4) continue; System.out.println(sm.getId()); temp = ml.addModel(sm, PatternWeightSystem.JWSPaperFormula); if (temp != null) addedNodes.addAll(temp); } ml.updateGraphUsingOntology(addedNodes); try { GraphUtil.exportJson(ml.getGraphBuilder().getGraph(), graphName, true, true); GraphVizUtil.exportJGraphToGraphviz(ml.getGraphBuilder().getGraph(), "main graph", true, GraphVizLabelType.LocalId, GraphVizLabelType.LocalUri, false, false, graphVizName); } catch (Exception e) { e.printStackTrace(); } } }