/******************************************************************************* * Copyright 2012 University of Southern California * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This code was developed by the Information Integration Group as part * of the Karma project at the Information Sciences Institute of the * University of Southern California. For more information, publications, * and related projects, please see: http://www.isi.edu/integration ******************************************************************************/ package edu.isi.karma.modeling.research.approach1; import java.io.File; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import org.apache.log4j.Logger; import org.jgrapht.UndirectedGraph; import org.jgrapht.graph.AsUndirectedGraph; import org.jgrapht.graph.DirectedWeightedMultigraph; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; import edu.isi.karma.modeling.ModelingParams; import edu.isi.karma.modeling.Uris; import edu.isi.karma.modeling.alignment.GraphBuilder; import edu.isi.karma.modeling.alignment.GraphUtil; import edu.isi.karma.modeling.alignment.LinkIdFactory; import edu.isi.karma.modeling.alignment.NodeIdFactory; import edu.isi.karma.modeling.alignment.SteinerTree; import edu.isi.karma.modeling.ontology.OntologyManager; import edu.isi.karma.modeling.research.GraphVizUtil; import edu.isi.karma.modeling.research.ModelReader; import edu.isi.karma.modeling.research.Params; import edu.isi.karma.modeling.research.PatternContainment; import edu.isi.karma.modeling.research.SemanticLabel; import edu.isi.karma.modeling.research.ServiceModel; import edu.isi.karma.modeling.research.Util; import edu.isi.karma.rep.alignment.ColumnNode; import edu.isi.karma.rep.alignment.DataPropertyLink; import edu.isi.karma.rep.alignment.InternalNode; import edu.isi.karma.rep.alignment.Label; import edu.isi.karma.rep.alignment.Link; import edu.isi.karma.rep.alignment.LiteralNode; import edu.isi.karma.rep.alignment.Node; import edu.isi.karma.rep.alignment.NodeType; import edu.isi.karma.rep.alignment.ObjectPropertyLink; import edu.isi.karma.rep.alignment.SimpleLink; import edu.isi.karma.rep.alignment.SubClassLink; //import com.google.common.base.Function; //import com.google.common.collect.Multimap; //import com.google.common.collect.Multimaps; public class Approach1 { private static Logger logger = Logger.getLogger(Approach1.class); private HashMap<SemanticLabel, Set<MappingStruct>> labelToMappingStructs; private NodeIdFactory nodeIdFactory; private List<ServiceModel> trainingData; private OntologyManager ontologyManager; private GraphBuilder graphBuilder; private Set<DirectedWeightedMultigraph<Node, Link>> graphComponents; private static final int MAX_CANDIDATES = 5; private static final int MAX_STEINER_NODES_SETS = 100; private HashSet<Link> patternLinks; private HashMap<String, Integer> linkCountMap; private Multimap<String, String> sourceToTargetLinks; private class LinkFrequency implements Comparable<LinkFrequency>{ public LinkFrequency(String linkUri, int type, int count) { this.linkUri = linkUri; this.type = type; this.count = count; } private String linkUri; private int type; private int count; public double getWeight() { double weight = 0.0; double w = ModelingParams.PROPERTY_DIRECT_WEIGHT; double epsilon = ModelingParams.PATTERN_LINK_WEIGHT; // double factor = 0.01; int c = this.count < (int)w ? this.count : (int)w - 1; if (type == 1) // match domain, link, and range weight = w - (epsilon / (w - c)); else if (type == 2) // match link and range weight = w - (epsilon / ((w - c) * w)); else if (type == 3) // match domain and link weight = w - (epsilon / ((w - c) * w)); else if (type == 4) // match link weight = w - (epsilon / ((w - c) * w * w)); else if (type == 5) // direct property weight = w; else if (type == 6) // indirect property weight = w + epsilon - (epsilon / (w - c)); else if (type == 7) // property with only domain weight = w + epsilon + (epsilon / ((w - c) * w)); else if (type == 8) // property with only range weight = w + epsilon + (epsilon / ((w - c) * w)); else if (type == 9) // subClass weight = w + epsilon + (epsilon / ((w - c) * w * w)); else if (type == 10) // property without domain and range weight = w + epsilon + (epsilon / ((w - c) * w * w * w)); return weight; } @Override public int compareTo(LinkFrequency o) { if (linkUri == null && o.linkUri != null) return -1; else if (linkUri != null && o.linkUri == null) return 1; else if (linkUri == null && o.linkUri == null) return 0; else { if (type < o.type) return 1; else if (type > o.type) return -1; else { if (count >= o.count) return 1; else return -1; } } } } public Approach1(List<ServiceModel> trainingData, OntologyManager ontologyManager) { this.graphComponents = new HashSet<DirectedWeightedMultigraph<Node,Link>>(); this.trainingData = trainingData; this.ontologyManager = ontologyManager; // this.linkIdFactory = new LinkIdFactory(); this.nodeIdFactory = new NodeIdFactory(); this.graphBuilder = new GraphBuilder(ontologyManager, nodeIdFactory);//, linkIdFactory); this.labelToMappingStructs = new HashMap<SemanticLabel, Set<MappingStruct>>(); this.patternLinks = new HashSet<Link>(); this.linkCountMap = new HashMap<String, Integer>(); this.sourceToTargetLinks = ArrayListMultimap.create(); this.buildLinkCountMap(); } public DirectedWeightedMultigraph<Node, Link> getGraph() { return this.graphBuilder.getGraph(); } public void saveGraph(String fileName) throws Exception { GraphUtil.serialize(this.graphBuilder.getGraph(), fileName); } public void loadGraph(OntologyManager ontologyManager, String fileName) throws Exception { DirectedWeightedMultigraph<Node, Link> graph = GraphUtil.deserialize(fileName); this.graphBuilder = new GraphBuilder(ontologyManager, graph); this.nodeIdFactory = this.graphBuilder.getNodeIdFactory(); // this.linkIdFactory = this.graphBuilder.getLinkIdFactory(); this.updateHashMaps(); } private void updateGraphWithUserLinks() { // String[] parts; // String sourceUri, targetUri; // String linkId; // // for (String s : this.sourceToTargetLinks.keys()) { // // parts = s.split("---"); // if (parts == null || parts.length != 2) continue; // sourceUri = parts[0]; targetUri = parts[1]; // // List<Node> sources = this.graphBuilder.getUriToNodesMap().get(sourceUri); // List<Node> targets = this.graphBuilder.getUriToNodesMap().get(targetUri); // // // for (Node source : sources) { // for (Node target : targets) { // if (!this.graphBuilder.isConnected(source.getId(), target.getId())) { // linkId = LinkIdFactory.getLinkId(SimpleLink.getFixedLabel().getUri(), source.getId(), target.getId()); // Link link = new SimpleLink(linkId, SimpleLink.getFixedLabel()); // this.graphBuilder.addLink(source, target, link); // } // } // } // } } private void updateHashMaps() { this.labelToMappingStructs.clear(); List<Node> columnNodes = this.graphBuilder.getTypeToNodesMap().get(NodeType.ColumnNode); if (columnNodes != null) { for (Node n : columnNodes) { Set<Link> incomingLinks = this.graphBuilder.getGraph().incomingEdgesOf(n); if (incomingLinks != null) { Link[] inLinks = incomingLinks.toArray(new Link[0]); for (Link link : inLinks) { Node domain = link.getSource(); if (!(domain instanceof InternalNode)) continue; SemanticLabel sl = new SemanticLabel(domain.getLabel().getUri(), link.getLabel().getUri(), n.getId()); Set<MappingStruct> labelStructs = this.labelToMappingStructs.get(sl); if (labelStructs == null) { labelStructs = new HashSet<MappingStruct>(); this.labelToMappingStructs.put(sl, labelStructs); } labelStructs.add(new MappingStruct((InternalNode)domain, link, (ColumnNode)n)); } } else logger.error("The column node " + n.getId() + " does not have any domain or it has more than one domain."); } } for (Link l : this.graphBuilder.getGraph().edgeSet()) { if (l.getPatternIds().size() > 0) this.patternLinks.add(l); } } private static List<SemanticLabel> getModelSemanticLabels( DirectedWeightedMultigraph<Node, Link> model) { List<SemanticLabel> SemanticLabel2s = new ArrayList<SemanticLabel>(); for (Node n : model.vertexSet()) { if (!(n instanceof ColumnNode) && !(n instanceof LiteralNode)) continue; Set<Link> incomingLinks = model.incomingEdgesOf(n); if (incomingLinks != null) { // && incomingLinks.size() == 1) { Link link = incomingLinks.toArray(new Link[0])[0]; Node domain = link.getSource(); SemanticLabel sl = new SemanticLabel(domain.getLabel().getUri(), link.getLabel().getUri(), n.getId()); SemanticLabel2s.add(sl); } } return SemanticLabel2s; } private void buildGraphFromTrainingModels() { String patternId; // adding the patterns to the graph for (ServiceModel sm : this.trainingData) { if (sm.getModel() == null) continue; patternId = sm.getId(); addPatternToGraph(patternId, sm.getModel()); } // adding the links inferred from the ontology this.graphBuilder.updateGraph(); // this.updateGraphWithUserLinks(); this.updateHashMaps(); } private void addPatternToGraph(String patternId, DirectedWeightedMultigraph<Node, Link> pattern) { for (DirectedWeightedMultigraph<Node, Link> c : this.graphComponents) { PatternContainment containment = new PatternContainment(c, pattern); Set<String> mappedNodes = new HashSet<String>(); Set<String> mappedLinks = new HashSet<String>(); if (containment.containedIn(mappedNodes, mappedLinks)) { for (String n : mappedNodes) this.graphBuilder.getIdToNodeMap().get(n).getPatternIds().add(patternId); for (String l : mappedLinks) this.graphBuilder.getIdToLinkMap().get(l).getPatternIds().add(patternId); return; } } // TODO: What if an existing pattern is contained in the new pattern? // Can we extend the same pattern instead of adding new one DirectedWeightedMultigraph<Node, Link> component = new DirectedWeightedMultigraph<Node, Link>(Link.class); HashMap<Node, Node> visitedNodes; Node source, target; Node n1, n2; // adding the patterns to the graph if (pattern == null) return; visitedNodes = new HashMap<Node, Node>(); for (Link e : pattern.edgeSet()) { source = e.getSource(); target = e.getTarget(); n1 = visitedNodes.get(source); n2 = visitedNodes.get(target); if (n1 == null) { if (source instanceof InternalNode) { String id = nodeIdFactory.getNodeId(source.getLabel().getUri()); InternalNode node = new InternalNode(id, new Label(source.getLabel())); if (this.graphBuilder.addNodeWithoutUpdatingGraph(node)) { n1 = node; component.addVertex(node); } else continue; } else { String id = nodeIdFactory.getNodeId(source.getId()); ColumnNode node = new ColumnNode(id, id, "", ""); if (this.graphBuilder.addNodeWithoutUpdatingGraph(node)) { n1 = node; component.addVertex(node); } else continue; } visitedNodes.put(source, n1); } if (n2 == null) { if (target instanceof InternalNode) { String id = nodeIdFactory.getNodeId(target.getLabel().getUri()); InternalNode node = new InternalNode(id, new Label(target.getLabel())); if (this.graphBuilder.addNodeWithoutUpdatingGraph(node)) { n2 = node; component.addVertex(node); } else continue; } else { ColumnNode node = new ColumnNode(target.getId(), "", "", ""); if (this.graphBuilder.addNodeWithoutUpdatingGraph(node)) { n2 = node; component.addVertex(node); } else continue; } visitedNodes.put(target, n2); } Link link; String id = LinkIdFactory.getLinkId(e.getLabel().getUri(), n1.getId(), n2.getId()); if (n2 instanceof ColumnNode) link = new DataPropertyLink(id, e.getLabel(), false); else link = new ObjectPropertyLink(id, e.getLabel()); link.getPatternIds().add(patternId); if (this.graphBuilder.addLink(n1, n2, link)) { component.addEdge(n1, n2, link); this.graphBuilder.changeLinkWeight(link, ModelingParams.PATTERN_LINK_WEIGHT); } if (!n1.getPatternIds().contains(patternId)) n1.getPatternIds().add(patternId); if (!n2.getPatternIds().contains(patternId)) n2.getPatternIds().add(patternId); } this.graphComponents.add(component); } public void addPatternAndUpdateGraph(String patternId, DirectedWeightedMultigraph<Node, Link> pattern) { addPatternToGraph(patternId, pattern); // adding the links inferred from the ontology this.graphBuilder.updateGraph(); this.updateGraphWithUserLinks(); this.updateHashMaps(); } private void buildLinkCountMap() { String key, sourceUri, targetUri, linkUri; for (ServiceModel sm : this.trainingData) { DirectedWeightedMultigraph<Node, Link> m = sm.getModel(); for (Link link : m.edgeSet()) { if (link instanceof DataPropertyLink) continue; sourceUri = link.getSource().getLabel().getUri(); targetUri = link.getTarget().getLabel().getUri(); linkUri = link.getLabel().getUri(); key = sourceUri + "<" + linkUri + ">" + targetUri; Integer count = this.linkCountMap.get(key); if (count == null) this.linkCountMap.put(key, 1); else this.linkCountMap.put(key, count.intValue() + 1); key = sourceUri+ "<" + linkUri; count = this.linkCountMap.get(key); if (count == null) this.linkCountMap.put(key, 1); else this.linkCountMap.put(key, count.intValue() + 1); key = linkUri + ">" + targetUri; count = this.linkCountMap.get(key); if (count == null) this.linkCountMap.put(key, 1); else this.linkCountMap.put(key, count.intValue() + 1); key = linkUri; count = this.linkCountMap.get(key); if (count == null) this.linkCountMap.put(key, 1); else this.linkCountMap.put(key, count.intValue() + 1); this.sourceToTargetLinks.put(sourceUri + "---" + targetUri, linkUri); } } } private void updateWeights() { List<Link> oldLinks = new ArrayList<Link>(); List<Node> sources = new ArrayList<Node>(); List<Node> targets = new ArrayList<Node>(); List<String> newLinks = new ArrayList<String>(); List<Double> weights = new ArrayList<Double>(); HashMap<String, LinkFrequency> sourceTargetLinkFrequency = new HashMap<String, LinkFrequency>(); LinkFrequency lf1, lf2; String key1, key2; for (Link link : this.graphBuilder.getGraph().edgeSet()) { if (!(link instanceof SimpleLink)) { continue; } key1 = link.getSource().getLabel().getUri() + link.getTarget().getLabel().getUri(); key2 = link.getTarget().getLabel().getUri() + link.getSource().getLabel().getUri(); // if (link.getSource().getLabel().getUri().indexOf("Place") != -1) // if (link.getTarget().getLabel().getUri().indexOf("City") != -1) // System.out.println("debug1"); // if (link.getSource().getLabel().getUri().indexOf("City") != -1) // if (link.getTarget().getLabel().getUri().indexOf("Country") != -1) // System.out.println("debug2"); lf1 = sourceTargetLinkFrequency.get(key1); if (lf1 == null) { lf1 = this.getMoreFrequentLinkBetweenNodes(link.getSource(), link.getTarget()); sourceTargetLinkFrequency.put(key1, lf1); } lf2 = sourceTargetLinkFrequency.get(key2); if (lf2 == null) { lf2 = this.getMoreFrequentLinkBetweenNodes(link.getTarget(), link.getSource()); sourceTargetLinkFrequency.put(key2, lf2); } int c = lf1.compareTo(lf2); if (c > 0) { sources.add(link.getSource()); targets.add(link.getTarget()); newLinks.add(lf1.linkUri); weights.add(lf1.getWeight()); } else if (c < 0) { sources.add(link.getTarget()); targets.add(link.getSource()); newLinks.add(lf2.linkUri); weights.add(lf2.getWeight()); } else continue; oldLinks.add(link); } for (Link link : oldLinks) this.graphBuilder.getGraph().removeEdge(link); String id; String uri; Label label; Link newLink; for (int i = 0; i < newLinks.size(); i++) { uri = newLinks.get(i); id = LinkIdFactory.getLinkId(uri, sources.get(i).getId(), targets.get(i).getId()); label = new Label(uri); if (uri.equalsIgnoreCase(Uris.RDFS_SUBCLASS_URI)) newLink = new SubClassLink(id); else newLink = new ObjectPropertyLink(id, label); this.graphBuilder.addLink(sources.get(i), targets.get(i), newLink); this.graphBuilder.changeLinkWeight(newLink, weights.get(i)); } } private Set<Node> addDataPropertyToDomainNodes(String domainUri, String propertyUri, String columnNodeName) { Set<Node> addedNodes = new HashSet<Node>(); // add dataproperty to existing classes if sl is a data node mapping List<Node> nodesWithSameUriOfDomain = this.graphBuilder.getUriToNodesMap().get(domainUri); if (nodesWithSameUriOfDomain != null) { for (Node source : nodesWithSameUriOfDomain) { if (source instanceof InternalNode && source.getPatternIds().size() > 0) { // boolean propertyLinkExists = false; int countOfExistingPropertyLinks = 0; List<Link> linkWithSameUris = this.graphBuilder.getUriToLinksMap().get(propertyUri); if (linkWithSameUris != null) for (Link l : linkWithSameUris) { if (l.getSource().equals(source)) { countOfExistingPropertyLinks ++; // propertyLinkExists = true; // break; } } if (countOfExistingPropertyLinks >= 1) continue; String nodeId = nodeIdFactory.getNodeId(columnNodeName); ColumnNode target = new ColumnNode(nodeId, "", "", ""); this.graphBuilder.addNodeWithoutUpdatingGraph(target); addedNodes.add(target); String linkId = LinkIdFactory.getLinkId(propertyUri, source.getId(), target.getId()); Link link = new DataPropertyLink(linkId, new Label(propertyUri), false); this.graphBuilder.addLink(source, target, link); } } } return addedNodes; } private Set<Node> addSemanticLabel(SemanticLabel sl) { Set<Node> addedNodes = new HashSet<Node>(); InternalNode source = null; String nodeId; nodeId = nodeIdFactory.getNodeId(sl.getNodeUri()); source = new InternalNode(nodeId, new Label(sl.getNodeUri())); this.graphBuilder.addNode(source, addedNodes); if (sl.getType() == SemanticLabelType.DataProperty) { nodeId = nodeIdFactory.getNodeId(sl.getLeafName()); ColumnNode target = new ColumnNode(nodeId, "", "", ""); this.graphBuilder.addNodeWithoutUpdatingGraph(target); addedNodes.add(target); String linkId = LinkIdFactory.getLinkId(sl.getLinkUri(), source.getId(), target.getId()); Link link = new DataPropertyLink(linkId, new Label(sl.getLinkUri()), false); this.graphBuilder.addLink(source, target, link); } return addedNodes; } private CandidateSteinerSets getCandidateSteinerSets(List<SemanticLabel> semanticLabels, Set<Node> addedNodes) { int maxNumberOfMappedNodes = 0; for (SemanticLabel sl : semanticLabels) { if (sl.getType() == SemanticLabelType.Class) maxNumberOfMappedNodes += 1; else maxNumberOfMappedNodes += 2; } CandidateSteinerSets candidateSteinerSets = new CandidateSteinerSets(maxNumberOfMappedNodes); if (addedNodes == null) addedNodes = new HashSet<Node>(); Set<Node> tempNodeSet = null; for (SemanticLabel sl : semanticLabels) { SemanticTypeMapping mapping; if (sl.getType() == SemanticLabelType.Class) mapping = new SemanticTypeMapping(null, MappingType.ClassNode); else mapping = new SemanticTypeMapping(null, MappingType.DataNode); boolean addSemanticLabel = false; Set<MappingStruct> similarStructsInGraph = this.labelToMappingStructs.get(sl); // if semantic label is a data property, we add this property to all the nodes having the same domain if (sl.getType() == SemanticLabelType.DataProperty) { tempNodeSet = addDataPropertyToDomainNodes(sl.getNodeUri(), sl.getLinkUri(), sl.getLeafName()); addedNodes.addAll(tempNodeSet); } if ((sl.getType() == SemanticLabelType.Class && similarStructsInGraph == null) || sl.getType() == SemanticLabelType.DataProperty && similarStructsInGraph == null && tempNodeSet.size() == 0) addSemanticLabel = true; if (addSemanticLabel) { tempNodeSet = addSemanticLabel(sl); addedNodes.addAll(tempNodeSet); } this.updateHashMaps(); similarStructsInGraph = this.labelToMappingStructs.get(sl); for (MappingStruct ms : similarStructsInGraph) { mapping.addMappingStruct(ms); } candidateSteinerSets.updateSteinerSets(mapping); } return candidateSteinerSets; } // private List<RankedSteinerSet> rankSteinerSets(List<Set<Node>> steinerNodeSets) { // // List<RankedSteinerSet> rankedSteinerSets = new ArrayList<RankedSteinerSet>(); // for (Set<Node> nodes : steinerNodeSets) { //// if (nodes.size() == 17) //// System.out.println(nodes.size()); // RankedSteinerSet r = new RankedSteinerSet(nodes); // rankedSteinerSets.add(r); // } // // Collections.sort(rankedSteinerSets); // // // if (rankedSteinerSets != null && rankedSteinerSets.size() > MAX_STEINER_NODES_SETS ) // return rankedSteinerSets.subList(0, MAX_STEINER_NODES_SETS); // // return rankedSteinerSets; // } // // private List<Set<Node>> getSteinerNodeSets(List<Set<MappingStruct>> labelStructSets, int numOfAttributes) { // // if (labelStructSets == null) // return null; // // Set<List<MappingStruct>> labelStructLists = Sets.cartesianProduct(labelStructSets); // logger.info("cartesian product of label structs is done, size: " + labelStructLists.size()); // // List<Set<Node>> steinerNodeSets = new ArrayList<Set<Node>>(); // // int numOfTargets; // for (List<MappingStruct> labelStructs : labelStructLists) { //// System.out.println(i++); // numOfTargets = 0; // Set<Node> steinerNodes = new HashSet<Node>(); //// Set<String> debug = new HashSet<String>(); // for (MappingStruct ls : labelStructs) { // steinerNodes.add(ls.getSource()); // if (!steinerNodes.contains(ls.getTarget())) // numOfTargets ++; // steinerNodes.add(ls.getTarget()); //// if (debug.contains(ls.getSource().getId() + ls.getLink().getId())) //// System.out.println("debug"); //// debug.add(ls.getSource().getId() + ls.getLink().getId()); // } // if (numOfTargets == numOfAttributes) // steinerNodeSets.add(steinerNodes); // } // // return steinerNodeSets; // // } private DirectedWeightedMultigraph<Node, Link> computeSteinerTree(Set<Node> steinerNodes) { if (steinerNodes == null || steinerNodes.size() == 0) { logger.error("There is no steiner node."); return null; } // System.out.println(steinerNodes.size()); List<Node> steinerNodeList = new ArrayList<Node>(steinerNodes); // List<Link> updatedLinks = new ArrayList<Link>(); // for (Link l : this.patternLinks) { // if (steinerNodes.contains(l.getSource()) && steinerNodes.contains(l.getTarget())) // continue; // updatedLinks.add(l); // } // // for (Link l : updatedLinks) { // this.graphBuilder.changeLinkWeight(l, ModelingParams.PROPERTY_DIRECT_WEIGHT); // } // GraphUtil.printGraphSimple(this.graphBuilder.getGraph()); long start = System.currentTimeMillis(); UndirectedGraph<Node, Link> undirectedGraph = new AsUndirectedGraph<Node, Link>(this.graphBuilder.getGraph()); logger.info("computing steiner tree ..."); SteinerTree steinerTree = new SteinerTree(undirectedGraph, steinerNodeList); DirectedWeightedMultigraph<Node, Link> tree = (DirectedWeightedMultigraph<Node, Link>)GraphUtil.asDirectedGraph(steinerTree.getSteinerTree()); GraphUtil.printGraphSimple(tree); long steinerTreeElapsedTimeMillis = System.currentTimeMillis() - start; logger.info("total number of nodes in steiner tree: " + tree.vertexSet().size()); logger.info("total number of edges in steiner tree: " + tree.edgeSet().size()); logger.info("time to compute steiner tree: " + (steinerTreeElapsedTimeMillis/1000F)); // for (Link l : updatedLinks) { // this.graphBuilder.changeLinkWeight(l, ModelingParams.PATTERN_LINK_WEIGHT); // } return tree; // long finalTreeElapsedTimeMillis = System.currentTimeMillis() - steinerTreeElapsedTimeMillis; // DirectedWeightedMultigraph<Node, Link> finalTree = buildOutputTree(tree); // logger.info("time to build final tree: " + (finalTreeElapsedTimeMillis/1000F)); // GraphUtil.printGraph(finalTree); // return finalTree; } // private List<RankedModel> rankModels(List<DirectedWeightedMultigraph<Node, Link>> models) { // // List<RankedModel> rankedModels = new ArrayList<RankedModel>(); // if (models == null || models.size() == 0) // return rankedModels; // // int count = 1; // // for (DirectedWeightedMultigraph<Node, Link> m : models) { // logger.info("computing raking factors for model " + count + " ..."); // RankedModel r = new RankedModel(m); // rankedModels.add(r); // count ++; // logger.info("coherence=" + r.getCoherenceString() + ", cost=" + r.getCost()); // } // //// Collections.sort(rankedModels); // return rankedModels; // } public List<RankedModel> hypothesize(List<SemanticLabel> semanticLabels, int numOfAttributes) { Set<Node> addedNodes = new HashSet<Node>(); //They should be deleted from the graph after computing the semantic models CandidateSteinerSets candidateSteinerSets = getCandidateSteinerSets(semanticLabels, addedNodes); logger.info("number of steiner sets: " + candidateSteinerSets.numberOfCandidateSets()); // List<Set<Node>> steinerNodeSets = getSteinerNodeSets(labelStructSets, numOfAttributes); // if (steinerNodeSets == null || steinerNodeSets.size() == 0) return null; // // logger.info("number of possible steiner nodes sets:" + steinerNodeSets.size()); // // //// for (List<Node> steinerNodes : steinerNodeSets) { //// System.out.println(); //// System.out.println(); //// //// for (Node n : steinerNodes) { //// System.out.println(n.getId()); //// } //// //// System.out.println(); //// System.out.println(); //// } // // List<RankedSteinerSet> rankedSteinerSets = rankSteinerSets(steinerNodeSets); // //// for (RankedSteinerSet r : rankedSteinerSets) //// System.out.println(r.getCohesionString()); logger.info("updating weights according to training data ..."); long start = System.currentTimeMillis(); this.updateWeights(); long updateWightsElapsedTimeMillis = System.currentTimeMillis() - start; logger.info("time to update weights: " + (updateWightsElapsedTimeMillis/1000F)); // int count = 1; // for (RankedSteinerSet r : rankedSteinerSets) { // logger.info("computing steiner tree for steiner nodes set " + count + " ..."); // DirectedWeightedMultigraph<Node, Link> tree = computeSteinerTree(r.getNodes()); // count ++; // if (tree != null) models.add(tree); // } // List<DirectedWeightedMultigraph<Node, Link>> models = // new ArrayList<DirectedWeightedMultigraph<Node,Link>>(); List<RankedModel> rankedModels = new ArrayList<RankedModel>(); int count = 1; for (SteinerNodes sn : candidateSteinerSets.getSteinerSets()) { logger.info("computing steiner tree for steiner nodes set " + count + " ..."); sn.print(); DirectedWeightedMultigraph<Node, Link> tree = computeSteinerTree(sn.getNodes()); count ++; if (tree != null) { RankedModel r = new RankedModel(tree, sn); rankedModels.add(r); } if (count == MAX_STEINER_NODES_SETS) break; } List<RankedModel> uniqueModels = new ArrayList<RankedModel>(); RankedModel current, previous; if (rankedModels != null) { Collections.sort(rankedModels); if (rankedModels.size() > 0) uniqueModels.add(rankedModels.get(0)); for (int i = 1; i < rankedModels.size(); i++) { current = rankedModels.get(i); previous = rankedModels.get(i - 1); if (current.getScore() == previous.getScore() && current.getCost() == previous.getCost()) continue; uniqueModels.add(current); } if (uniqueModels.size() > MAX_CANDIDATES ) return uniqueModels.subList(0, MAX_CANDIDATES); } return uniqueModels; } private LinkFrequency getMoreFrequentLinkBetweenNodes(Node source, Node target) { String sourceUri, targetUri; List<String> possibleLinksFromSourceToTarget = new ArrayList<String>(); sourceUri = source.getLabel().getUri(); targetUri = target.getLabel().getUri(); HashSet<String> objectPropertiesDirect; HashSet<String> objectPropertiesIndirect; HashSet<String> objectPropertiesWithOnlyDomain; HashSet<String> objectPropertiesWithOnlyRange; HashMap<String, Label> objectPropertiesWithoutDomainAndRange = ontologyManager.getObjectPropertiesWithoutDomainAndRange(); sourceUri = source.getLabel().getUri(); targetUri = target.getLabel().getUri(); possibleLinksFromSourceToTarget.clear(); objectPropertiesDirect = ontologyManager.getObjectPropertiesDirect(sourceUri, targetUri); if (objectPropertiesDirect != null) possibleLinksFromSourceToTarget.addAll(objectPropertiesDirect); objectPropertiesIndirect = ontologyManager.getObjectPropertiesIndirect(sourceUri, targetUri); if (objectPropertiesIndirect != null) possibleLinksFromSourceToTarget.addAll(objectPropertiesIndirect); objectPropertiesWithOnlyDomain = ontologyManager.getObjectPropertiesWithOnlyDomain(sourceUri, targetUri); if (objectPropertiesWithOnlyDomain != null) possibleLinksFromSourceToTarget.addAll(objectPropertiesWithOnlyDomain); objectPropertiesWithOnlyRange = ontologyManager.getObjectPropertiesWithOnlyRange(sourceUri, targetUri); if (objectPropertiesWithOnlyRange != null) possibleLinksFromSourceToTarget.addAll(objectPropertiesWithOnlyRange); if (ontologyManager.isSubClass(sourceUri, targetUri, true)) possibleLinksFromSourceToTarget.add(Uris.RDFS_SUBCLASS_URI); if (objectPropertiesWithoutDomainAndRange != null) { possibleLinksFromSourceToTarget.addAll(objectPropertiesWithoutDomainAndRange.keySet()); } // Collection<String> userLinks = this.sourceToTargetLinks.get(sourceUri + "---" + targetUri); // if (userLinks != null) { // for (String s : userLinks) // possibleLinksFromSourceToTarget.add(s); // } String selectedLinkUri1 = null; int maxCount1 = 0; String selectedLinkUri2 = null; int maxCount2 = 0; String selectedLinkUri3 = null; int maxCount3 = 0; String selectedLinkUri4 = null; int maxCount4 = 0; String key; if (possibleLinksFromSourceToTarget != null && possibleLinksFromSourceToTarget.size() > 0) { for (String s : possibleLinksFromSourceToTarget) { key = sourceUri + "<" + s + ">" + targetUri; Integer count1 = this.linkCountMap.get(key); if (count1 != null && count1.intValue() > maxCount1) { maxCount1 = count1.intValue(); selectedLinkUri1 = s; } } for (String s : possibleLinksFromSourceToTarget) { key = s + ">" + targetUri; Integer count2 = this.linkCountMap.get(key); if (count2 != null && count2.intValue() > maxCount2) { maxCount2 = count2.intValue(); selectedLinkUri2 = s; } } for (String s : possibleLinksFromSourceToTarget) { key = sourceUri + "<" + s; Integer count3 = this.linkCountMap.get(key); if (count3 != null && count3.intValue() > maxCount3) { maxCount3 = count3.intValue(); selectedLinkUri3 = s; } } for (String s : possibleLinksFromSourceToTarget) { key = s; Integer count4 = this.linkCountMap.get(key); if (count4 != null && count4.intValue() > maxCount4) { maxCount4 = count4.intValue(); selectedLinkUri4 = s; } } } else { logger.error("Something is going wrong. There should be at least one possible object property between " + sourceUri + " and " + targetUri); return null; } String selectedLinkUri; int maxCount; int type; if (selectedLinkUri1 != null && selectedLinkUri1.trim().length() > 0) { selectedLinkUri = selectedLinkUri1; maxCount = maxCount1; type = 1; // match domain and link and range } else if (selectedLinkUri2 != null && selectedLinkUri2.trim().length() > 0) { selectedLinkUri = selectedLinkUri2; maxCount = maxCount2; type = 2; // match link and range } else if (selectedLinkUri3 != null && selectedLinkUri3.trim().length() > 0) { selectedLinkUri = selectedLinkUri3; maxCount = maxCount3; type = 3; // match domain and link } else if (selectedLinkUri4 != null && selectedLinkUri4.trim().length() > 0) { selectedLinkUri = selectedLinkUri4; maxCount = maxCount4; type = 4; // match link label } else { if (objectPropertiesDirect != null && objectPropertiesDirect.size() > 0) { selectedLinkUri = objectPropertiesDirect.iterator().next(); type = 5; } else if (objectPropertiesIndirect != null && objectPropertiesIndirect.size() > 0) { selectedLinkUri = objectPropertiesIndirect.iterator().next(); type = 6; } else if (objectPropertiesWithOnlyDomain != null && objectPropertiesWithOnlyDomain.size() > 0) { selectedLinkUri = objectPropertiesWithOnlyDomain.iterator().next(); type = 7; } else if (objectPropertiesWithOnlyRange != null && objectPropertiesWithOnlyRange.size() > 0) { selectedLinkUri = objectPropertiesWithOnlyRange.iterator().next();; type = 8; } else if (ontologyManager.isSubClass(sourceUri, targetUri, true)) { selectedLinkUri = Uris.RDFS_SUBCLASS_URI; type = 9; } else { // if (objectPropertiesWithoutDomainAndRange != null && objectPropertiesWithoutDomainAndRange.keySet().size() > 0) { selectedLinkUri = new ArrayList<String>(objectPropertiesWithoutDomainAndRange.keySet()).get(0); type = 10; } maxCount = 0; } LinkFrequency lf = new LinkFrequency(selectedLinkUri, type, maxCount); return lf; } // private static double roundTwoDecimals(double d) { // DecimalFormat twoDForm = new DecimalFormat("#.##"); // return Double.valueOf(twoDForm.format(d)); // } private static void testApproach() throws Exception { String inputPath = Params.INPUT_DIR; String outputPath = Params.OUTPUT_DIR; String graphPath = Params.GRAPHS_DIR; List<ServiceModel> serviceModels = ModelReader.importServiceModels(inputPath); List<ServiceModel> trainingData = new ArrayList<ServiceModel>(); OntologyManager ontManager = new OntologyManager(); File ff = new File(Params.ONTOLOGY_DIR); File[] files = ff.listFiles(); for (File f : files) { ontManager.doImport(f); } ontManager.updateCache(); // // experiment 1 // OntologyManager ontManager = new OntologyManager(); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "dbpedia_3.8.owl")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "foaf.rdf")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "wgs84_pos.xml")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "rdf-schema.rdf")); // ontManager.updateCache(); // experiment 2 - museum data // OntologyManager ontManager = new OntologyManager(); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "100_rdf.owl")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "105_Rdf-schema.owl")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "120_dcterms.rdf")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "140_foaf.owl")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "180_rdaGr2.rdf")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "190_ore.owl")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "220_edm_from_xuming.owl")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "230_saam-ont.owl")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "250_skos.owl")); // ontManager.doImport(new File(Params.ONTOLOGY_DIR + "260_aac-ont.owl")); // ontManager.updateCache(); for (int i = 0; i < serviceModels.size(); i++) { // int i = 2; { trainingData.clear(); int newServiceIndex = i; ServiceModel newService = serviceModels.get(newServiceIndex); logger.info("======================================================"); logger.info(newService.getServiceDescription()); logger.info("======================================================"); // int[] trainingModels = {0, 4}; // for (int n = 0; n < trainingModels.length; n++) { int j = trainingModels[n]; for (int j = 0; j < serviceModels.size(); j++) { if (j != newServiceIndex) trainingData.add(serviceModels.get(j)); } Approach1 app = new Approach1(trainingData, ontManager); String graphName = graphPath + "graph" + String.valueOf(i+1); if (new File(graphName).exists()) { // read graph from file try { app.loadGraph(ontManager, graphName); } catch (Exception e) { e.printStackTrace(); } } else { logger.info("building the graph ..."); app.buildGraphFromTrainingModels(); // save graph to file try { app.saveGraph(graphName); } catch (Exception e) { e.printStackTrace(); } } // GraphUtil.printGraph(graph); DirectedWeightedMultigraph<Node, Link> correctModel = newService.getModel(); // we just get the semantic labels of the correct model List<SemanticLabel> newServiceSemanticLabel2s = getModelSemanticLabels(correctModel); int numOfattributes = newServiceSemanticLabel2s.size(); List<RankedModel> hypothesisList = app.hypothesize(newServiceSemanticLabel2s, numOfattributes); // if (hypothesis == null) // continue; Map<String, DirectedWeightedMultigraph<Node, Link>> graphs = new TreeMap<String, DirectedWeightedMultigraph<Node,Link>>(); if (hypothesisList != null) for (int k = 0; k < hypothesisList.size() && k < 3; k++) { RankedModel m = hypothesisList.get(k); GraphUtil.serialize(m.getModel(), Params.JGRAPHT_DIR + newService.getServiceNameWithPrefix() + ".app1.rank" + (k+1) + ".jgraph"); } graphs.put("1-correct model", correctModel); if (hypothesisList != null) for (int k = 0; k < hypothesisList.size(); k++) { RankedModel m = hypothesisList.get(k); double distance = Util.getDistance(correctModel, m.getModel()); // double distance = new GraphMatching(Util.toGxl(correctModel), // Util.toGxl(m.getModel())).getDistance(); String label = "candidate" + k + "--distance:" + distance + "---" + m.getDescription(); graphs.put(label, m.getModel()); } GraphVizUtil.exportJGraphToGraphvizFile(graphs, newService.getServiceDescription(), outputPath + serviceModels.get(i).getServiceNameWithPrefix() + ".app1.details.dot"); } } public static void main(String[] args) { try { // testSelectionOfBestMatch(); testApproach(); } catch (Exception e) { e.printStackTrace(); } } }