/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California. For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/
package edu.isi.karma.research.modeling;
import java.io.File;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.jgrapht.graph.AsUndirectedGraph;
import org.jgrapht.graph.DirectedWeightedMultigraph;
import org.jgrapht.graph.WeightedMultigraph;
import org.python.google.common.collect.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.isi.karma.config.ModelingConfiguration;
import edu.isi.karma.config.ModelingConfigurationRegistry;
import edu.isi.karma.modeling.alignment.GraphBuilder;
import edu.isi.karma.modeling.alignment.GraphBuilderTopK;
import edu.isi.karma.modeling.alignment.GraphUtil;
import edu.isi.karma.modeling.alignment.GraphVizLabelType;
import edu.isi.karma.modeling.alignment.GraphVizUtil;
import edu.isi.karma.modeling.alignment.LinkIdFactory;
import edu.isi.karma.modeling.alignment.ModelEvaluation;
import edu.isi.karma.modeling.alignment.NodeIdFactory;
import edu.isi.karma.modeling.alignment.SemanticModel;
import edu.isi.karma.modeling.alignment.SteinerTree;
import edu.isi.karma.modeling.alignment.TreePostProcess;
import edu.isi.karma.modeling.alignment.learner.CandidateSteinerSets;
import edu.isi.karma.modeling.alignment.learner.ModelLearningGraph;
import edu.isi.karma.modeling.alignment.learner.ModelLearningGraphType;
import edu.isi.karma.modeling.alignment.learner.ModelReader;
import edu.isi.karma.modeling.alignment.learner.SemanticTypeMapping;
import edu.isi.karma.modeling.alignment.learner.SortableSemanticModel;
import edu.isi.karma.modeling.alignment.learner.SteinerNodes;
import edu.isi.karma.modeling.ontology.OntologyManager;
import edu.isi.karma.modeling.research.Params;
import edu.isi.karma.rep.alignment.ClassInstanceLink;
import edu.isi.karma.rep.alignment.ColumnNode;
import edu.isi.karma.rep.alignment.ColumnSemanticTypeStatus;
import edu.isi.karma.rep.alignment.DataPropertyLink;
import edu.isi.karma.rep.alignment.DefaultLink;
import edu.isi.karma.rep.alignment.InternalNode;
import edu.isi.karma.rep.alignment.Label;
import edu.isi.karma.rep.alignment.LabeledLink;
import edu.isi.karma.rep.alignment.Node;
import edu.isi.karma.rep.alignment.SemanticType;
import edu.isi.karma.rep.alignment.SemanticType.Origin;
import edu.isi.karma.util.RandomGUID;
import edu.isi.karma.webserver.ContextParametersRegistry;
import edu.isi.karma.webserver.ServletContextParameterMap;
import edu.isi.karma.webserver.ServletContextParameterMap.ContextParameter;
public class ModelLearner_LOD {
// Class-wide SLF4J logger.
private static Logger logger = LoggerFactory.getLogger(ModelLearner_LOD.class);
// Ontology manager; supplied directly or taken from the supplied graph builder.
private OntologyManager ontologyManager = null;
// Graph builder the learner works on; learn() replaces it with a new GraphBuilderTopK built from its graph.
private GraphBuilder graphBuilder = null;
// Node id factory obtained from the graph builder; used when a new internal node has to be created.
private NodeIdFactory nodeIdFactory = null;
// Nodes supplied by the caller from which the candidate steiner sets are derived.
private List<Node> steinerNodes = null;
// Result of learn(); stays null until learn() produced at least one hypothesis.
private SemanticModel semanticModel = null;
// private long lastUpdateTimeOfGraph;
// True when constructed from an OntologyManager: learn() then fetches the graph builder
// from ModelLearningGraph (Compact type) instead of using one passed by the caller.
private boolean useKarmaAlignmentGraph;
// Number of candidate semantic types learn() passes to hypothesize().
private static final int NUM_SEMANTIC_TYPES = 4;
/**
 * Creates a learner that fetches its graph builder from ModelLearningGraph
 * when {@link #learn()} runs.
 *
 * If either argument is null, or the node list is empty, an error is logged
 * and the instance is left with all fields at their defaults.
 *
 * @param ontologyManager ontology manager used to look up the learning graph
 * @param steinerNodes    nodes the candidate steiner sets are derived from
 */
public ModelLearner_LOD(OntologyManager ontologyManager,
        List<Node> steinerNodes) {
    if (ontologyManager != null && steinerNodes != null && !steinerNodes.isEmpty()) {
        this.ontologyManager = ontologyManager;
        this.steinerNodes = steinerNodes;
        this.useKarmaAlignmentGraph = true;
        return;
    }
    logger.error("cannot instanciate model learner!");
}
/**
 * Creates a learner that works on a caller-supplied graph builder.
 *
 * The node id factory and the ontology manager are taken from the builder.
 * If either argument is null, or the node list is empty, an error is logged
 * and the instance is left with all fields at their defaults.
 *
 * @param graphBuilder builder holding the graph to search
 * @param steinerNodes nodes the candidate steiner sets are derived from
 */
public ModelLearner_LOD(GraphBuilder graphBuilder,
        List<Node> steinerNodes) {
    if (graphBuilder != null && steinerNodes != null && !steinerNodes.isEmpty()) {
        this.steinerNodes = steinerNodes;
        this.graphBuilder = graphBuilder;
        this.nodeIdFactory = this.graphBuilder.getNodeIdFactory();
        this.ontologyManager = this.graphBuilder.getOntologyManager();
        this.useKarmaAlignmentGraph = false;
        return;
    }
    logger.error("cannot instanciate model learner!");
}
/**
 * Returns the learned semantic model, running {@link #learn()} first when no
 * model has been learned yet.
 *
 * Any exception thrown by learn() is logged and swallowed, so callers must be
 * prepared for a null result.
 *
 * @return the learned model, or null when learning failed or produced no hypothesis
 */
public SemanticModel getModel() {
    if (this.semanticModel == null) {
        try {
            this.learn();
        } catch (Exception e) {
            // The old message was built as ("..." + semanticModel) != null ? semanticModel.getId() : "".
            // Because + binds tighter than !=, the condition was always true and the (null) model
            // was dereferenced, throwing a NullPointerException from inside this handler.
            logger.error("error in learning the semantic model", e);
        }
    }
    return this.semanticModel;
}
/**
 * Learns a semantic model and stores the top-ranked hypothesis in this instance.
 *
 * When the learner was built from an OntologyManager, the graph builder and node
 * id factory are first taken from the Compact ModelLearningGraph. The graph
 * builder is then replaced by a copy before hypotheses are generated. The stored
 * model is set to null when no hypothesis is returned.
 *
 * @throws Exception propagated from hypothesis generation
 */
public void learn() throws Exception {
    if (this.useKarmaAlignmentGraph) {
        ModelLearningGraph learningGraph =
                ModelLearningGraph.getInstance(ontologyManager, ModelLearningGraphType.Compact);
        this.graphBuilder = learningGraph.getGraphBuilder();
        this.nodeIdFactory = this.graphBuilder.getNodeIdFactory();
    }

    // work on a copy of the graph builder
    this.graphBuilder = cloneGraphBuilder(this.graphBuilder);

    List<SortableSemanticModel> ranked = this.hypothesize(true, NUM_SEMANTIC_TYPES);
    if (ranked == null || ranked.isEmpty()) {
        this.semanticModel = null;
        return;
    }
    SortableSemanticModel best = ranked.get(0);
    this.semanticModel = new SemanticModel(best);
}
/**
 * Creates a new GraphBuilderTopK for this learner's ontology manager.
 *
 * @param graphBuilder builder whose graph should seed the new one; may be null
 * @return a builder constructed from the given builder's graph, or one constructed
 *         with the {@code false} flag when the builder or its graph is null
 */
private GraphBuilder cloneGraphBuilder(GraphBuilder graphBuilder) {
    boolean hasGraph = graphBuilder != null && graphBuilder.getGraph() != null;
    if (!hasGraph) {
        return new GraphBuilderTopK(this.ontologyManager, false);
    }
    return new GraphBuilderTopK(this.ontologyManager, graphBuilder.getGraph());
}
// private boolean isGraphUpToDate() {
//
// if (this.lastUpdateTimeOfGraph < this.modelLearningGraph.getLastUpdateTime())
// return false;
//
// return true;
// }
/**
 * Generates ranked semantic-model hypotheses for this learner's steiner nodes.
 *
 * Steps: derive candidate steiner sets from the columns' semantic types, compute
 * steiner trees for each set (stopping once the configured number of candidate
 * mappings has been processed), wrap every tree in a SortableSemanticModel, sort
 * with LOD_SemanticModelComparator and drop entries whose score and cost equal
 * those of the entry right before them in the sorted list.
 *
 * @param useCorrectTypes    true to use the user-assigned semantic types of each column,
 *                           false to use the top learned types
 * @param numberOfCandidates number of learned semantic types to consider per column
 * @return the ranked hypotheses; when no candidate steiner set exists, a single model
 *         whose graph contains only the (unconnected) steiner nodes
 * @throws Exception propagated from tree computation or model construction
 */
public List<SortableSemanticModel> hypothesize(boolean useCorrectTypes, int numberOfCandidates) throws Exception {

    logger.info("graph nodes: " + this.graphBuilder.getGraph().vertexSet().size());
    logger.info("graph links: " + this.graphBuilder.getGraph().edgeSet().size());

    ModelingConfiguration modelingConfiguration = ModelingConfigurationRegistry.getInstance().getModelingConfiguration(ontologyManager.getContextId());

    // NOTE(review): nodes added to the graph while matching semantic types are collected
    // here, but nothing in this method removes them from the graph afterwards.
    Set<Node> addedNodes = new HashSet<Node>();

    List<ColumnNode> columnNodes = new LinkedList<ColumnNode>();
    for (Node n : steinerNodes)
        if (n instanceof ColumnNode)
            columnNodes.add((ColumnNode)n);

    logger.info("finding candidate steiner sets ... ");
    CandidateSteinerSets candidateSteinerSets = getCandidateSteinerSets(steinerNodes, useCorrectTypes, numberOfCandidates, addedNodes);

    if (candidateSteinerSets == null ||
            candidateSteinerSets.getSteinerSets() == null ||
            candidateSteinerSets.getSteinerSets().isEmpty()) {
        logger.error("there is no candidate set of steiner nodes.");
        return buildDisconnectedModel();
    }

    logger.info("graph nodes: " + this.graphBuilder.getGraph().vertexSet().size());
    logger.info("graph links: " + this.graphBuilder.getGraph().edgeSet().size());
    logger.info("number of steiner sets: " + candidateSteinerSets.numberOfCandidateSets());

    logger.info("computing steiner trees ...");
    List<SortableSemanticModel> sortableSemanticModels = new ArrayList<SortableSemanticModel>();
    int number = 0;
    for (SteinerNodes sn : candidateSteinerSets.getSteinerSets()) {
        if (sn == null) continue;

        logger.debug("computing steiner tree for steiner nodes set " + number + " ...");
        logger.debug(sn.getScoreDetailsString());
        number++;

        List<DirectedWeightedMultigraph<Node, LabeledLink>> topKSteinerTrees =
                computeSteinerTrees(sn, modelingConfiguration);
        if (topKSteinerTrees != null) {
            for (DirectedWeightedMultigraph<Node, LabeledLink> tree : topKSteinerTrees) {
                if (tree == null) continue;
                SemanticModel sm = new SemanticModel(new RandomGUID().toString(),
                        tree,
                        columnNodes,
                        sn.getMappingToSourceColumns());
                sortableSemanticModels.add(new SortableSemanticModel(sm, sn, false));
            }
        }

        if (number >= modelingConfiguration.getNumCandidateMappings())
            break;
    }

    Collections.sort(sortableSemanticModels, new LOD_SemanticModelComparator());

    // Keep the first model, then every model that differs in score or cost from
    // the model right before it in the sorted list.
    List<SortableSemanticModel> uniqueModels = new ArrayList<SortableSemanticModel>();
    for (int i = 0; i < sortableSemanticModels.size(); i++) {
        SortableSemanticModel current = sortableSemanticModels.get(i);
        if (i > 0) {
            SortableSemanticModel previous = sortableSemanticModels.get(i - 1);
            if (current.getScore() == previous.getScore() && current.getCost() == previous.getCost())
                continue;
        }
        uniqueModels.add(current);
    }

    logger.info("results are ready ...");
    return uniqueModels;
}

/** Builds the fallback result: one model whose graph holds the steiner nodes and no links. */
private List<SortableSemanticModel> buildDisconnectedModel() throws Exception {
    DirectedWeightedMultigraph<Node, LabeledLink> tree =
            new DirectedWeightedMultigraph<Node, LabeledLink>(LabeledLink.class);
    for (Node n : steinerNodes)
        tree.addVertex(n);
    SemanticModel sm = new SemanticModel(new RandomGUID().toString(), tree);
    List<SortableSemanticModel> result = new ArrayList<SortableSemanticModel>();
    result.add(new SortableSemanticModel(sm, null, false));
    return result;
}

/** Computes the steiner trees for one steiner set: top-k when the builder supports it, otherwise a single default tree. */
private List<DirectedWeightedMultigraph<Node, LabeledLink>> computeSteinerTrees(
        SteinerNodes sn, ModelingConfiguration modelingConfiguration) throws Exception {

    // learn() always installs a GraphBuilderTopK; a plain GraphBuilder is only seen
    // when a caller invokes hypothesize() directly on a learner built with one.
    if (this.graphBuilder instanceof GraphBuilderTopK) {
        return ((GraphBuilderTopK)this.graphBuilder).getTopKSteinerTrees(sn,
                modelingConfiguration.getTopKSteinerTree(),
                5, 2, true);
    }

    List<DirectedWeightedMultigraph<Node, LabeledLink>> trees =
            new LinkedList<DirectedWeightedMultigraph<Node, LabeledLink>>();
    SteinerTree steinerTree = new SteinerTree(
            new AsUndirectedGraph<Node, DefaultLink>(this.graphBuilder.getGraph()), Lists.newLinkedList(sn.getNodes()));
    WeightedMultigraph<Node, DefaultLink> t = steinerTree.getDefaultSteinerTree();
    TreePostProcess treePostProcess = new TreePostProcess(this.graphBuilder, t);
    if (treePostProcess.getTree() != null)
        trees.add(treePostProcess.getTree());
    return trees;
}
/**
 * Builds the candidate steiner sets for the given nodes.
 *
 * Pass 1 collects the candidate semantic types of every column node and counts how
 * often each (domain URI + property URI) pair occurs. Pass 2 maps every candidate
 * type of every column onto the graph (adding a new structure when nothing matches)
 * and feeds the resulting mappings into the candidate sets. Finally the internal
 * nodes among the steiner nodes are added to the candidate sets.
 *
 * @param steinerNodes       nodes to cover; column nodes and internal nodes are used
 * @param useCorrectTypes    true to use user-assigned types, false to use learned types
 * @param numberOfCandidates number of learned types requested per column
 * @param addedNodes         receives nodes added to the graph; a local set is used when null
 * @return the candidate sets, or null when there are no steiner nodes or a column
 *         has no candidate semantic types
 */
private CandidateSteinerSets getCandidateSteinerSets(List<Node> steinerNodes, boolean useCorrectTypes, int numberOfCandidates, Set<Node> addedNodes) {

    if (steinerNodes == null || steinerNodes.isEmpty())
        return null;

    int maxNumberOfSteinerNodes = steinerNodes.size() * 2;
    CandidateSteinerSets candidateSteinerSets = new CandidateSteinerSets(maxNumberOfSteinerNodes, ontologyManager.getContextId());

    if (addedNodes == null)
        addedNodes = new HashSet<Node>();

    HashMap<ColumnNode, List<SemanticType>> columnSemanticTypes = new HashMap<ColumnNode, List<SemanticType>>();
    HashMap<String, Integer> semanticTypesCount = new HashMap<String, Integer>();

    // Pass 1: candidate types per column and occurrence count per semantic type.
    for (Node n : steinerNodes) {
        if (!(n instanceof ColumnNode))
            continue;
        ColumnNode cn = (ColumnNode)n;

        // Declared per column on purpose: with a shared variable, a column that is not
        // user-assigned silently inherited the previous column's semantic types.
        List<SemanticType> candidateSemanticTypes = null;
        if (!useCorrectTypes) {
            candidateSemanticTypes = cn.getTopKLearnedSemanticTypes(numberOfCandidates);
        } else if (cn.getSemanticTypeStatus() == ColumnSemanticTypeStatus.UserAssigned) {
            candidateSemanticTypes = cn.getUserSemanticTypes();
        }

        if (candidateSemanticTypes == null) {
            logger.error("No candidate semantic type found for the column " + cn.getColumnName());
            return null;
        }

        columnSemanticTypes.put(cn, candidateSemanticTypes);

        for (SemanticType semanticType : candidateSemanticTypes) {
            if (semanticType == null ||
                    semanticType.getDomain() == null ||
                    semanticType.getType() == null) continue;

            String key = semanticType.getDomain().getUri() + semanticType.getType().getUri();
            Integer count = semanticTypesCount.get(key);
            semanticTypesCount.put(key, count == null ? 1 : count.intValue() + 1);
        }
    }

    // Pass 2: map each column's candidate types onto the graph.
    long numOfMappings = 1;
    for (Node n : steinerNodes) {
        if (n instanceof InternalNode)
            continue;
        if (!(n instanceof ColumnNode))
            continue;
        ColumnNode cn = (ColumnNode)n;

        List<SemanticType> candidateSemanticTypes = columnSemanticTypes.get(cn);
        if (candidateSemanticTypes == null) continue;

        logger.info("===== Column: " + cn.getColumnName());

        Set<SemanticTypeMapping> semanticTypeMappings = new HashSet<SemanticTypeMapping>();
        for (SemanticType semanticType : candidateSemanticTypes) {

            // Validate before logging: the log call below dereferences the semantic type.
            if (semanticType == null ||
                    semanticType.getDomain() == null ||
                    semanticType.getType() == null) continue;

            logger.info("\t" + semanticType.getConfidenceScore() + " :" + semanticType.getModelLabelString());

            String domainUri = semanticType.getDomain().getUri();
            String propertyUri = semanticType.getType().getUri();
            Integer countOfSemanticType = semanticTypesCount.get(domainUri + propertyUri);
            logger.debug("count of semantic type: " + countOfSemanticType);

            Set<SemanticTypeMapping> matches = findSemanticTypeInGraph(cn, semanticType, semanticTypesCount, addedNodes);
            int countOfMatches = matches == null ? 0 : matches.size();
            logger.debug("number of matches for semantic type: " + countOfMatches);

            if (matches != null)
                semanticTypeMappings.addAll(matches);

            // No structure in the graph matches the semantic type: add a new one.
            if (countOfMatches == 0) {
                SemanticTypeMapping mp = addSemanticTypeStruct(cn, semanticType, addedNodes);
                if (mp != null)
                    semanticTypeMappings.add(mp);
            }
        }

        logger.debug("number of matches for column " + cn.getColumnName() +
                ": " + semanticTypeMappings.size());
        numOfMappings *= (semanticTypeMappings.isEmpty() ? 1 : semanticTypeMappings.size());

        logger.debug("number of candidate steiner sets before update: " + candidateSteinerSets.getSteinerSets().size());
        candidateSteinerSets.updateSteinerSets(semanticTypeMappings);
        logger.debug("number of candidate steiner sets after update: " + candidateSteinerSets.getSteinerSets().size());
    }

    for (Node n : steinerNodes) {
        if (n instanceof InternalNode) {
            candidateSteinerSets.updateSteinerSets((InternalNode)n);
        }
    }

    logger.info("number of possible mappings: " + numOfMappings);

    return candidateSteinerSets;
}
/**
 * Finds (and creates) places in the graph where a column with the given semantic
 * type can be attached.
 *
 * First, every match the graph builder already knows for the (domain + property)
 * key is turned into a mapping for the source column. Then, for every graph node
 * with the domain URI that has not reached its limit for this property, a new
 * column node and data-property link are added to the graph and returned as a
 * further mapping.
 *
 * @param sourceColumn       column the semantic type belongs to
 * @param semanticType       semantic type to locate
 * @param semanticTypesCount occurrences of each (domain URI + property URI) key
 * @param addedNodes         receives newly added column nodes; a local set is used when null
 * @return the mappings found; empty when the semantic type is incomplete or unknown
 */
private Set<SemanticTypeMapping> findSemanticTypeInGraph(ColumnNode sourceColumn, SemanticType semanticType,
        HashMap<String, Integer> semanticTypesCount, Set<Node> addedNodes) {

    logger.debug("finding matches for semantic type in the graph ... ");
    ModelingConfiguration modelingConfiguration = ModelingConfigurationRegistry.getInstance().getModelingConfiguration(ontologyManager.getContextId());

    if (addedNodes == null)
        addedNodes = new HashSet<Node>();

    Set<SemanticTypeMapping> mappings = new HashSet<SemanticTypeMapping>();

    if (semanticType == null) {
        logger.error("semantic type is null.");
        return mappings;
    }
    if (semanticType.getDomain() == null) {
        logger.error("semantic type does not have any domain");
        return mappings;
    }
    if (semanticType.getType() == null) {
        logger.error("semantic type does not have any link");
        return mappings;
    }

    String domainUri = semanticType.getDomain().getUri();
    String propertyUri = semanticType.getType().getUri();
    Double confidence = semanticType.getConfidenceScore();
    Origin origin = semanticType.getOrigin();

    // Validate the URIs before using them as a lookup key, so a missing URI is
    // reported as such rather than as a missing count.
    if (domainUri == null || domainUri.isEmpty()) {
        logger.error("semantic type does not have any domain");
        return mappings;
    }
    if (propertyUri == null || propertyUri.isEmpty()) {
        logger.error("semantic type does not have any link");
        return mappings;
    }

    Integer countOfSemanticType = semanticTypesCount.get(domainUri + propertyUri);
    if (countOfSemanticType == null) {
        logger.error("count of semantic type should not be null or zero");
        return mappings;
    }

    logger.debug("semantic type: " + domainUri + "|" + propertyUri + "|" + confidence + "|" + origin);

    // Reuse the matches already recorded by the graph builder. The weight of the
    // last visited match is reused for the links created below.
    // NOTE(review): the match set's iteration order decides which weight wins - confirm this is intended.
    Double weight = null;
    Set<SemanticTypeMapping> semanticTypeMatches = this.graphBuilder.getSemanticTypeMatches().get(domainUri + propertyUri);
    if (semanticTypeMatches != null) {
        for (SemanticTypeMapping stm : semanticTypeMatches) {
            SemanticTypeMapping mp =
                    new SemanticTypeMapping(sourceColumn, semanticType, stm.getSource(), stm.getLink(), stm.getTarget());
            mappings.add(mp);
            weight = stm.getLink().getWeight();
        }
    }

    logger.debug("adding data property to the found internal nodes ...");

    boolean allowMultipleSamePropertiesPerNode = modelingConfiguration.isMultipleSamePropertyPerNode();
    Set<Node> nodesWithSameUriOfDomain = this.graphBuilder.getUriToNodesMap().get(domainUri);
    if (nodesWithSameUriOfDomain == null)
        return mappings;

    for (Node source : nodesWithSameUriOfDomain) {
        // Skip nodes that already carry as many of this property as allowed.
        Integer count = this.graphBuilder.getNodeDataPropertyCount().get(source.getId() + propertyUri);
        if (count != null) {
            int limit = allowMultipleSamePropertiesPerNode ? countOfSemanticType.intValue() : 1;
            if (count >= limit)
                continue;
        }

        String nodeId = new RandomGUID().toString();
        ColumnNode target = new ColumnNode(nodeId, nodeId, sourceColumn.getColumnName(), null);
        if (!this.graphBuilder.addNode(target)) continue;
        addedNodes.add(target);

        String linkId = LinkIdFactory.getLinkId(propertyUri, source.getId(), target.getId());
        LabeledLink link = new DataPropertyLink(linkId, new Label(propertyUri));

        boolean result = weight == null ? this.graphBuilder.addLink(source, target, link) : this.graphBuilder.addLink(source, target, link, weight);
        if (!result) continue;

        // NOTE(review): assumes every node registered under a domain URI is an InternalNode - confirm.
        SemanticTypeMapping mp = new SemanticTypeMapping(sourceColumn, semanticType, (InternalNode)source, link, target);
        mappings.add(mp);
    }

    return mappings;
}
/**
 * Adds a new (class node, link, column node) structure for a semantic type to the
 * graph and returns it as a mapping for the source column.
 *
 * The class node is a copy of an existing node with the same domain URI when one
 * exists, otherwise a newly created internal node. The link is a ClassInstanceLink
 * when the property URI equals the fixed class-instance label, otherwise a
 * DataPropertyLink labelled through the ontology manager.
 *
 * @param sourceColumn column the semantic type belongs to
 * @param semanticType semantic type to add
 * @param addedNodes   receives nodes added to the graph; a local set is used when null
 * @return the new mapping, or null when the semantic type is incomplete or any
 *         graph update is rejected
 */
private SemanticTypeMapping addSemanticTypeStruct(ColumnNode sourceColumn, SemanticType semanticType, Set<Node> addedNodes) {

    logger.debug("adding semantic type to the graph ... ");

    if (addedNodes == null) {
        addedNodes = new HashSet<Node>();
    }

    // reject incomplete semantic types
    if (semanticType == null) {
        logger.error("semantic type is null.");
        return null;
    }
    if (semanticType.getDomain() == null) {
        logger.error("semantic type does not have any domain");
        return null;
    }
    if (semanticType.getType() == null) {
        logger.error("semantic type does not have any link");
        return null;
    }

    String classUri = semanticType.getDomain().getUri();
    String linkUri = semanticType.getType().getUri();
    Double score = semanticType.getConfidenceScore();
    Origin typeOrigin = semanticType.getOrigin();

    if (classUri == null || classUri.isEmpty()) {
        logger.error("semantic type does not have any domain");
        return null;
    }
    if (linkUri == null || linkUri.isEmpty()) {
        logger.error("semantic type does not have any link");
        return null;
    }

    logger.debug("semantic type: " + classUri + "|" + linkUri + "|" + score + "|" + typeOrigin);

    // class node: create a new one, or copy an existing node with the same URI
    InternalNode classNode;
    Set<Node> sameUriNodes = this.graphBuilder.getUriToNodesMap().get(classUri);
    if (sameUriNodes == null || sameUriNodes.isEmpty()) {
        String classNodeId = nodeIdFactory.getNodeId(classUri);
        classNode = new InternalNode(classNodeId, new Label(classUri));
        if (!this.graphBuilder.addNodeAndUpdate(classNode, addedNodes)) {
            return null;
        }
    } else {
        InternalNode template = (InternalNode)sameUriNodes.iterator().next();
        classNode = this.graphBuilder.copyNode(template, false);
        if (classNode == null) {
            return null;
        }
    }

    // column node
    String columnNodeId = new RandomGUID().toString();
    ColumnNode columnNode = new ColumnNode(columnNodeId, columnNodeId, sourceColumn.getColumnName(), null);
    if (!this.graphBuilder.addNode(columnNode)) {
        return null;
    }
    addedNodes.add(columnNode);

    // link between the two
    String linkId = LinkIdFactory.getLinkId(linkUri, classNode.getId(), columnNode.getId());
    LabeledLink link;
    if (!linkUri.equalsIgnoreCase(ClassInstanceLink.getFixedLabel().getUri())) {
        Label linkLabel = this.ontologyManager.getUriLabel(linkUri);
        link = new DataPropertyLink(linkId, linkLabel);
    } else {
        link = new ClassInstanceLink(linkId);
    }
    if (!this.graphBuilder.addLink(classNode, columnNode, link)) {
        return null;
    }

    return new SemanticTypeMapping(sourceColumn, semanticType, classNode, link, columnNode);
}
/**
 * Rounds a value to at most {@code k} decimal places using half-even rounding.
 *
 * The previous implementation formatted the value with a default-locale
 * DecimalFormat and parsed the text back, which fails in locales whose decimal
 * separator is not '.'. Rounding the exact value with BigDecimal is locale
 * independent and keeps the same rounding mode (DecimalFormat defaults to HALF_EVEN).
 *
 * @param d value to round; NaN and infinities are returned unchanged
 * @param k number of decimal places; values below zero are treated as zero
 * @return the rounded value
 */
private static double roundDecimals(double d, int k) {
    if (Double.isNaN(d) || Double.isInfinite(d)) {
        return d;
    }
    int scale = Math.max(k, 0);
    // Fully-qualified names keep this block independent of the file's import list.
    return new java.math.BigDecimal(d)
            .setScale(scale, java.math.RoundingMode.HALF_EVEN)
            .doubleValue();
}
/**
 * Prints one tab-separated statistics line per semantic model to standard output.
 *
 * Columns: attribute count, node count, link count, link count minus attribute
 * count, internal-node count, column-node count, number of columns whose user
 * semantic type appears among the top 4 learned types, and number of columns
 * whose user semantic type is the first learned type.
 *
 * Models without column nodes are skipped.
 *
 * @param semanticModels models to report on; nothing is printed when null
 */
private static void getStatistics(List<SemanticModel> semanticModels) {
    if (semanticModels == null)
        return;

    for (SemanticModel source : semanticModels) {
        // Check for null before the first dereference, and skip only this model
        // instead of abandoning the remaining ones.
        List<ColumnNode> columnNodes = source.getColumnNodes();
        if (columnNodes == null)
            continue;

        int attributeCount = columnNodes.size();
        int nodeCount = source.getGraph().vertexSet().size();
        int linkCount = source.getGraph().edgeSet().size();
        int datanodeCount = 0;
        int classNodeCount = 0;
        for (Node n : source.getGraph().vertexSet()) {
            if (n instanceof InternalNode) classNodeCount++;
            if (n instanceof ColumnNode) datanodeCount++;
        }

        int numberOfAttributesWhoseTypeIsFirstCRFType = 0;
        int numberOfAttributesWhoseTypeIsInCRFTypes = 0;
        for (ColumnNode cn : columnNodes) {
            List<SemanticType> userSemanticTypes = cn.getUserSemanticTypes();
            List<SemanticType> top4Suggestions = cn.getTopKLearnedSemanticTypes(4);
            if (userSemanticTypes == null || top4Suggestions == null)
                continue;

            // Count each column at most once: stop at the best-ranked suggestion
            // that equals any of the user's semantic types.
            suggestions:
            for (int j = 0; j < top4Suggestions.size(); j++) {
                SemanticType st = top4Suggestions.get(j);
                for (SemanticType t : userSemanticTypes) {
                    if (st.getModelLabelString().equalsIgnoreCase(t.getModelLabelString())) {
                        if (j == 0) numberOfAttributesWhoseTypeIsFirstCRFType++;
                        numberOfAttributesWhoseTypeIsInCRFTypes++;
                        break suggestions;
                    }
                }
            }
        }

        System.out.println(
                attributeCount + "\t" +
                nodeCount + "\t" +
                linkCount + "\t" +
                (linkCount - attributeCount) + "\t" +
                classNodeCount + "\t" +
                datanodeCount + "\t" +
                numberOfAttributesWhoseTypeIsInCRFTypes + "\t" +
                numberOfAttributesWhoseTypeIsFirstCRFType);
    }
}
/**
 * Experiment driver: learns a model for every semantic model read from the model
 * directory, evaluates the hypotheses against the original model, writes one
 * precision/recall/time line per source to a results file and exports the top
 * hypotheses to Graphviz.
 *
 * The switches at the top of the method select the graph source (ontology only,
 * a previously serialized graph, or a graph built from LOD patterns) and the
 * evaluation mode.
 *
 * @param args unused
 * @throws Exception propagated from ontology import, model learning or file output
 */
public static void main(String[] args) throws Exception {

    // NOTE(review): the Karma home and config locations are hard-coded to one developer's machine.
    ServletContextParameterMap contextParameters = ContextParametersRegistry.getInstance().registerByKarmaHome("/Users/mohsen/karma/");
    contextParameters.setParameterValue(ContextParameter.USER_DIRECTORY_PATH, "/Users/mohsen/karma/");
    contextParameters.setParameterValue(ContextParameter.USER_CONFIG_DIRECTORY, "/Users/mohsen/karma/config");

    OntologyManager ontologyManager = new OntologyManager(contextParameters.getId());
    File ff = new File(Params.ONTOLOGY_DIR);
    File[] files = ff.listFiles();
    if (files == null) {
        logger.error("no ontology to import at " + ff.getAbsolutePath());
        return;
    }

    for (File f : files) {
        if (f.getName().endsWith(".owl") ||
                f.getName().endsWith(".rdf") ||
                f.getName().endsWith(".n3") ||
                f.getName().endsWith(".ttl") ||
                f.getName().endsWith(".xml")) {
            logger.info("Loading ontology file: " + f.getAbsolutePath());
            ontologyManager.doImport(f, "UTF-8");
        }
    }
    ontologyManager.updateCache();

    // experiment switches
    boolean onlyGenerateSemanticTypeStatistics = false;
    boolean onlyUseOntology = true;
    boolean useCorrectType = true;
    int numberOfCandidates = 1;
    boolean onlyEvaluateInternalLinks = true;
    int maxPatternSize = 1;
    boolean recreateGraphs = true;
    boolean useModifiedDS = false;

    String lodDSName = "ds";
//  String lodDSName = "saam";
//  String lodDSName = "musicbrainz";

    String modelDir;
    if (useModifiedDS) modelDir = Params.ROOT_DIR + "models-json-modified/";
    else modelDir = Params.MODEL_DIR;

    List<SemanticModel> semanticModels =
            ModelReader.importSemanticModelsFromJsonFiles(modelDir, Params.MODEL_MAIN_FILE_EXT);
    if (semanticModels == null) {
        logger.error("no semantic model could be read from " + modelDir);
        return;
    }

    if (onlyGenerateSemanticTypeStatistics) {
        getStatistics(semanticModels);
        return;
    }

    String filePath = Params.RESULTS_DIR + "temp/";
    String filename = "";
    filename += "results";
    if (useModifiedDS) filename += ".modified";
    filename += "." + lodDSName;
    filename += useCorrectType ? ".correct":".k=" + numberOfCandidates;
    filename += onlyUseOntology ? ".p0" : ".p" + maxPatternSize;
    filename += onlyEvaluateInternalLinks ? ".internal":".all";
    filename += ".csv";

    // Make sure the results directory exists; PrintWriter does not create it.
    File resultsDir = new File(filePath);
    if (!resultsDir.exists()) {
        resultsDir.mkdirs();
    }

    PrintWriter resultFile = new PrintWriter(new File(filePath + filename));
    // try/finally so the results file is flushed and closed even when a source fails.
    try {
        resultFile.println("source \t p \t r \t t \n");

        for (int i = 0; i < semanticModels.size(); i++) {

            SemanticModel newSource = semanticModels.get(i);

            // Strip the file extension from the id; ids without a '.' are used as they are.
            String sourceId = newSource.getId();
            int extensionStart = sourceId.lastIndexOf(".");
            String sourceName = extensionStart < 0 ? sourceId : sourceId.substring(0, extensionStart);

            // mkdirs also creates missing parent directories.
            String outputDir = Params.OUTPUT_DIR + sourceName + "/";
            File outputDirFile = new File(outputDir);
            if (!outputDirFile.exists()) {
                outputDirFile.mkdirs();
            }

            String graphDir = Params.GRAPHS_DIR + sourceName + "/";
            File graphDirFile = new File(graphDir);
            if (!graphDirFile.exists()) {
                graphDirFile.mkdirs();
            }

            logger.info("======================================================");
            logger.info(newSource.getName() + "(#attributes:" + newSource.getColumnNodes().size() + ")");
            System.out.println(newSource.getName() + "(#attributes:" + newSource.getColumnNodes().size() + ")");
            logger.info("======================================================");

            SemanticModel correctModel = newSource;
            List<ColumnNode> columnNodes = correctModel.getColumnNodes();
            List<Node> steinerNodes = new LinkedList<Node>(columnNodes);

            String graphName = lodDSName + ".p" + maxPatternSize;
            String graphPath = graphDir + graphName + Params.GRAPH_JSON_FILE_EXT;

            ModelLearner_LOD modelLearner;
            if (onlyUseOntology) {
                modelLearner = new ModelLearner_LOD(new GraphBuilder(ontologyManager, false), steinerNodes);
            } else if (!recreateGraphs && new File(graphPath).exists()) {
                // read graph from file
                try {
                    logger.info("loading the graph ...");
                    DirectedWeightedMultigraph<Node, DefaultLink> graph = GraphUtil.importJson(graphPath);
                    modelLearner = new ModelLearner_LOD(new GraphBuilderTopK(ontologyManager, graph), steinerNodes);
                } catch (Exception e) {
                    logger.error("error in loading the graph from " + graphPath, e);
                    return; // the finally block closes the results file
                }
            } else {
                logger.info("building the graph ...");
                // create and save the graph to file
                String patternPath;
                if (lodDSName.equalsIgnoreCase("ds"))
                    patternPath = Params.LOD_DIR + sourceName + "/" + Params.PATTERNS_OUTPUT_DIR;
                else
                    patternPath = Params.LOD_DIR + lodDSName + "/" + Params.PATTERNS_OUTPUT_DIR;
                GraphBuilder_LOD_Pattern b = new GraphBuilder_LOD_Pattern(ontologyManager, patternPath, maxPatternSize);
                b.serialize(graphPath);
                modelLearner = new ModelLearner_LOD(b.getGraphBuilder(), steinerNodes);
            }

            long start = System.currentTimeMillis();
            List<SortableSemanticModel> hypothesisList = modelLearner.hypothesize(useCorrectType, numberOfCandidates);
            long elapsedTimeMillis = System.currentTimeMillis() - start;
            float elapsedTimeSec = elapsedTimeMillis/1000F;

            // keep at most the ten best hypotheses
            List<SortableSemanticModel> topHypotheses = null;
            if (hypothesisList != null) {
                topHypotheses = hypothesisList.size() > 10 ?
                        hypothesisList.subList(0, 10) :
                        hypothesisList;
            }

            // TreeMap: the export receives the models ordered by label.
            Map<String, SemanticModel> models =
                    new TreeMap<String, SemanticModel>();

            ModelEvaluation me;
            models.put("1-correct model", correctModel);
            if (topHypotheses != null)
                for (int k = 0; k < topHypotheses.size(); k++) {

                    SortableSemanticModel m = topHypotheses.get(k);

                    me = m.evaluate(correctModel, onlyEvaluateInternalLinks, false);

                    String label = "candidate " + k + "\n" +
                            "link coherence:" + (m.getLinkCoherence() == null ? "" : m.getCoherenceString()) + "\n";
                    label += (m.getSteinerNodes() == null || m.getSteinerNodes().getCoherence() == null) ?
                            "" : "node coherence:" + m.getSteinerNodes().getCoherence().getCoherenceValue() + "\n";
                    label += "confidence:" + m.getConfidenceScore() + "\n";
                    label += m.getSteinerNodes() == null ? "" : "mapping score:" + m.getSteinerNodes().getScore() + "\n";
                    label +=
                            "cost:" + roundDecimals(m.getCost(), 6) + "\n" +
                            "-precision:" + me.getPrecision() +
                            "-recall:" + me.getRecall();

                    models.put(label, m);

                    if (k == 0) { // first rank model
                        System.out.println("precision: " + me.getPrecision() +
                                ", recall: " + me.getRecall() +
                                ", time: " + elapsedTimeSec);
                        logger.info("precision: " + me.getPrecision() +
                                ", recall: " + me.getRecall() +
                                ", time: " + elapsedTimeSec);
                        String s = newSource.getName() + "\t" + me.getPrecision() + "\t" + me.getRecall() + "\t" + elapsedTimeSec;
                        resultFile.println(s);
                    }
                }

            String outputName = newSource.getName();
            if (useModifiedDS) outputName += ".modified";
            outputName += lodDSName.equals("ds") ? "" : ("." + lodDSName);
            outputName += onlyUseOntology ? ".p0" : ".p" + maxPatternSize;
            outputName += Params.GRAPHVIS_OUT_FILE_EXT;
            String outputPath = outputDir + outputName;

            GraphVizUtil.exportSemanticModelsToGraphviz(
                    models,
                    newSource.getName(),
                    outputPath,
                    GraphVizLabelType.LocalId,
                    GraphVizLabelType.LocalUri,
                    true,
                    true);
        }
    } finally {
        resultFile.close();
    }
}
}