/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California. For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/
package edu.isi.karma.modeling.alignment.learner;
import java.io.File;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.jgrapht.UndirectedGraph;
import org.jgrapht.graph.AsUndirectedGraph;
import org.jgrapht.graph.DirectedWeightedMultigraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.isi.karma.config.ModelingConfiguration;
import edu.isi.karma.config.ModelingConfigurationRegistry;
import edu.isi.karma.modeling.ModelingParams;
import edu.isi.karma.modeling.Uris;
import edu.isi.karma.modeling.alignment.GraphBuilder;
import edu.isi.karma.modeling.alignment.GraphUtil;
import edu.isi.karma.modeling.alignment.GraphVizLabelType;
import edu.isi.karma.modeling.alignment.GraphVizUtil;
import edu.isi.karma.modeling.alignment.LinkFrequency;
import edu.isi.karma.modeling.alignment.LinkIdFactory;
import edu.isi.karma.modeling.alignment.ModelEvaluation;
import edu.isi.karma.modeling.alignment.NodeIdFactory;
import edu.isi.karma.modeling.alignment.SemanticModel;
import edu.isi.karma.modeling.alignment.SteinerTree;
import edu.isi.karma.modeling.alignment.TreePostProcess;
import edu.isi.karma.modeling.ontology.OntologyManager;
import edu.isi.karma.modeling.research.Params;
import edu.isi.karma.rep.alignment.ClassInstanceLink;
import edu.isi.karma.rep.alignment.ColumnNode;
import edu.isi.karma.rep.alignment.DataPropertyLink;
import edu.isi.karma.rep.alignment.DefaultLink;
import edu.isi.karma.rep.alignment.InternalNode;
import edu.isi.karma.rep.alignment.Label;
import edu.isi.karma.rep.alignment.LabeledLink;
import edu.isi.karma.rep.alignment.Node;
import edu.isi.karma.rep.alignment.ObjectPropertyLink;
import edu.isi.karma.rep.alignment.SemanticType;
import edu.isi.karma.rep.alignment.SemanticType.Origin;
import edu.isi.karma.rep.alignment.SubClassLink;
import edu.isi.karma.util.RandomGUID;
import edu.isi.karma.webserver.ContextParametersRegistry;
import edu.isi.karma.webserver.ServletContextParameterMap;
public class ModelLearner_Old {
// Logger shared by all instances and by the static evaluation harness.
private static Logger logger = LoggerFactory.getLogger(ModelLearner_Old.class);
// Ontology manager; passed in directly or taken from the supplied graph builder.
private OntologyManager ontologyManager = null;
// Graph the steiner trees are computed on; either supplied by the caller or
// created by cloneGraphBuilder() from the ModelLearningGraph instance.
private GraphBuilder graphBuilder = null;
// Node id factory taken from the current graph builder.
private NodeIdFactory nodeIdFactory = null;
// Columns of the source whose semantic model is being learned.
private List<ColumnNode> columnNodes = null;
// Result of the last learn() call; null until learned or when no hypothesis was found.
private SemanticModel semanticModel = null;
// Value of ModelLearningGraph.getLastUpdateTime() captured by init().
private long lastUpdateTimeOfGraph;
// Only assigned by init(), i.e. when the OntologyManager constructor is used.
private ModelLearningGraph modelLearningGraph = null;
// True only when the OntologyManager constructor is used; makes learn()
// re-run init() when the learning graph reports a newer update time.
private boolean useAlignmentGraphBuiltFromKnownModels = false;
// Number of learned semantic-type candidates per column requested by learn().
private static final int NUM_SEMANTIC_TYPES = 4;
public ModelLearner_Old(OntologyManager ontologyManager,
List<ColumnNode> columnNodes) {
if (ontologyManager == null ||
columnNodes == null ||
columnNodes.isEmpty()) {
logger.error("cannot instanciate model learner!");
return;
}
this.useAlignmentGraphBuiltFromKnownModels = true;
this.ontologyManager = ontologyManager;
this.columnNodes = columnNodes;
this.init();
}
public ModelLearner_Old(GraphBuilder graphBuilder,
List<ColumnNode> columnNodes) {
if (graphBuilder == null ||
columnNodes == null ||
columnNodes.isEmpty()) {
logger.error("cannot instanciate model learner!");
return;
}
// this.useAlignmentGraphBuiltFromLOD = true;
this.columnNodes = columnNodes;
this.graphBuilder = graphBuilder;
this.nodeIdFactory = this.graphBuilder.getNodeIdFactory();
this.ontologyManager = this.graphBuilder.getOntologyManager();
}
/**
 * Returns the learned semantic model, running {@link #learn()} first when
 * no model is currently stored.
 *
 * @return the stored model, or null when learning produced no hypothesis
 */
public SemanticModel getModel() {
    boolean notLearnedYet = (this.semanticModel == null);
    if (notLearnedYet) {
        this.learn();
    }
    return this.semanticModel;
}
/**
 * Learns a semantic model for the columns and stores the top-ranked
 * hypothesis. When no hypothesis is produced the stored model is set to null.
 *
 * When the learner was built from known models and the learning graph has
 * been updated since it was last read, the graph is re-initialized first.
 */
public void learn() {
    boolean graphIsStale = this.useAlignmentGraphBuiltFromKnownModels && !isGraphUpToDate();
    if (graphIsStale) {
        init();
    }

    List<SortableSemanticModel_Old> ranked = this.hypothesize(true, NUM_SEMANTIC_TYPES);
    if (ranked == null || ranked.isEmpty()) {
        this.semanticModel = null;
        return;
    }

    SortableSemanticModel_Old best = ranked.get(0);
    this.semanticModel = new SemanticModel(best);
}
/**
 * Fetches the sparse {@link ModelLearningGraph} instance, records its last
 * update time, and installs a cloned graph builder (and its node id factory)
 * on this learner.
 */
private void init() {
    ModelLearningGraph learningGraph =
            ModelLearningGraph.getInstance(ontologyManager, ModelLearningGraphType.Sparse);
    this.modelLearningGraph = learningGraph;
    this.lastUpdateTimeOfGraph = learningGraph.getLastUpdateTime();

    GraphBuilder workingCopy = cloneGraphBuilder(learningGraph.getGraphBuilder());
    this.graphBuilder = workingCopy;
    this.nodeIdFactory = workingCopy.getNodeIdFactory();
}
/**
 * Creates a new graph builder for this learner's ontology manager.
 * When the given builder (or its graph) is null, the new builder is created
 * without a graph; otherwise the given builder's graph is passed to it.
 * As a side effect, {@code nodeIdFactory} is set to the new builder's factory.
 *
 * @param graphBuilder builder to copy from; may be null
 * @return the newly created graph builder
 */
private GraphBuilder cloneGraphBuilder(GraphBuilder graphBuilder) {
    boolean nothingToCopy = (graphBuilder == null) || (graphBuilder.getGraph() == null);

    GraphBuilder copy = nothingToCopy
            ? new GraphBuilder(this.ontologyManager, false)
            : new GraphBuilder(this.ontologyManager, graphBuilder.getGraph(), false);

    this.nodeIdFactory = copy.getNodeIdFactory();
    return copy;
}
/**
 * Tells whether the update time recorded by {@code init()} is at least as
 * recent as the one currently reported by the learning graph.
 *
 * @return false when the learning graph reports a newer update time
 */
private boolean isGraphUpToDate() {
    return this.lastUpdateTimeOfGraph >= this.modelLearningGraph.getLastUpdateTime();
}
/**
 * Generates ranked candidate semantic models for the columns.
 *
 * Steps: build the candidate steiner sets, update the link weights of the
 * graph, compute one steiner tree per candidate set (stopping once the
 * running set counter equals the configured number of candidate mappings),
 * sort the resulting models, and drop every model whose score and cost both
 * equal those of the model directly before it in the sorted list.
 *
 * @param useCorrectTypes       forwarded to the candidate-set computation
 * @param numberOfCRFCandidates number of learned semantic types requested per column
 * @return the de-duplicated, sorted models; null when there is no candidate steiner set
 */
public List<SortableSemanticModel_Old> hypothesize(boolean useCorrectTypes, int numberOfCRFCandidates) {

    // Collects the nodes added to the graph while matching semantic types.
    // The original note says they should be deleted from the graph after the
    // models are computed; nothing in this method removes them.
    Set<Node> addedNodes = new HashSet<>();

    logger.info("finding candidate steiner sets ... ");
    CandidateSteinerSets candidates =
            getCandidateSteinerSets(columnNodes, useCorrectTypes, numberOfCRFCandidates, addedNodes);

    boolean noCandidates = candidates == null
            || candidates.getSteinerSets() == null
            || candidates.getSteinerSets().isEmpty();
    if (noCandidates) {
        logger.error("there is no candidate set of steiner nodes.");
        return null;
    }

    logger.info("number of steiner sets: " + candidates.numberOfCandidateSets());

    logger.info("updating weights according to training data ...");
    long weightUpdateStart = System.currentTimeMillis();
    this.updateWeights();
    long weightUpdateMillis = System.currentTimeMillis() - weightUpdateStart;
    logger.info("time to update weights: " + (weightUpdateMillis/1000F));

    logger.info("computing steiner trees ...");
    List<SortableSemanticModel_Old> ranked = new ArrayList<>();
    int setCounter = 1;

    for (SteinerNodes steinerSet : candidates.getSteinerSets()) {
        logger.debug("computing steiner tree for steiner nodes set " + setCounter + " ...");
        logger.debug(steinerSet.getScoreDetailsString());

        DirectedWeightedMultigraph<Node, LabeledLink> tree = computeSteinerTree(steinerSet.getNodes());
        setCounter++;

        if (tree != null) {
            SemanticModel model = new SemanticModel(
                    new RandomGUID().toString(),
                    tree,
                    columnNodes,
                    steinerSet.getMappingToSourceColumns());
            ranked.add(new SortableSemanticModel_Old(model, steinerSet));
        }

        ModelingConfiguration configuration = ModelingConfigurationRegistry.getInstance().getModelingConfiguration(
                ContextParametersRegistry.getInstance().getContextParameters(ontologyManager.getContextId()).getKarmaHome());
        if (setCounter == configuration.getNumCandidateMappings()) {
            break;
        }
    }

    Collections.sort(ranked);

    // Keep a model unless it ties on both score and cost with its direct predecessor.
    List<SortableSemanticModel_Old> uniqueModels = new ArrayList<>();
    SortableSemanticModel_Old previous = null;
    for (SortableSemanticModel_Old current : ranked) {
        boolean tiesWithPrevious = previous != null
                && current.getScore() == previous.getScore()
                && current.getCost() == previous.getCost();
        if (!tiesWithPrevious) {
            uniqueModels.add(current);
        }
        previous = current;
    }

    logger.info("results are ready ...");
    return uniqueModels;
}
/**
 * Computes a steiner tree for the given nodes on an undirected view of the
 * graph builder's graph, then runs {@link TreePostProcess} on it to obtain
 * the directed, labeled tree. Size and timing are written to the debug log.
 *
 * @param steinerNodes nodes the tree has to connect
 * @return the post-processed tree; null when {@code steinerNodes} is null or empty
 */
private DirectedWeightedMultigraph<Node, LabeledLink> computeSteinerTree(Set<Node> steinerNodes) {

    boolean nothingToConnect = (steinerNodes == null) || steinerNodes.isEmpty();
    if (nothingToConnect) {
        logger.error("There is no steiner node.");
        return null;
    }

    List<Node> terminals = new ArrayList<>(steinerNodes);

    long startedAt = System.currentTimeMillis();
    UndirectedGraph<Node, DefaultLink> undirectedView = new AsUndirectedGraph<>(this.graphBuilder.getGraph());

    logger.debug("computing steiner tree ...");
    SteinerTree steinerTree = new SteinerTree(undirectedView, terminals);
    TreePostProcess postProcess =
            new TreePostProcess(this.graphBuilder, steinerTree.getDefaultSteinerTree(), null, false);
    DirectedWeightedMultigraph<Node, LabeledLink> result = postProcess.getTree();

    logger.debug(GraphUtil.labeledGraphToString(result));

    long elapsedMillis = System.currentTimeMillis() - startedAt;
    logger.debug("total number of nodes in steiner tree: " + result.vertexSet().size());
    logger.debug("total number of edges in steiner tree: " + result.edgeSet().size());
    logger.debug("time to compute steiner tree: " + (elapsedMillis/1000F));

    return result;
}
/**
 * Builds the candidate steiner sets for the given columns.
 *
 * Pass 1 collects the top learned semantic types of every column and counts
 * how often each (domain URI + property URI) combination occurs over all
 * columns. Pass 2 looks every semantic type up in the graph; when fewer
 * matches than occurrences are found, additional structures are added to the
 * graph until the counts agree. The mappings of each column are then fed to
 * {@link CandidateSteinerSets#updateSteinerSets}.
 *
 * @param columnNodes           columns to map
 * @param useCorrectTypes       not used by this method
 * @param numberOfCRFCandidates number of learned semantic types requested per column
 * @param addedNodes            receives the nodes added to the graph; a fresh set is used when null
 * @return the candidate steiner sets; null when {@code columnNodes} is null or empty
 */
private CandidateSteinerSets getCandidateSteinerSets(List<ColumnNode> columnNodes, boolean useCorrectTypes, int numberOfCRFCandidates, Set<Node> addedNodes) {

    if (columnNodes == null || columnNodes.isEmpty())
        return null;

    int maxNumberOfSteinerNodes = columnNodes.size() * 2;
    CandidateSteinerSets candidateSteinerSets = new CandidateSteinerSets(maxNumberOfSteinerNodes, ontologyManager.getContextId());

    if (addedNodes == null)
        addedNodes = new HashSet<>();

    HashMap<ColumnNode, List<SemanticType>> columnSemanticTypes = new HashMap<>();
    HashMap<String, Integer> semanticTypesCount = new HashMap<>();

    // Pass 1: remember the candidates of each column and count every domain/property pair.
    for (ColumnNode n : columnNodes) {
        List<SemanticType> candidateSemanticTypes = n.getTopKLearnedSemanticTypes(numberOfCRFCandidates);
        columnSemanticTypes.put(n, candidateSemanticTypes);

        // Pass 2 already tolerates a missing candidate list; do the same here.
        if (candidateSemanticTypes == null)
            continue;

        for (SemanticType semanticType : candidateSemanticTypes) {
            if (semanticType == null ||
                    semanticType.getDomain() == null ||
                    semanticType.getType() == null)
                continue;

            String key = semanticType.getDomain().getUri() + semanticType.getType().getUri();
            Integer count = semanticTypesCount.get(key);
            semanticTypesCount.put(key, count == null ? 1 : count.intValue() + 1);
        }
    }

    // long: the product of the per-column mapping counts is only logged, but
    // it overflows an int quickly when there are many columns.
    long numOfMappings = 1;

    // Pass 2: collect the mappings of every column.
    for (ColumnNode n : columnNodes) {

        List<SemanticType> candidateSemanticTypes = columnSemanticTypes.get(n);
        if (candidateSemanticTypes == null)
            continue;

        logger.info("===== Column: " + n.getColumnName());

        Set<SemanticTypeMapping> semanticTypeMappings = new HashSet<>();
        for (SemanticType semanticType : candidateSemanticTypes) {

            // The null check has to come before the label is read for logging.
            if (semanticType == null)
                continue;

            logger.info("\t===== Semantic Type: " + semanticType.getModelLabelString());

            if (semanticType.getDomain() == null ||
                    semanticType.getType() == null)
                continue;

            String key = semanticType.getDomain().getUri() + semanticType.getType().getUri();
            Integer countOfSemanticType = semanticTypesCount.get(key);

            Set<SemanticTypeMapping> matches = findSemanticTypeInGraph(n, semanticType, semanticTypesCount, addedNodes);
            if (matches != null)
                semanticTypeMappings.addAll(matches);

            int countOfMatches = matches == null ? 0 : matches.size();

            // Fewer structures in the graph than occurrences of this semantic
            // type: add new structures for the difference.
            int missing = countOfSemanticType - countOfMatches;
            for (int i = 0; i < missing; i++) {
                SemanticTypeMapping mp = addSemanticTypeStruct(n, semanticType, addedNodes);
                if (mp != null)
                    semanticTypeMappings.add(mp);
            }
        }

        logger.info("number of matches for column " + n.getColumnName() +
                ": " + semanticTypeMappings.size());
        numOfMappings *= semanticTypeMappings.isEmpty() ? 1 : semanticTypeMappings.size();

        candidateSteinerSets.updateSteinerSets(semanticTypeMappings);
    }

    logger.info("number of possible mappings: " + numOfMappings);

    return candidateSteinerSets;
}
/**
 * Collects the mappings of a column's semantic type onto the graph.
 *
 * First, every match registered in the graph builder under the key
 * (domain URI + property URI) is turned into a mapping for the column.
 * Then, for every graph node registered under the domain URI that has not
 * yet reached the allowed number of this data property, a new column node
 * and data property link are added to the graph and mapped as well.
 *
 * @param sourceColumn       column the semantic type belongs to
 * @param semanticType       semantic type to look up
 * @param semanticTypesCount occurrences per (domain URI + property URI) key
 * @param addedNodes         receives the column nodes added to the graph; a fresh set is used when null
 * @return the mappings found or created; empty when the semantic type is unusable
 */
private Set<SemanticTypeMapping> findSemanticTypeInGraph(ColumnNode sourceColumn, SemanticType semanticType,
        HashMap<String, Integer> semanticTypesCount, Set<Node> addedNodes) {

    logger.debug("finding matches for semantic type in the graph ... ");

    if (addedNodes == null) {
        addedNodes = new HashSet<>();
    }

    Set<SemanticTypeMapping> result = new HashSet<>();

    if (semanticType == null) {
        logger.error("semantic type is null.");
        return result;
    }
    if (semanticType.getDomain() == null) {
        logger.error("semantic type does not have any domain");
        return result;
    }
    if (semanticType.getType() == null) {
        logger.error("semantic type does not have any link");
        return result;
    }

    String domainUri = semanticType.getDomain().getUri();
    String propertyUri = semanticType.getType().getUri();
    Double confidence = semanticType.getConfidenceScore();
    Origin origin = semanticType.getOrigin();

    String typeKey = domainUri + propertyUri;

    Integer occurrences = semanticTypesCount.get(typeKey);
    if (occurrences == null) {
        logger.error("count of semantic type should not be null or zero");
        return result;
    }
    if (domainUri == null || domainUri.isEmpty()) {
        logger.error("semantic type does not have any domain");
        return result;
    }
    if (propertyUri == null || propertyUri.isEmpty()) {
        logger.error("semantic type does not have any link");
        return result;
    }

    logger.debug("semantic type: " + domainUri + "|" + propertyUri + "|" + confidence + "|" + origin);

    // Re-use the structures already registered in the graph for this semantic type.
    Set<SemanticTypeMapping> registeredMatches = this.graphBuilder.getSemanticTypeMatches().get(typeKey);
    if (registeredMatches != null) {
        for (SemanticTypeMapping registered : registeredMatches) {
            result.add(new SemanticTypeMapping(sourceColumn, semanticType,
                    registered.getSource(), registered.getLink(), registered.getTarget()));
        }
    }

    logger.debug("adding data property to the found internal nodes ...");

    boolean allowMultipleSamePropertiesPerNode = ModelingConfigurationRegistry.getInstance()
            .getModelingConfiguration(ContextParametersRegistry.getInstance()
                    .getContextParameters(ontologyManager.getContextId()).getKarmaHome())
            .isMultipleSamePropertyPerNode();

    Set<Node> domainNodes = this.graphBuilder.getUriToNodesMap().get(domainUri);
    if (domainNodes == null) {
        return result;
    }

    for (Node source : domainNodes) {

        // Skip nodes that already carry as many of this property as allowed.
        Integer existing = this.graphBuilder.getNodeDataPropertyCount().get(source.getId() + propertyUri);
        if (existing != null) {
            int limit = allowMultipleSamePropertiesPerNode ? occurrences.intValue() : 1;
            if (existing >= limit) {
                continue;
            }
        }

        String nodeId = new RandomGUID().toString();
        ColumnNode target = new ColumnNode(nodeId, nodeId, sourceColumn.getColumnName(), null, null);
        if (!this.graphBuilder.addNode(target)) {
            continue;
        }
        addedNodes.add(target);

        String linkId = LinkIdFactory.getLinkId(propertyUri, source.getId(), target.getId());
        LabeledLink link = new DataPropertyLink(linkId, new Label(propertyUri));
        if (!this.graphBuilder.addLink(source, target, link)) {
            continue;
        }

        result.add(new SemanticTypeMapping(sourceColumn, semanticType, (InternalNode)source, link, target));
    }

    return result;
}
/**
 * Adds a new structure for the semantic type to the graph: an internal node
 * for the domain, a column node for the column, and a link between them
 * (a class-instance link when the property URI equals the fixed
 * class-instance label, otherwise a data property link).
 *
 * @param sourceColumn column the semantic type belongs to
 * @param semanticType semantic type to add
 * @param addedNodes   receives the nodes added to the graph; a fresh set is used when null
 * @return the mapping for the new structure; null when the semantic type is
 *         unusable or a node or link could not be added to the graph
 */
private SemanticTypeMapping addSemanticTypeStruct(ColumnNode sourceColumn, SemanticType semanticType, Set<Node> addedNodes) {

    logger.debug("adding semantic type to the graph ... ");

    if (addedNodes == null) {
        addedNodes = new HashSet<>();
    }

    if (semanticType == null) {
        logger.error("semantic type is null.");
        return null;
    }
    if (semanticType.getDomain() == null) {
        logger.error("semantic type does not have any domain");
        return null;
    }
    if (semanticType.getType() == null) {
        logger.error("semantic type does not have any link");
        return null;
    }

    String domainUri = semanticType.getDomain().getUri();
    String propertyUri = semanticType.getType().getUri();
    Double confidence = semanticType.getConfidenceScore();
    Origin origin = semanticType.getOrigin();

    if (domainUri == null || domainUri.isEmpty()) {
        logger.error("semantic type does not have any domain");
        return null;
    }
    if (propertyUri == null || propertyUri.isEmpty()) {
        logger.error("semantic type does not have any link");
        return null;
    }

    logger.debug("semantic type: " + domainUri + "|" + propertyUri + "|" + confidence + "|" + origin);

    // Domain class node.
    InternalNode source = new InternalNode(nodeIdFactory.getNodeId(domainUri), new Label(domainUri));
    if (!this.graphBuilder.addNodeAndUpdate(source, addedNodes)) {
        return null;
    }

    // Column node standing in for the source column.
    String columnNodeId = new RandomGUID().toString();
    ColumnNode target = new ColumnNode(columnNodeId, columnNodeId, sourceColumn.getColumnName(), null, null);
    if (!this.graphBuilder.addNode(target)) {
        return null;
    }
    addedNodes.add(target);

    String linkId = LinkIdFactory.getLinkId(propertyUri, source.getId(), target.getId());
    boolean isClassInstance = propertyUri.equalsIgnoreCase(ClassInstanceLink.getFixedLabel().getUri());
    LabeledLink link;
    if (!isClassInstance) {
        link = new DataPropertyLink(linkId, this.ontologyManager.getUriLabel(propertyUri));
    } else {
        link = new ClassInstanceLink(linkId);
    }
    if (!this.graphBuilder.addLink(source, target, link)) {
        return null;
    }

    return new SemanticTypeMapping(sourceColumn, semanticType, source, link, target);
}
/**
 * Adjusts the links of the graph builder's graph.
 *
 * Links whose URI is not the default link URI only get a new weight (and only
 * when their target is an internal node, they are not subclass links, and a
 * count is registered for their domain/link/range key). Links carrying the
 * default link URI are scheduled to be replaced by the more frequent link
 * between the two endpoint labels, in whichever direction compares higher;
 * the replacement happens after the iteration over the edge set.
 */
private void updateWeights() {
// Parallel lists describing the scheduled replacements: entry i of
// sources/targets/weights is meant to belong to entry i of newLinks.
List<DefaultLink> oldLinks = new ArrayList<>();
List<Node> sources = new ArrayList<>();
List<Node> targets = new ArrayList<>();
List<LabeledLink> newLinks = new ArrayList<>();
List<Double> weights = new ArrayList<>();
// Cache of getMoreFrequentLinkBetweenNodes results, keyed by the
// concatenation of the two endpoint label URIs.
HashMap<String, LinkFrequency> sourceTargetLinkFrequency =
new HashMap<>();
LinkFrequency lf1, lf2;
String key, key1, key2;
String linkUri;
for (DefaultLink link : this.graphBuilder.getGraph().edgeSet()) {
linkUri = link.getUri();
// Links that already have a concrete URI: re-weight only, never replace.
if (!linkUri.equalsIgnoreCase(Uris.DEFAULT_LINK_URI)) {
if (link.getTarget() instanceof InternalNode && !linkUri.equalsIgnoreCase(Uris.RDFS_SUBCLASS_URI)) {
key = "domain:" + link.getSource().getLabel().getUri() + ",link:" + linkUri + ",range:" + link.getTarget().getLabel().getUri();
Integer count = this.graphBuilder.getLinkCountMap().get(key);
// New weight: PATTERN_LINK_WEIGHT minus the link's share of all model links.
if (count != null)
this.graphBuilder.changeLinkWeight(link, ModelingParams.PATTERN_LINK_WEIGHT - ((double)count / (double)this.graphBuilder.getNumberOfModelLinks()) );
}
continue;
}
// Default-URI link: look up the more frequent link in both directions.
key1 = link.getSource().getLabel().getUri() +
link.getTarget().getLabel().getUri();
key2 = link.getTarget().getLabel().getUri() +
link.getSource().getLabel().getUri();
lf1 = sourceTargetLinkFrequency.get(key1);
if (lf1 == null) {
lf1 = this.graphBuilder.getMoreFrequentLinkBetweenNodes(link.getSource().getLabel().getUri(), link.getTarget().getLabel().getUri());
sourceTargetLinkFrequency.put(key1, lf1);
}
lf2 = sourceTargetLinkFrequency.get(key2);
if (lf2 == null) {
lf2 = this.graphBuilder.getMoreFrequentLinkBetweenNodes(link.getTarget().getLabel().getUri(), link.getSource().getLabel().getUri());
sourceTargetLinkFrequency.put(key2, lf2);
}
// NOTE(review): this assumes getMoreFrequentLinkBetweenNodes never returns
// null; a null lf1 would fail here — confirm against GraphBuilder.
int c = lf1.compareTo(lf2);
String id = null;
if (c > 0) {
// Keep the original direction, using the URI and weight of lf1.
sources.add(link.getSource());
targets.add(link.getTarget());
id = LinkIdFactory.getLinkId(lf1.getLinkUri(), link.getSource().getId(), link.getTarget().getId());
// NOTE(review): newLinks only receives an entry for ObjectPropertyLink or
// SubClassLink, while sources/targets/weights always do; any other link
// type would shift the parallel lists against each other — verify that no
// other type can carry the default link URI.
if (link instanceof ObjectPropertyLink)
newLinks.add(new ObjectPropertyLink(id, new Label(lf1.getLinkUri()), ((ObjectPropertyLink) link).getObjectPropertyType()));
else if (link instanceof SubClassLink)
newLinks.add(new SubClassLink(id));
weights.add(lf1.getWeight());
} else if (c < 0) {
// Reverse the direction, using the URI and weight of lf2.
sources.add(link.getTarget());
targets.add(link.getSource());
// NOTE(review): the id is still built from the original source/target
// order although the link is added target -> source — confirm this is intended.
id = LinkIdFactory.getLinkId(lf2.getLinkUri(), link.getSource().getId(), link.getTarget().getId());
if (link instanceof ObjectPropertyLink)
newLinks.add(new ObjectPropertyLink(id, new Label(lf2.getLinkUri()), ((ObjectPropertyLink) link).getObjectPropertyType()));
else if (link instanceof SubClassLink)
newLinks.add(new SubClassLink(id));
weights.add(lf2.getWeight());
} else
// Both directions compare equal: leave the link untouched.
continue;
oldLinks.add(link);
}
// Apply the scheduled changes only after the iteration over the edge set is done.
for (DefaultLink link : oldLinks)
this.graphBuilder.getGraph().removeEdge(link);
LabeledLink newLink;
for (int i = 0; i < newLinks.size(); i++) {
newLink = newLinks.get(i);
this.graphBuilder.addLink(sources.get(i), targets.get(i), newLink);
this.graphBuilder.changeLinkWeight(newLink, weights.get(i));
}
}
/**
 * Rounds a value to at most two fraction digits by formatting it with the
 * pattern {@code "#.##"} and parsing the text back.
 *
 * The format symbols are pinned to {@code Locale.ROOT}. With the default
 * locale, a locale that uses ',' as decimal separator produces text such as
 * "1,23", which {@code Double.valueOf} rejects with a NumberFormatException.
 *
 * @param d value to round
 * @return the value rounded to at most two fraction digits
 */
private static double roundTwoDecimals(double d) {
    // Fully qualified names keep this fix independent of the file's import list.
    DecimalFormat twoDForm = new DecimalFormat("#.##",
            java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));
    return Double.valueOf(twoDForm.format(d));
}
/**
 * Prints one tab-separated line per model (attribute count, node count,
 * link count, internal-node count, column-node count), followed by the
 * semantic-type statistics of the model's columns.
 *
 * @param semanticModels models to report on
 */
@SuppressWarnings("unused")
private static void getStatistics1(List<SemanticModel> semanticModels) {
    for (SemanticModel model : semanticModels) {
        int attributeCount = model.getColumnNodes().size();
        int nodeCount = model.getGraph().vertexSet().size();
        int linkCount = model.getGraph().edgeSet().size();

        int classNodeCount = 0;
        int dataNodeCount = 0;
        for (Node node : model.getGraph().vertexSet()) {
            if (node instanceof InternalNode) {
                classNodeCount++;
            }
            if (node instanceof ColumnNode) {
                dataNodeCount++;
            }
        }

        System.out.println(attributeCount + "\t" + nodeCount + "\t" + linkCount + "\t" + classNodeCount + "\t" + dataNodeCount);

        getStatistics2(model.getColumnNodes());
    }
}
/**
 * Prints how many columns have a user-assigned semantic type that appears
 * among the column's top four learned types, and how many of those are the
 * first learned type. Labels are compared ignoring case. Does nothing when
 * the list is null.
 *
 * @param columnNodes columns to inspect; may be null
 */
private static void getStatistics2(List<ColumnNode> columnNodes) {

    if (columnNodes == null) {
        return;
    }

    int firstSuggestionHits = 0;
    int anySuggestionHits = 0;

    for (ColumnNode column : columnNodes) {
        List<SemanticType> userTypes = column.getUserSemanticTypes();
        List<SemanticType> suggestions = column.getTopKLearnedSemanticTypes(4);

        suggestionScan:
        for (int rank = 0; rank < suggestions.size(); rank++) {
            SemanticType suggestion = suggestions.get(rank);
            if (userTypes == null) {
                continue;
            }
            for (SemanticType userType : userTypes) {
                if (!suggestion.getModelLabelString().equalsIgnoreCase(userType.getModelLabelString())) {
                    continue;
                }
                if (rank == 0) {
                    firstSuggestionHits++;
                }
                anySuggestionHits++;
                // The first hit settles this column; stop scanning its suggestions.
                break suggestionScan;
            }
        }
    }

    System.out.println("totalNumberOfAttributes: " + columnNodes.size());
    System.out.println("numberOfAttributesWhoseTypeIsInCRFTypes: " + anySuggestionHits);
    System.out.println("numberOfAttributesWhoseTypeIsFirstCRFType:" + firstSuggestionHits);
}
/**
 * Evaluation harness: treats every semantic model read from
 * {@code Params.MODEL_DIR} in turn as the new source, builds (or loads from
 * {@code Params.GRAPHS_DIR}) the alignment graph from the other models,
 * generates hypotheses, evaluates them against the correct model, exports
 * the models to Graphviz, and writes precision/recall/time of the top
 * hypothesis to a results file in {@code Params.RESULTS_DIR}.
 *
 * The results writer is managed by try-with-resources so it is closed even
 * when an iteration throws.
 *
 * NOTE(review): the modeling configuration is looked up by
 * {@code contextParameters.getId()} here, while other methods of this class
 * pass the Karma home of the context parameters — confirm which key is intended.
 *
 * @throws IllegalStateException when the ontology directory cannot be listed
 * @throws Exception             on any failure of the underlying steps
 */
public static void test() throws Exception {

    ServletContextParameterMap contextParameters = ContextParametersRegistry.getInstance().getDefault();
    ModelingConfiguration modelingConfiguration = ModelingConfigurationRegistry.getInstance().getModelingConfiguration(contextParameters.getId());

    String outputPath = Params.OUTPUT_DIR;
    String graphPath = Params.GRAPHS_DIR;

    List<SemanticModel> semanticModels =
            ModelReader.importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT);

    List<SemanticModel> trainingData = new ArrayList<>();

    OntologyManager ontologyManager = new OntologyManager(contextParameters.getId());
    File ff = new File(Params.ONTOLOGY_DIR);
    File[] files = ff.listFiles();
    // listFiles() returns null when the path is not a directory or cannot be read.
    if (files == null) {
        throw new IllegalStateException("cannot list ontology files in " + ff.getAbsolutePath());
    }
    for (File f : files) {
        ontologyManager.doImport(f, "UTF-8");
    }
    ontologyManager.updateCache();

    ModelLearningGraph modelLearningGraph = null;
    ModelLearner_Old modelLearner;

    boolean iterativeEvaluation = false;
    boolean useCorrectType = false;
    int numberOfCRFCandidates = 4;
    int numberOfKnownModels;
    String filePath = Params.RESULTS_DIR;
    String filename = "results,k=" + numberOfCRFCandidates + ".csv";

    try (PrintWriter resultFile = new PrintWriter(new File(filePath + filename))) {

        // Row 0: source names, row 1: column headers, row k + 2: results with k known models.
        StringBuffer[] resultsArray = new StringBuffer[semanticModels.size() + 2];
        for (int i = 0; i < resultsArray.length; i++) {
            resultsArray[i] = new StringBuffer();
        }

        for (int i = 0; i < semanticModels.size(); i++) {

            resultFile.flush();
            int newSourceIndex = i;
            SemanticModel newSource = semanticModels.get(newSourceIndex);

            logger.info("======================================================");
            logger.info(newSource.getName() + "(#attributes:" + newSource.getColumnNodes().size() + ")");
            System.out.println(newSource.getName() + "(#attributes:" + newSource.getColumnNodes().size() + ")");
            logger.info("======================================================");

            // Non-iterative: a single run with all other models known.
            // Iterative: runs with 0, 1, ..., size - 1 known models.
            if (!iterativeEvaluation)
                numberOfKnownModels = semanticModels.size() - 1;
            else
                numberOfKnownModels = 0;

            if (resultsArray[0].length() > 0) resultsArray[0].append(" \t ");
            resultsArray[0].append(newSource.getName() + "(" + newSource.getColumnNodes().size() + ")" + "\t" + " " + "\t" + " ");
            if (resultsArray[1].length() > 0) resultsArray[1].append(" \t ");
            resultsArray[1].append("p \t r \t t");

            while (numberOfKnownModels <= semanticModels.size() - 1) {

                // Training data: the first numberOfKnownModels models other than the new source.
                trainingData.clear();
                int j = 0, count = 0;
                while (count < numberOfKnownModels) {
                    if (j != newSourceIndex) {
                        trainingData.add(semanticModels.get(j));
                        count++;
                    }
                    j++;
                }

                modelLearningGraph = (ModelLearningGraphSparse)ModelLearningGraph.getEmptyInstance(ontologyManager, ModelLearningGraphType.Sparse);

                SemanticModel correctModel = newSource;
                List<ColumnNode> columnNodes = correctModel.getColumnNodes();

                modelLearner = new ModelLearner_Old(ontologyManager, columnNodes);
                long start = System.currentTimeMillis();

                String graphName = !iterativeEvaluation ?
                        graphPath + semanticModels.get(newSourceIndex).getName() + Params.GRAPH_JSON_FILE_EXT :
                        graphPath + semanticModels.get(newSourceIndex).getName() + ".knownModels=" + numberOfKnownModels + Params.GRAPH_JSON_FILE_EXT;

                if (new File(graphName).exists()) {
                    // Re-use the graph exported by an earlier run.
                    try {
                        logger.info("loading the graph ...");
                        DirectedWeightedMultigraph<Node, DefaultLink> graph = GraphUtil.importJson(graphName);
                        modelLearner.graphBuilder = new GraphBuilder(ontologyManager, graph, false);
                        modelLearner.nodeIdFactory = modelLearner.graphBuilder.getNodeIdFactory();
                    } catch (Exception e) {
                        // Best effort: continue with the graph the learner was constructed with.
                        logger.error("failed to load the graph from " + graphName, e);
                    }
                } else {
                    logger.info("building the graph ...");
                    for (SemanticModel sm : trainingData)
                        modelLearningGraph.addModel(sm, PatternWeightSystem.JWSPaperFormula);
                    modelLearner.graphBuilder = modelLearningGraph.getGraphBuilder();
                    modelLearner.nodeIdFactory = modelLearner.graphBuilder.getNodeIdFactory();
                    // Save the graph so later runs can load it.
                    try {
                        GraphUtil.exportJson(modelLearningGraph.getGraphBuilder().getGraph(), graphName, true, true);
                    } catch (Exception e) {
                        // Best effort: the export is only a cache.
                        logger.error("failed to export the graph to " + graphName, e);
                    }
                }

                List<SortableSemanticModel_Old> hypothesisList = modelLearner.hypothesize(useCorrectType, numberOfCRFCandidates);

                long elapsedTimeMillis = System.currentTimeMillis() - start;
                float elapsedTimeSec = elapsedTimeMillis/1000F;

                List<SortableSemanticModel_Old> topHypotheses = null;
                if (hypothesisList != null) {
                    topHypotheses = hypothesisList.size() > modelingConfiguration.getNumCandidateMappings() ?
                            hypothesisList.subList(0, modelingConfiguration.getNumCandidateMappings()) :
                            hypothesisList;
                }

                Map<String, SemanticModel> models = new TreeMap<>();

                ModelEvaluation me;
                models.put("1-correct model", correctModel);
                if (topHypotheses != null)
                    for (int k = 0; k < topHypotheses.size(); k++) {

                        SortableSemanticModel_Old m = topHypotheses.get(k);

                        me = m.evaluate(correctModel);

                        String label = "candidate" + k +
                                m.getSteinerNodes().getScoreDetailsString() +
                                "cost:" + roundTwoDecimals(m.getCost()) +
                                "-precision:" + me.getPrecision() +
                                "-recall:" + me.getRecall();

                        models.put(label, m);

                        if (k == 0) { // first rank model
                            System.out.println("number of known models: " + numberOfKnownModels +
                                    ", precision: " + me.getPrecision() +
                                    ", recall: " + me.getRecall() +
                                    ", time: " + elapsedTimeSec);
                            logger.info("number of known models: " + numberOfKnownModels +
                                    ", precision: " + me.getPrecision() +
                                    ", recall: " + me.getRecall() +
                                    ", time: " + elapsedTimeSec);
                            String s = me.getPrecision() + "\t" + me.getRecall() + "\t" + elapsedTimeSec;
                            if (resultsArray[numberOfKnownModels + 2].length() > 0)
                                resultsArray[numberOfKnownModels + 2].append(" \t ");
                            resultsArray[numberOfKnownModels + 2].append(s);
                        }
                    }

                String outName = !iterativeEvaluation ?
                        outputPath + semanticModels.get(newSourceIndex).getName() + Params.GRAPHVIS_OUT_DETAILS_FILE_EXT :
                        outputPath + semanticModels.get(newSourceIndex).getName() + ".knownModels=" + numberOfKnownModels + Params.GRAPHVIS_OUT_DETAILS_FILE_EXT;

                GraphVizUtil.exportSemanticModelsToGraphviz(
                        models,
                        newSource.getName(),
                        outName,
                        GraphVizLabelType.LocalId,
                        GraphVizLabelType.LocalUri,
                        false,
                        false);

                numberOfKnownModels++;
            }
        }

        for (StringBuffer s : resultsArray)
            resultFile.println(s.toString());
    }
}
/**
 * Command-line entry point; runs the evaluation harness.
 *
 * @param args ignored
 * @throws Exception whatever the harness throws
 */
public static void main(String[] args) throws Exception {
    ModelLearner_Old.test();
}
}