/******************************************************************************* * Copyright 2012 University of Southern California * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This code was developed by the Information Integration Group as part * of the Karma project at the Information Sciences Institute of the * University of Southern California. For more information, publications, * and related projects, please see: http://www.isi.edu/integration ******************************************************************************/ package edu.isi.karma.modeling.alignment.learner; import java.text.DecimalFormat; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import edu.isi.karma.config.ModelingConfiguration; import edu.isi.karma.config.ModelingConfigurationRegistry; import edu.isi.karma.rep.alignment.ColumnNode; import edu.isi.karma.rep.alignment.InternalNode; import edu.isi.karma.rep.alignment.Node; import edu.isi.karma.webserver.ContextParametersRegistry; public class SteinerNodes implements Comparable<SteinerNodes> { // private static Logger logger = LoggerFactory.getLogger(SteinerNodes.class); private static final double MIN_CONFIDENCE = 1E-6; private Set<Node> nodes; private Map<ColumnNode, ColumnNode> mappingToSourceColumns; private Confidence confidence; private NodeCoherence nodeCoherence; // private int frequency; private double score; private int semanticTypesCount; private int nonModelNodesCount; // nodes that do not belong to any pattern private Map<ColumnNode, SemanticTypeMapping> columnNodeInfo; private String contextId; public SteinerNodes(String contextId) { this.contextId = contextId; this.nodes = new HashSet<>(); this.mappingToSourceColumns = new HashMap<>(); this.columnNodeInfo = new HashMap<>(); this.semanticTypesCount = 0; this.confidence = new Confidence(); this.nodeCoherence = new NodeCoherence(); this.nonModelNodesCount = 0; // this.frequency = 0; this.score = 0.0; } public SteinerNodes(SteinerNodes steinerNodes,String contextId) { this.contextId = contextId; this.nodes = new HashSet<>(steinerNodes.getNodes()); this.mappingToSourceColumns = new HashMap<>(steinerNodes.getMappingToSourceColumns()); this.columnNodeInfo = new HashMap<>(steinerNodes.getColumnNodeInfo()); this.confidence = new Confidence(steinerNodes.getConfidence()); this.nodeCoherence = new NodeCoherence(steinerNodes.getCoherence()); // this.frequency = steinerNodes.getFrequency(); this.semanticTypesCount = steinerNodes.getSemanticTypesCount(); this.nonModelNodesCount = steinerNodes.getNonModelNodesCount(); this.score = steinerNodes.getScore(); } public Set<Node> getNodes() { return Collections.unmodifiableSet(this.nodes); } public Map<ColumnNode, ColumnNode> getMappingToSourceColumns() { return mappingToSourceColumns; } public int getSemanticTypesCount() { return semanticTypesCount; } public Map<ColumnNode, SemanticTypeMapping> getColumnNodeInfo() { return columnNodeInfo; } // public boolean addNodes(ColumnNode sourceColumn, InternalNode n1, ColumnNode n2, double confidence) { public boolean addNodes(SemanticTypeMapping stm) { ColumnNode sourceColumn = stm.getSourceColumn(); InternalNode n1 = stm.getSource(); ColumnNode n2 = stm.getTarget(); double confidence = stm.getConfidence(); if (this.nodes.contains(n1) && this.nodes.contains(n2)) return false; this.semanticTypesCount ++; if (!this.nodes.contains(n1)) { this.nodes.add(n1); this.nodeCoherence.updateCoherence(n1); if (n1.getModelIds() == null || n1.getModelIds().isEmpty()) this.nonModelNodesCount ++; } if (!this.nodes.contains(n2)) { this.nodes.add(n2); this.nodeCoherence.updateCoherence(n2); this.columnNodeInfo.put(n2, stm); this.mappingToSourceColumns.put(n2, sourceColumn); if (n2.getModelIds() == null || n2.getModelIds().isEmpty()) this.nonModelNodesCount ++; } if (confidence <= 0 || confidence > 1) confidence = MIN_CONFIDENCE; this.confidence.addValue(confidence); this.computeScore(); return true; } public boolean addNode(InternalNode n) { if (this.nodes.contains(n)) return false; this.nodes.add(n); this.nodeCoherence.updateCoherence(n); if (n.getModelIds() == null || n.getModelIds().isEmpty()) this.nonModelNodesCount ++; this.computeScore(); return true; } public Confidence getConfidence() { return this.confidence; } public int getNodesCount() { return this.nodes.size(); } public int getNonModelNodesCount() { return this.nonModelNodesCount; } public double getScore() { return this.score; } public NodeCoherence getCoherence() { return this.nodeCoherence; } // public int getFrequency() { // return frequency; // } // private int computeFrequency() { // int frequency = 0; // for (Node n : this.nodes) // frequency += n.getPatternIds().size(); // return frequency; // } // private double computeConfidenceValue() { // // if (this.confidenceList.size() == 1) // return 1e-10; // // double confidence = 1.0; // // for (double d : this.confidenceList) { // if (d == 0) // confidence *= 1e-10; // else // confidence *= d; // } // // return confidence; // } private double getSizeReduction() { int minSize = this.semanticTypesCount; int maxSize = this.semanticTypesCount * 2; if (maxSize - minSize == 0) return 0.0; //feature scaling: (x - min) / (max - min) // here: x: reduction in size --- min reduction: 0 --- max reduction: maxSize - minSize return (double)(maxSize - this.getNodesCount()) / (double)(maxSize - minSize); } // private double getHarmonicMean(double[] input) { // // double result = 0.0; // if (input == null) // return result; // // double min = 1E-6; // double sum = 0.0; // for (double d : input) { // if (d <= 0.0) d = min; // sum += 1.0 / d; // } // // if (sum == 0.0) // return result; // // result = (double) input.length / sum; // return result; // // } private double getArithmeticMean(double[] input) { double result; if (input == null) return 0.0; double sum = 0.0; for (double d : input) { if (d < 0.0) d = 0.0; sum += d; } result = sum / (double)input.length; return result; } private void computeScore() { ModelingConfiguration modelingConfiguration = ModelingConfigurationRegistry.getInstance().getModelingConfiguration(ContextParametersRegistry.getInstance().getContextParameters(contextId).getKarmaHome()); double confidence = this.confidence.getConfidenceValue(); double sizeReduction = this.getSizeReduction(); double coherence = this.nodeCoherence.getCoherenceValue(); //int frequency = this.getFrequency(); double alpha = modelingConfiguration.getScoringConfidenceCoefficient(); double beta = modelingConfiguration.getScoringCoherenceSCoefficient(); double gamma = modelingConfiguration.getScoringSizeCoefficient(); // // this.score = alpha * coherence + // beta * distanceToMaxSize + // gamma * confidence; double[] measures = new double[3]; measures[0] = alpha * confidence; measures[1] = beta * coherence; measures[2] = gamma * sizeReduction; // this.score = sizeReduction; // this.score = coherence; // this.score = confidence; // this.score = getHarmonicMean(measures); this.score = getArithmeticMean(measures); } @Override public int compareTo(SteinerNodes target) { double score1 = this.getScore(); double score2 = target.getScore(); if (score1 < score2) return 1; else if (score1 > score2) return -1; else return 0; } private static double roundTwoDecimals(double d) { DecimalFormat twoDForm = new DecimalFormat("#.##"); return Double.valueOf(twoDForm.format(d)); } public String getScoreDetailsString() { // this.computeCoherenceList(); StringBuffer sb = new StringBuffer(); // if (this.nodes != null) // for (Node n : this.nodes) { // if (n instanceof InternalNode) // sb.append(n.getLocalId()); // else { // if (mappingToSourceColumns.containsKey((ColumnNode)n)) // sb.append(mappingToSourceColumns.get((ColumnNode)n).getColumnName() ); // else // sb.append( ((ColumnNode)n).getColumnName() ); // } // sb.append("|"); // } // sb.append("\n"); sb.append("\n"); sb.append("node coherence: "); sb.append(nodeCoherence.printCoherenceList()); // sb.append("\n"); sb.append("--- value: " + this.nodeCoherence.getCoherenceValue()); sb.append("\n"); sb.append("size: " + this.getNodesCount() + ", max size: " + (this.semanticTypesCount * 2) + "---" + "size reduction: " + roundTwoDecimals(this.getSizeReduction()) ); sb.append("\n"); sb.append("confidence list: ("); for (Double cf : this.confidence.getValues()) { if (cf != null) sb.append( roundTwoDecimals(cf.doubleValue()) + ","); } sb.append(") --- "); sb.append("confidence: " + roundTwoDecimals(this.confidence.getConfidenceValue())); sb.append("\n"); // sb.append("total number of patterns: " + this.frequency); // sb.append("\n"); sb.append("final score: " + roundTwoDecimals(this.getScore()) + " - [arithmetic mean]"); sb.append("\n"); return sb.toString(); } }