/**
 * Copyright 2015-2016 by Metanome Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.metanome.backend.result_postprocessing.result_ranking;

import de.metanome.algorithm_integration.ColumnIdentifier;
import de.metanome.backend.result_postprocessing.helper.ColumnInformation;
import de.metanome.backend.result_postprocessing.helper.TableInformation;
import de.metanome.backend.result_postprocessing.results.UniqueColumnCombinationResult;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Calculates the rankings for unique column combination results.
 *
 * <p>Each ranking value is written back onto the corresponding
 * {@link UniqueColumnCombinationResult} via its setters.
 */
public class UniqueColumnCombinationRanking extends Ranking {

  protected List<UniqueColumnCombinationResult> results;

  /**
   * Creates a ranking for the given results and immediately builds the occurrence list.
   *
   * @param results             the unique column combination results to rank
   * @param tableInformationMap table information, looked up by the table name of each result
   */
  public UniqueColumnCombinationRanking(List<UniqueColumnCombinationResult> results,
                                        Map<String, TableInformation> tableInformationMap) {
    super(tableInformationMap);
    this.results = results;
    this.occurrenceMap = new HashMap<>();

    createOccurrenceList();
  }

  /**
   * The occurrence list stores how often a column occurs in the results.
   */
  protected void createOccurrenceList() {
    initializeOccurrenceList();
    for (UniqueColumnCombinationResult result : this.results) {
      for (ColumnIdentifier column : result.getColumnCombination().getColumnIdentifiers()) {
        updateOccurrenceList(column);
      }
    }
  }

  /**
   * Calculates the column ratio and the occurrence ratio for every result.
   */
  @Override
  public void calculateDataIndependentRankings() {
    for (UniqueColumnCombinationResult result : this.results) {
      calculateColumnRatio(result);
      calculateOccurrenceRatio(result);
    }
  }

  /**
   * Calculates the column ratio, occurrence ratio, uniqueness ratio and randomness for every
   * result.
   */
  @Override
  public void calculateDataDependentRankings() {
    for (UniqueColumnCombinationResult result : this.results) {
      calculateColumnRatio(result);
      calculateOccurrenceRatio(result);
      calculateUniquenessRatio(result);
      calculateRandomness(result);
    }
  }

  /**
   * Calculates the ratio of the size of the column combination and the column count of the
   * corresponding table.
   *
   * @param result the result
   */
  protected void calculateColumnRatio(UniqueColumnCombinationResult result) {
    int columnCount = result.getColumnCombination().getColumnIdentifiers().size();
    int tableColumnCount =
        this.tableInformationMap.get(result.getTableName()).getColumnCount();
    // Cast before dividing so the ratio is not truncated by integer division.
    result.setColumnRatio((float) columnCount / tableColumnCount);
  }

  /**
   * Calculates the ratio of the unique column combination count and the overall occurrence of the
   * columns in the result.
   *
   * @param result the result
   */
  protected void calculateOccurrenceRatio(UniqueColumnCombinationResult result) {
    Set<ColumnIdentifier> columns = result.getColumnCombination().getColumnIdentifiers();
    String tableName = result.getTableName();
    // The two-argument overload is not defined in this class (presumably inherited from Ranking).
    result.setOccurrenceRatio(calculateOccurrenceRatio(columns, tableName));
  }

  /**
   * Calculates the ratio of the number of almost unique columns and all columns.
   *
   * @param result the result
   */
  protected void calculateUniquenessRatio(UniqueColumnCombinationResult result) {
    TableInformation table = this.tableInformationMap.get(result.getTableName());
    Set<ColumnIdentifier> columns = result.getColumnCombination().getColumnIdentifiers();
    // The two-argument overload is not defined in this class (presumably inherited from Ranking).
    result.setUniquenessRatio(calculateUniquenessRatio(table, columns));
  }

  /**
   * Calculates how unique the given column combination is. Therefore the distinct value counts of
   * all columns of the column combination are multiplied. Afterward this value is normalized to
   * the range spanned by the row count of the table (lower bound) and
   * {@code (rowCount - 1)} raised to the power of the number of columns (upper bound).
   *
   * <p>If the normalization range is zero, the randomness is set to 0 instead of NaN.
   *
   * @param result the result
   */
  protected void calculateRandomness(UniqueColumnCombinationResult result) {
    TableInformation table = this.tableInformationMap.get(result.getTableName());
    Map<String, ColumnInformation> columnInformationMap = table.getColumnInformationMap();
    Set<ColumnIdentifier> columns = result.getColumnCombination().getColumnIdentifiers();

    long rowCount = table.getRowCount();
    double min = rowCount;
    // '^' is bitwise XOR in Java, not exponentiation; the upper bound needs a real power.
    // Computing in double also avoids long overflow for large tables or wide combinations.
    double max = Math.pow(rowCount - 1, columns.size());

    double distinctValue = 1;
    for (ColumnIdentifier column : columns) {
      distinctValue = distinctValue
                      * columnInformationMap.get(column.getColumnIdentifier())
                          .getDistinctValuesCount();
    }

    double range = max - min;
    // A zero range would otherwise yield NaN (0/0) or an infinite value.
    float randomness = range == 0 ? 0f : (float) ((distinctValue - min) / range);
    result.setRandomness(randomness);
  }

}