/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.conceptprofilegenerator.generators; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; import org.erasmusmc.applications.conceptprofileevaluator.SubGroundhogStatistics; import org.erasmusmc.collections.SortedIntList2FloatMap; import org.erasmusmc.groundhog.ConceptStatistic; import org.erasmusmc.groundhog.GroundhogStatistics; import org.erasmusmc.math.AssociationMeasures; import org.erasmusmc.ontology.ConceptProfile; import org.erasmusmc.ontology.ConceptVector; public class CPGeneratorUncertaintyCoefficientConceptFrequencies extends ConceptProfileGenerator { public double cutoff =0d; public CPGeneratorUncertaintyCoefficientConceptFrequencies(GroundhogStatistics wholeGroundhogStatistics, Set<Integer> conceptsForFiltering) { super(wholeGroundhogStatistics, conceptsForFiltering); } /* * public Map<Double, ConceptProfile> generateConceptProfiles(Map<Double, * Double> cutoffvalues) { Map<Double, ConceptProfile> result = new HashMap<Double, * ConceptProfile>(); for (Double key: cutoffvalues.keySet()) { * result.put(key, generateConceptProfile(cutoffvalues.get(key))); } * * return result; } */ @Override public ConceptProfile generateConceptProfile(SubGroundhogStatistics subGroundhogStatistics, Integer ownerConcept) { List<ConceptVectorEntry> values = new ArrayList<ConceptVectorEntry>(subGroundhogStatistics.conceptStatistics.values().size()); for (Map.Entry<Integer, ConceptStatistic> entry : subGroundhogStatistics.conceptStatistics.entrySet()){ int concept = entry.getKey(); ConceptStatistic conceptStatistic = entry.getValue(); double weight = 0; if (conceptsToBeFiltered == null || !conceptsToBeFiltered.contains(concept)) weight = getWeight(concept, conceptStatistic, subGroundhogStatistics.allConceptOccurrences); if (weight > cutoff) { values.add(new ConceptVectorEntry(concept, weight)); } } Collections.sort(values, ConceptVectorEntry.fingerprintEntryComparator()); int i = 0; int size; if (values.size()>maxNumberOfConceptsPerProfile)size=maxNumberOfConceptsPerProfile; else size = values.size(); SortedIntList2FloatMap map = new SortedIntList2FloatMap(size); while (i < size) { ConceptVectorEntry mapentry = values.get(i); map.put(mapentry.key, new Double(mapentry.value).floatValue()); i++; } ConceptVector cv =new ConceptVector(null,map); ConceptProfile result = new ConceptProfile(ownerConcept, cv); return result; } protected double getWeight(int cid, ConceptStatistic conceptStatistic, Integer allConceptOccurrencesInSubset) { int A = conceptStatistic.termFrequency; int B; ConceptStatistic mainCollectionStatistic = groundhogStatistics.conceptStatistics.get(cid); if (mainCollectionStatistic == null){ B = - A; } else { B = mainCollectionStatistic.termFrequency - A; } int C = allConceptOccurrencesInSubset - A; int D = groundhogStatistics.allConceptOccurrences - A - B - C; return AssociationMeasures.symmetricUncertaintyCoefficient(A, B, C, D) ; // return AssociationMeasures.asymmetricUncertaintyCoefficient(A, C, B, D) ; } }