/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.ontology.ontologyutilities;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Map.Entry;
import org.erasmusmc.collections.IntList;
import org.erasmusmc.math.vector.SparseVectorInt2Float;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.peregrine.ConceptPeregrine;
import org.erasmusmc.peregrine.ReleasedTerm;
import org.erasmusmc.utilities.StringUtilities;
import org.erasmusmc.utilities.TextFileUtilities;
/**
 * Finds homonyms in the released thesaurus, i.e. released terms that are linked to more than
 * one distinct concept ID, and reports which concepts share terms with each other.
 *
 * <p>Every public method calls {@code release()} first and then iterates over the inherited
 * {@code terms}; concept details are looked up through the inherited {@code ontology}.
 */
public class HomonymAnalyzer extends ConceptPeregrine {

  /**
   * Writes one line per homonymous term to the given file.
   *
   * <p>Line format (tab separated): number of distinct concepts, term text, and the distinct
   * concept IDs in ascending order, each followed by {@code ;}.
   *
   * @param filename target file, passed unchanged to {@code TextFileUtilities.saveToFile}
   */
  public void countHomonyms(String filename) {
    System.out.println("Releasing thesaurus");
    release();
    System.out.println("Saving homonyms");
    List<String> lines = new ArrayList<String>();
    for (ReleasedTerm term : terms) {
      Set<Integer> uniqueCIDs = distinctConceptIds(term);
      if (uniqueCIDs.size() > 1) {
        // The buffer never leaves this method, so the unsynchronized builder is sufficient.
        StringBuilder line = new StringBuilder();
        line.append(uniqueCIDs.size());
        line.append("\t");
        line.append(firstTermText(term));
        line.append("\t");
        for (Integer conceptId : uniqueCIDs) {
          line.append(conceptId);
          line.append(";");
        }
        lines.add(line.toString());
      }
    }
    TextFileUtilities.saveToFile(lines, filename);
    System.out.println("Done");
  }

  /**
   * Returns a map from concept ID to a map containing concept IDs, and the strings (terms)
   * that they have in common.
   *
   * <p>The result is symmetric: when concepts A and B share a term, the term is recorded both
   * under A → B and under B → A.
   *
   * @return the overlap map; only concepts that share at least one term with another concept
   *     appear as keys
   */
  public Map<Integer, Map<Integer, List<String>>> compareConcepts() {
    System.out.println("Releasing thesaurus");
    release();
    System.out.println("Analyzing terms");
    Map<Integer, Map<Integer, List<String>>> overlap =
        new TreeMap<Integer, Map<Integer, List<String>>>();
    for (ReleasedTerm term : terms) {
      Set<Integer> uniqueCIDs = distinctConceptIds(term);
      if (uniqueCIDs.size() > 1) {
        List<Integer> array = new IntList();
        array.addAll(uniqueCIDs);
        String termString = firstTermText(term);
        // NOTE(review): prints the concept IDs of every homonym; looks like leftover debug
        // output. Kept so that console output does not change - confirm before removing.
        System.out.println(array);
        for (int i = 0; i < array.size(); i++) {
          for (int j = 0; j < array.size(); j++) {
            if (i != j) {
              Map<Integer, List<String>> map = overlap.get(array.get(i));
              if (map == null) {
                map = new TreeMap<Integer, List<String>>();
                overlap.put(array.get(i), map);
              }
              List<String> overlapStrings = map.get(array.get(j));
              if (overlapStrings == null) {
                overlapStrings = new ArrayList<String>();
                map.put(array.get(j), overlapStrings);
              }
              overlapStrings.add(termString);
            }
          }
        }
      }
    }
    return overlap;
  }

  /**
   * Returns a map from concept ID to a vector containing concept IDs, and the number of terms
   * that they have in common.
   *
   * <p>Unlike {@link #compareConcepts()}, each pair is stored only once, under the smaller
   * concept ID, and only the count is kept rather than the term strings.
   *
   * @return the overlap counts, keyed by the smaller concept ID of each pair
   */
  public Map<Integer, SparseVectorInt2Float> compareConceptsLight() {
    System.out.println("Releasing thesaurus");
    release();
    System.out.println("Analyzing terms");
    Map<Integer, SparseVectorInt2Float> overlap = new TreeMap<Integer, SparseVectorInt2Float>();
    for (ReleasedTerm term : terms) {
      Set<Integer> uniqueCIDs = distinctConceptIds(term);
      if (uniqueCIDs.size() > 1) {
        // Copying a TreeSet yields the IDs in ascending order already, so no extra sort is
        // needed to guarantee array.get(i) < array.get(j) for i < j.
        List<Integer> array = new ArrayList<Integer>(uniqueCIDs);
        for (int i = 0; i < (array.size() - 1); i++) {
          SparseVectorInt2Float map = overlap.get(array.get(i));
          if (map == null) {
            map = new SparseVectorInt2Float();
            overlap.put(array.get(i), map);
          }
          for (int j = i + 1; j < array.size(); j++) {
            // Presumably get() yields 0 for an index that has not been set - TODO confirm
            // against SparseVectorInt2Float.
            double sharedTermCount = map.get(array.get(j));
            map.set(array.get(j), sharedTermCount + 1);
          }
        }
      }
    }
    return overlap;
  }

  /**
   * Runs {@link #compareConcepts()} and writes every concept pair that shares more than one
   * term to the given file.
   *
   * <p>Line format (tab separated): first concept ID, second concept ID, number of shared
   * terms, number of terms of the first concept, number of terms of the second concept,
   * {@code 1} if the shared terms cover all terms of either concept (otherwise {@code 0}),
   * the Dice coefficient {@code 2 * shared / (terms1 + terms2)}, and the shared terms joined
   * with {@code ;}.
   *
   * @param filename target file, passed unchanged to {@code TextFileUtilities.saveToFile}
   */
  public void compareConceptsAndPrint2File(String filename) {
    Map<Integer, Map<Integer, List<String>>> overlap = compareConcepts();
    List<String> lines = new ArrayList<String>();
    for (Entry<Integer, Map<Integer, List<String>>> entry : overlap.entrySet()) {
      Concept concept1 = ontology.getConcept(entry.getKey());
      Map<Integer, List<String>> strings = entry.getValue();
      for (Entry<Integer, List<String>> entry2 : strings.entrySet()) {
        List<String> value = entry2.getValue();
        Concept concept2 = ontology.getConcept(entry2.getKey());
        if (value.size() > 1) {
          int sharedCount = value.size();
          int termCount1 = concept1.getTerms().size();
          int termCount2 = concept2.getTerms().size();
          String line = concept1.getID() + "\t" + concept2.getID() + "\t" + sharedCount
              + "\t" + termCount1 + "\t" + termCount2;
          // Flag pairs where one concept's terms are all shared with the other. The second
          // operand must test concept2; it previously repeated the concept1 comparison.
          if (sharedCount == termCount1 || sharedCount == termCount2) {
            line += "\t1";
          } else {
            line += "\t0";
          }
          double dice = 2d * sharedCount / (termCount1 + termCount2);
          line += "\t" + dice;
          String sharedTerms = StringUtilities.join(value, ";");
          line += "\t" + sharedTerms;
          lines.add(line);
        }
      }
    }
    TextFileUtilities.saveToFile(lines, filename);
    System.out.println("Done");
  }

  /** Collects the distinct concept IDs attached to a released term, in ascending order. */
  private static Set<Integer> distinctConceptIds(ReleasedTerm term) {
    Set<Integer> uniqueCIDs = new TreeSet<Integer>();
    for (int cid : term.conceptId) {
      uniqueCIDs.add(cid);
    }
    return uniqueCIDs;
  }

  /** Looks up the text of the released term via its first (concept ID, term ID) pair. */
  private String firstTermText(ReleasedTerm term) {
    return ontology.getConcept(term.conceptId[0]).getTerms().get(term.termId[0]).text;
  }
}