package plugins.HarmonizationComponent;
import java.io.File;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import org.semanticweb.owlapi.model.IRI;
/**
 * Pilot tool that, for every entry in the "Symptom" column of an input
 * spreadsheet, looks for the most similar ontology term (or synonym of an
 * ontology term) and the most similar entry in the "ZIEKTETEKST" column of a
 * second spreadsheet, and writes the results to an Excel workbook.
 *
 * Similarity is whatever {@code LevenshteinDistanceModel.stringMatching}
 * returns; a higher score is treated as a better match. When several
 * candidates share the best score, the first one encountered wins.
 *
 * Output columns per row: 0 = input term, 1 = matched ontology term (prefixed
 * with the synonym text when the match came from a synonym), 2 = ontology term
 * ID, 3 = ontology match score, 4 = best matching diagnosis text, 5 =
 * diagnosis match score.
 */
public class levenshteinDistance
{
    /** Workbook the matching results are written to. */
    private static final String RESULT_FILE = "/Users/pc_iverson/Desktop/Ontology_term_pilot/result2.xls";

    /** Spreadsheet whose "Symptom" column supplies the terms to match. */
    private static final String TERMS_FILE = "/Users/pc_iverson/Desktop/Ontology_term_pilot/LeidseHPOLijste2.xls";

    /** Ontology file handed to {@code OWLFunction}. */
    private static final String ONTOLOGY_FILE = "/Users/pc_iverson/Desktop/Ontology_term_pilot/human-phenotype-ontology.obo";

    /** Spreadsheet whose "ZIEKTETEKST" column supplies the diagnosis texts. */
    private static final String DIAGNOSES_FILE = "/Users/pc_iverson/Desktop/Ontology_term_pilot/CineasDiagnoses.xls";

    private OWLFunction owlFunction = null;

    /**
     * Similarity threshold for listing alternative candidates. The current
     * best-match-only output does not consult it.
     */
    private double cutOff = 60.0;

    /** Highest-scoring candidate found so far while scanning a list. */
    private static final class BestMatch
    {
        /** Matched text; empty when nothing scored above zero. */
        String text = "";

        /** Synonym that produced the match; empty when the term itself matched. */
        String synonym = "";

        /** Score of the match; stays 0 when nothing scored above zero. */
        double score = 0;
    }

    /**
     * Runs the matching with the hard-coded input and output files.
     *
     * @param args ignored
     * @throws Exception if loading the inputs, matching, or writing the workbook fails
     */
    public static void main(String args[]) throws Exception
    {
        System.out.println("The mapping has started");
        levenshteinDistance test = new levenshteinDistance();
        test.startMatching();
        System.out.println("The mapping has been done!");
    }

    /**
     * Loads the input terms, the ontology, and the diagnosis texts, matches
     * every input term against both, and writes one row per input term to the
     * result workbook.
     *
     * @throws Exception if loading the inputs, matching, or writing the workbook fails
     */
    public void startMatching() throws Exception
    {
        // Load every input before touching the output file, so a missing or
        // broken input does not leave a truncated result workbook behind.
        LevenshteinDistanceModel matchingModel = new LevenshteinDistanceModel();
        tableModel termsTable = new tableModel(TERMS_FILE, false);
        List<String> originalTerms = termsTable.getColumn("Symptom");

        this.owlFunction = new OWLFunction(ONTOLOGY_FILE);
        owlFunction.labelMapURI();
        List<String> ontologyTerms = owlFunction.getAllTerms();

        tableModel diagnosesTable = new tableModel(DIAGNOSES_FILE, false);
        List<String> diagnoses = diagnosesTable.getColumn("ZIEKTETEKST");

        // The synonym property is the same for every lookup, so build it once.
        // It is only needed when at least one lookup will actually happen.
        // NOTE(review): assumes getOntologyIRI() returns the same value on every call.
        IRI synonymProperty = null;
        if (!originalTerms.isEmpty() && !ontologyTerms.isEmpty())
        {
            synonymProperty = createSynonymProperty();
        }

        WritableWorkbook workbook = Workbook.createWorkbook(new File(RESULT_FILE));
        try
        {
            WritableSheet sheet = workbook.createSheet("result", 0);
            int rowIndex = 0;
            for (String term : originalTerms)
            {
                BestMatch ontologyMatch = findBestOntologyMatch(term, ontologyTerms, synonymProperty,
                        matchingModel);
                BestMatch diagnosisMatch = findBestDiagnosis(term, diagnoses, matchingModel);
                writeRow(sheet, rowIndex, term, ontologyMatch, diagnosisMatch);
                rowIndex++;
            }
            workbook.write();
        }
        finally
        {
            // Release the output file even when matching or writing fails.
            workbook.close();
        }
    }

    /**
     * Builds the IRI of the synonym annotation property: the ontology IRI with
     * its last two characters removed, followed by "synonym".
     */
    private IRI createSynonymProperty() throws Exception
    {
        return IRI.create(owlFunction.getOntologyIRI()
                .subSequence(0, owlFunction.getOntologyIRI().length() - 2).toString()
                + "synonym");
    }

    /**
     * Finds the ontology term whose own text or one of whose synonyms scores
     * highest against {@code term}. Each ontology term is compared before its
     * synonyms, and only a strictly higher score replaces the current best.
     */
    private BestMatch findBestOntologyMatch(String term, List<String> ontologyTerms, IRI synonymProperty,
            LevenshteinDistanceModel matchingModel) throws Exception
    {
        BestMatch best = new BestMatch();
        for (String ontologyTerm : ontologyTerms)
        {
            double termScore = matchingModel.stringMatching(term, ontologyTerm, false);
            if (best.score < termScore)
            {
                best.score = termScore;
                best.text = ontologyTerm;
                best.synonym = "";
            }

            List<String> synonyms = owlFunction.getAnnotation(ontologyTerm, synonymProperty);
            for (String synonym : synonyms)
            {
                double synonymScore = matchingModel.stringMatching(term, synonym, false);
                if (best.score < synonymScore)
                {
                    best.score = synonymScore;
                    best.text = ontologyTerm;
                    best.synonym = synonym;
                }
            }
        }
        return best;
    }

    /** Finds the diagnosis text that scores highest against {@code term}. */
    private BestMatch findBestDiagnosis(String term, List<String> diagnoses,
            LevenshteinDistanceModel matchingModel) throws Exception
    {
        BestMatch best = new BestMatch();
        for (String diagnosis : diagnoses)
        {
            double score = matchingModel.stringMatching(term, diagnosis, false);
            if (best.score < score)
            {
                best.score = score;
                best.text = diagnosis;
            }
        }
        return best;
    }

    /** Echoes the ontology match to stdout and writes one result row to the sheet. */
    private void writeRow(WritableSheet sheet, int rowIndex, String term, BestMatch ontologyMatch,
            BestMatch diagnosisMatch) throws Exception
    {
        // The ID is looked up with the bare ontology term, before any
        // "Synonym match" prefix is added for display.
        String termId = owlFunction.getOntologyTermID(ontologyMatch.text);
        System.out.println(term + "\t" + ontologyMatch.text + "\t" + termId + "\t" + ontologyMatch.score);

        String displayedTerm = ontologyMatch.text;
        if (!ontologyMatch.synonym.equals(""))
        {
            displayedTerm = "Synonym match: " + ontologyMatch.synonym + "; The ontology term: "
                    + ontologyMatch.text;
        }

        sheet.addCell(new Label(0, rowIndex, term));
        sheet.addCell(new Label(1, rowIndex, displayedTerm));
        sheet.addCell(new Label(2, rowIndex, termId));
        sheet.addCell(new Label(3, rowIndex, String.valueOf(ontologyMatch.score)));
        sheet.addCell(new Label(4, rowIndex, diagnosisMatch.text));
        sheet.addCell(new Label(5, rowIndex, String.valueOf(diagnosisMatch.score)));
    }
}