package edu.isistan.uima.unified.algorithms.clustering.distance; import net.didion.jwnl.JWNLException; import net.didion.jwnl.data.POS; import net.didion.jwnl.data.Synset; import net.didion.jwnl.dictionary.Dictionary; import edu.isistan.uima.unified.algorithms.clustering.data.DataPoint; import edu.isistan.uima.unified.algorithms.clustering.data.LabeledDataPoint; import edu.isistan.uima.unified.algorithms.similarity.SimilarityMeasure; public class SemanticDistanceMeasure implements DistanceMeasure { private SimilarityMeasure similarityMeasure = null; private Dictionary dictionary; public SemanticDistanceMeasure(SimilarityMeasure similarityMeasure) { this.similarityMeasure = similarityMeasure; dictionary = Dictionary.getInstance(); } @Override public double distance(DataPoint firstPoint, DataPoint secondPoint) { long offset1 = (long) firstPoint.getValue(); POS pos1 = getPartOfSpeech(((LabeledDataPoint)firstPoint).getLabel()); long offset2 = (long) secondPoint.getValue(); POS pos2 = getPartOfSpeech(((LabeledDataPoint)secondPoint).getLabel()); Synset s1 = null; Synset s2 = null; try { s1 = dictionary.getSynsetAt(pos1, offset1); s2 = dictionary.getSynsetAt(pos2, offset2); } catch (JWNLException e) { e.printStackTrace(); } if(s1 != null && s2 != null) try { double similarity = similarityMeasure.getSimilarity(s1, s2); double distance = SemanticDistanceMeasureConverter.logarithmic(similarity); return distance; } catch (JWNLException e) { e.printStackTrace(); } return 0; } /** * Conversion of Penn Treebank TAGs to JWNL POS Objects */ private POS getPartOfSpeech(String postag) { if(postag.startsWith("NN")) { return POS.NOUN; } if (postag.startsWith("VB")) { return POS.VERB; } if (postag.startsWith("JJ")) { return POS.ADJECTIVE; } if (postag.startsWith("RB")) { return POS.ADVERB; } return null; } }