/************************************************************************ * Copyright (C) 2006-2007 The University of Sheffield * * Developed by Mark A. Greenwood <m.greenwood@dcs.shef.ac.uk> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the Free Software * * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * ************************************************************************/ package edu.isistan.uima.unified.algorithms.similarity; import net.didion.jwnl.JWNLException; import net.didion.jwnl.data.Synset; /** * An implementation of the WordNet similarity measure developed by Lin. For * full details of the measure see: * <blockquote>Lin D. 1998. An information-theoretic definition of similarity. In * Proceedings of the 15th International Conference on Machine * Learning, Madison, WI.</blockquote> * @author Mark A. Greenwood */ public class Lin extends ICMeasure { /** * Instances of this similarity measure should be generated using the * factory methods of {@link SimilarityMeasure}. */ protected Lin() { //A protected constructor to force the use of the newInstance method } @Override public double getSimilarity(Synset s1, Synset s2) throws JWNLException { //if the POS tags are not the same then return 0 as this measure //only works with 2 nouns or 2 verbs. if (!s1.getPOS().equals(s2.getPOS())) return 0; //see if the similarity is already cached and... Double cached = getFromCache(s1, s2); //if it is then simply return it if (cached != null) return cached.doubleValue(); //Get the Information Content (IC) values for the two supplied synsets double ic1 = getIC(s1); double ic2 = getIC(s2); //if either IC value is zero then cache and return a sim of 0 if (ic1 == 0 || ic2 == 0) return addToCache(s1,s2,0); //Get the Lowest Common Subsumer (LCS) of the two synsets Synset lcs = getLCSbyIC(s1,s2); //if there isn't an LCS then cache and return a sim of 0 if (lcs == null) return addToCache(s1,s2,0); //get the IC valueof the LCS double icLCS = getIC(lcs); //caluclaue the similarity score double sim = (2*icLCS)/(ic1+ic2); //cache and return the calculated similarity return addToCache(s1,s2,sim); } }