/**
* Copyright (C) 2001-3, Anthony Harrison anh23@pitt.edu This library is free
* software; you can redistribute it and/or modify it under the terms of the GNU
* Lesser General Public License as published by the Free Software Foundation;
* either version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details. You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.jactr.core.utils.similarity;
import java.util.Collection;
import org.jactr.core.chunk.IChunk;
import org.jactr.core.chunk.ISymbolicChunk;
import org.jactr.core.chunk.five.ISubsymbolicChunk5;
import org.jactr.core.slot.ISlot;
/**
 * Similarity handler that computes the similarity between any two values,
 * scaled to fall between <code>maxDiff</code> (completely dissimilar) and
 * <code>maxSim</code> (identical). For chunks the result is based on the
 * average of the similarities between their slot values. This is similar, if
 * not identical, to Raluca's proposal.
 * <p>
 * The rules are applied in this order:
 * <ol>
 * <li>both arguments are the same reference (or both null): maxSim</li>
 * <li>the depth budget is exhausted (depth == 0): maxDiff</li>
 * <li>exactly one argument is null: maxDiff</li>
 * <li>both are numbers: the relative difference, scaled into
 * [maxDiff, maxSim]; a number against a non-number is maxDiff</li>
 * <li>both are strings: maxSim if they are equal, otherwise maxDiff (there
 * is no graded comparison of strings)</li>
 * <li>both are chunks:
 * <ul>
 * <li>if either subsymbolic chunk is not an ISubsymbolicChunk5: maxDiff</li>
 * <li>if the first chunk already reports a (non-NaN) similarity to the
 * second: that value</li>
 * <li>if neither chunk type is derived from the other: maxDiff</li>
 * <li>otherwise the slot values are compared recursively, using the chunk
 * with the fewer slots as the basis; every surplus slot of the larger chunk
 * counts as maxDiff</li>
 * </ul>
 * </li>
 * <li>anything else: maxDiff</li>
 * </ol>
 *
 * @author harrison
 * @created April 18, 2003
 */
public class AutoSimilarityHandler implements SimilarityHandler
{

  /**
   * Maximum number of nested chunk levels that will be descended into while
   * comparing slot values. A comparison requested with a depth of 0 yields
   * maxDiff (unless both values are the same reference); a negative value
   * never reaches 0 and therefore imposes no limit.
   */
  protected int _depth;

  /**
   * Creates a handler with no depth limit (depth of -1).
   */
  public AutoSimilarityHandler()
  {
    this(-1);
  }

  /**
   * Creates a handler with an explicit depth limit.
   *
   * @param depth
   *          number of nested chunk levels to descend into; negative for no
   *          limit
   */
  public AutoSimilarityHandler(int depth)
  {
    _depth = depth;
  }

  /**
   * This handler accepts every pair of values.
   *
   * @param one
   *          first value (ignored)
   * @param two
   *          second value (ignored)
   * @return always true
   */
  public boolean handles(Object one, Object two)
  {
    return true;
  }

  /**
   * Computes the similarity between one and two, scaled to fit between
   * maxDiff and maxSim, using the depth limit this handler was constructed
   * with.
   *
   * @param one
   *          first value, may be null
   * @param two
   *          second value, may be null
   * @param maxDiff
   *          value returned for completely dissimilar arguments
   * @param maxSim
   *          value returned for identical arguments
   * @return the similarity between one and two
   */
  public double computeSimilarity(Object one, Object two, double maxDiff,
      double maxSim)
  {
    return computeSimilarity(one, two, maxDiff, maxSim, _depth);
  }

  /**
   * Computes the similarity between one and two, descending into chunk slot
   * values for at most depth nested levels.
   *
   * @param one
   *          first value, may be null
   * @param two
   *          second value, may be null
   * @param maxDiff
   *          value returned for completely dissimilar arguments
   * @param maxSim
   *          value returned for identical arguments
   * @param depth
   *          remaining number of levels that may be descended; 0 stops the
   *          descent, negative values never stop it
   * @return the similarity between one and two
   */
  protected double computeSimilarity(Object one, Object two, double maxDiff,
      double maxSim, int depth)
  {
    // identical (this also covers both being null)
    if (one == two) return maxSim;

    // we've drilled all the way down
    if (depth == 0) return maxDiff;

    // both-null was handled above, so a null here means exactly one side is
    // missing
    if (one == null || two == null) return maxDiff;

    if (one instanceof Number)
    {
      if (!(two instanceof Number)) return maxDiff;
      return numberSimilarity((Number) one, (Number) two, maxDiff, maxSim);
    }

    if (one instanceof String)
    {
      // there is no numeric measure for strings: they either match or not
      return one.equals(two) ? maxSim : maxDiff;
    }

    if (one instanceof IChunk)
    {
      if (!(two instanceof IChunk)) return maxDiff;
      return chunkSimilarity((IChunk) one, (IChunk) two, maxDiff, maxSim,
          depth);
    }

    return maxDiff;
  }

  /**
   * Similarity of two numbers: 1 - |one - two| / max(|one|, |two|), clamped
   * to [0, 1] and then mapped linearly onto [maxDiff, maxSim].
   */
  private static double numberSimilarity(Number one, Number two,
      double maxDiff, double maxSim)
  {
    double first = one.doubleValue();
    double second = two.doubleValue();

    // equal values (including two zeros) - also keeps the division below
    // away from a zero denominator
    if (first == second) return maxSim;

    double ratio = Math.abs(first - second)
        / Math.max(Math.abs(first), Math.abs(second));

    // NaN arises from NaN or infinite operands: treat as totally different.
    // Values of opposite sign give a ratio above 1, hence the clamp.
    double scale = Double.isNaN(ratio) ? 0 : Math.max(0, 1 - ratio);

    return scale * (maxSim - maxDiff) + maxDiff;
  }

  /**
   * Similarity of two chunks: a similarity already reported by the first
   * chunk if there is one, otherwise the average similarity of the slot
   * values.
   */
  private double chunkSimilarity(IChunk one, IChunk two, double maxDiff,
      double maxSim, int depth)
  {
    /*
     * both chunks must have ISubsymbolicChunk5
     */
    if (!(one.getSubsymbolicChunk() instanceof ISubsymbolicChunk5)
        || !(two.getSubsymbolicChunk() instanceof ISubsymbolicChunk5))
      return maxDiff;

    // if the sim is already defined (i.e. not NaN)... return it
    double defined = ((ISubsymbolicChunk5) one.getSubsymbolicChunk())
        .getSimilarity(two);
    if (!Double.isNaN(defined)) return defined;

    ISymbolicChunk first = one.getSymbolicChunk();
    ISymbolicChunk second = two.getSymbolicChunk();

    // unrelated chunk types?
    if (!first.isA(second.getChunkType())
        && !second.isA(first.getChunkType())) return maxDiff;

    Collection<? extends ISlot> firstSlots = first.getSlots();
    Collection<? extends ISlot> secondSlots = second.getSlots();

    // whoever has the fewest slots is the basis of comparison; its slots are
    // looked up by name in the other chunk
    Collection<? extends ISlot> basis;
    ISymbolicChunk other;
    if (firstSlots.size() < secondSlots.size())
    {
      basis = firstSlots;
      other = second;
    }
    else
    {
      basis = secondSlots;
      other = first;
    }

    // slots that only the larger chunk has each count as maxDiff
    int surplus = Math.abs(firstSlots.size() - secondSlots.size());

    // one level is consumed per level of nesting, not per slot, so every
    // slot of this chunk is compared with the same remaining depth
    int nextDepth = depth > 0 ? depth - 1 : depth;

    double sim = maxDiff * surplus;
    for (ISlot slot : basis)
    {
      try
      {
        Object valueOne = slot.getValue();
        Object valueTwo = other.getSlot(slot.getName()).getValue();
        sim += computeSimilarity(valueOne, valueTwo, maxDiff, maxSim,
            nextDepth);
      }
      catch (Exception e)
      {
        // slot error (e.g. the other chunk has no such slot): deliberately
        // best effort, the slot is simply treated as completely different
        sim += maxDiff;
      }
    }

    // NOTE(review): when both chunks have no slots at all this is 0.0 / 0 and
    // yields NaN - confirm whether callers expect maxSim or maxDiff instead
    return sim / (basis.size() + surplus);
  }
}