package de.tud.inf.operator.fingerprints.gsc;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.clustering.IdUtils;
import com.rapidminer.operator.similarity.SimilarityAdapter;
import com.rapidminer.operator.similarity.attributebased.ExampleBasedSimilarityMeasure;

import de.tud.inf.example.set.attributevalues.ConstantArrayValue;
import de.tud.inf.example.table.ConstantArrayAttribute;

/**
 * Cosine similarity between the two-dimensional value arrays stored in the
 * <code>"fingerprint"</code> attribute of examples.
 *
 * <p>The measure must be initialised with {@link #init(ExampleSet)} before any
 * other method is used; the id set, the example set and the fingerprint
 * attribute are all assigned there.</p>
 */
public class GscCosineSimilarity extends SimilarityAdapter implements ExampleBasedSimilarityMeasure {

    /** Serialization version identifier. */
    private static final long serialVersionUID = 5697316802104876027L;

    /** The attribute looked up under the name "fingerprint" in {@link #init(ExampleSet)}. */
    private Attribute fingerPrintAttr;

    /** Ids of all examples of the example set passed to {@link #init(ExampleSet)}. */
    private Set<String> ids;

    /** The example set passed to {@link #init(ExampleSet)}; used to resolve ids to examples. */
    private ExampleSet es;

    /**
     * Binds this measure to an example set: remembers the set, looks up its
     * "fingerprint" attribute and collects the id of every example.
     *
     * @param exampleSet the example set the measure operates on
     * @throws OperatorException if the attribute obtained under the name
     *         "fingerprint" is not a {@link ConstantArrayAttribute}
     */
    public void init(ExampleSet exampleSet) throws OperatorException {
        // Load factor 1.0 with capacity == size: one slot per expected id.
        ids = new HashSet<String>(exampleSet.size(), 1.0f);
        this.es = exampleSet;
        fingerPrintAttr = exampleSet.getAttributes().get("fingerprint");

        if (!(fingerPrintAttr instanceof ConstantArrayAttribute)) {
            throw new OperatorException("SimilarityMeasure only definded for ConstantArrayAttribute");
        }

        for (Example e : exampleSet) {
            ids.add(IdUtils.getIdFromExample(e));
        }
    }

    /**
     * Computes the similarity of two examples from their fingerprint values.
     *
     * @param x the first example
     * @param y the second example
     * @return the result of {@link #similarity(ConstantArrayValue, ConstantArrayValue)}
     *         applied to the fingerprint values of both examples
     */
    public double similarity(Example x, Example y) {
        return similarity(
                (ConstantArrayValue) x.getComplexValue(fingerPrintAttr),
                (ConstantArrayValue) y.getComplexValue(fingerPrintAttr));
    }

    /**
     * @return an iterator over the ids collected in {@link #init(ExampleSet)}
     */
    public Iterator<String> getIds() {
        return ids.iterator();
    }

    /**
     * @return the number of ids collected in {@link #init(ExampleSet)}
     */
    public int getNumberOfIds() {
        return ids.size();
    }

    /**
     * @return always <code>false</code>; this measure is a similarity, not a distance
     */
    public boolean isDistance() {
        return false;
    }

    /**
     * Tells whether both ids belong to the example set this measure was
     * initialised with.
     *
     * @param x the first id
     * @param y the second id
     * @return <code>true</code> if both ids were collected in {@link #init(ExampleSet)}
     */
    public boolean isSimilarityDefined(String x, String y) {
        return ids.contains(x) && ids.contains(y);
    }

    /**
     * Computes the similarity of the two examples identified by the given ids.
     *
     * @param x the id of the first example
     * @param y the id of the second example
     * @return the similarity of the fingerprint values of both examples, or
     *         {@link Double#NaN} if {@link #isSimilarityDefined(String, String)}
     *         is <code>false</code> for the two ids
     */
    public double similarity(String x, String y) {
        if (!isSimilarityDefined(x, y)) {
            return Double.NaN;
        }

        Example ex = IdUtils.getExampleFromId(es, x);
        Example ey = IdUtils.getExampleFromId(es, y);

        return similarity(
                (ConstantArrayValue) ex.getComplexValue(fingerPrintAttr),
                (ConstantArrayValue) ey.getComplexValue(fingerPrintAttr));
    }

    /**
     * Computes the cosine similarity of two value arrays, treating each
     * two-dimensional array as one flat vector:
     * <code>sum(x*y) / (sqrt(sum(x*x)) * sqrt(sum(y*y)))</code>.
     *
     * <p>The iteration follows the shape of <code>x</code>; <code>y</code> is
     * read at the same indices and therefore has to provide at least the same
     * rows and columns.</p>
     *
     * @param x the first value array
     * @param y the second value array
     * @return the cosine similarity, or {@link Double#NaN} if the sum of
     *         squares of either array is not greater than zero
     */
    public double similarity(ConstantArrayValue x, ConstantArrayValue y) {
        double sum = 0.0;   // dot product of x and y
        double sum1 = 0.0;  // sum of squares of x
        double sum2 = 0.0;  // sum of squares of y

        for (int i = 0; i < x.getValues().length; i++) {
            // The column index j must advance here so that every element of
            // row i is visited and the row index stays within bounds.
            for (int j = 0; j < x.getValues()[i].length; j++) {
                double v1 = x.getValues()[i][j];
                double v2 = y.getValues()[i][j];

                sum = sum + v1 * v2;
                sum1 = sum1 + v1 * v1;
                sum2 = sum2 + v2 * v2;
            }
        }

        // A zero-length vector has no direction, so the cosine is undefined.
        if (sum1 > 0 && sum2 > 0) {
            return sum / (Math.sqrt(sum1) * Math.sqrt(sum2));
        }
        return Double.NaN;
    }
}