// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.mydistance;
import org.talend.dataquality.record.linkage.attribute.AbstractAttributeMatcher;
import org.talend.dataquality.record.linkage.constant.AttributeMatcherType;
/**
* @author scorreia
*
* Example of Matching distance.
*/
public class MyDistance extends AbstractAttributeMatcher {

    /**
     * Number of leading characters taken into account by the comparison: two strings that share this many
     * leading characters are considered a full match.
     */
    private static final int MAX_CHAR = 4;

    /**
     * Identifies this matcher as a custom implementation.
     *
     * @return {@link AttributeMatcherType#CUSTOM}; a custom implementation should return this type
     */
    @Override
    public AttributeMatcherType getMatchType() {
        return AttributeMatcherType.CUSTOM;
    }

    /**
     * Example matching distance: the two strings match if their first {@value #MAX_CHAR} characters are
     * identical.
     * <p>
     * The weight is the length of the common prefix (capped at {@value #MAX_CHAR}) divided by
     * {@value #MAX_CHAR}. When both strings are shorter than {@value #MAX_CHAR} characters, the divisor is the
     * length of the longer one instead, so that two identical short strings still get a weight of 1.
     *
     * @param arg0 first string to compare; must not be null (the check for nullity is done by the caller)
     * @param arg1 second string to compare; must not be null (the check for nullity is done by the caller)
     * @return a value between 0 (first characters differ) and 1 (all compared characters are identical); two
     * empty strings are identical and yield 1
     */
    @Override
    public double getWeight(String arg0, String arg1) {
        final int shortest = Math.min(arg0.length(), arg1.length());
        final int longest = Math.max(arg0.length(), arg1.length());

        // Two empty strings are identical. Without this guard the divisor below would be 0 and the
        // result would be NaN (0 / 0.0) instead of a value between 0 and 1.
        if (longest == 0) {
            return 1.0;
        }

        // Count the identical leading characters, looking at no more than MAX_CHAR of them.
        final int max = Math.min(MAX_CHAR, shortest);
        int nbIdenticalChar = 0;
        while (nbIdenticalChar < max && arg0.charAt(nbIdenticalChar) == arg1.charAt(nbIdenticalChar)) {
            nbIdenticalChar++;
        }

        // Divide by MAX_CHAR, or by the longer length when both strings are shorter than MAX_CHAR.
        final int divisor = Math.min(MAX_CHAR, longest);
        return nbIdenticalChar / (double) divisor;
    }

    /**
     * Prints the weight of every pair of a few sample strings.
     * <p>
     * This method is only for testing the class. It's not required when developing a new distance. Delete it
     * if you reuse this code to build your own library.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        MyDistance dist = new MyDistance();
        String[] strings = { "testlong", "testlon", "bad", "testlong", "test", "te", "te", "mad" };
        for (String a : strings) {
            for (String b : strings) {
                System.out.println("Dist(" + a + "," + b + ")= " + dist.getWeight(a, b));
            }
        }
    }
}