package org.xpect.text;

import org.xpect.util.IDifferencer.ISimilarityFunction;

import com.google.common.base.Strings;

/**
 * Similarity function for strings that scores two strings by the number of
 * characters they share at their beginning and at their end.
 *
 * <p>
 * The result is expressed with the constants declared on
 * {@link ISimilarityFunction}: {@code EQUAL} is returned for equal strings and
 * {@code UPPER_SIMILARITY_BOUND} is returned whenever the strings are treated as
 * not similar (one of them is null/empty, their lengths differ too much, or
 * they share neither a prefix nor a suffix).
 */
public class StringEndsSimilarityFunction implements ISimilarityFunction<String> {

	/**
	 * Minimum ratio {@code shorterLength / longerLength} required before the
	 * strings are compared character by character. Its reciprocal is also used to
	 * scale the final score.
	 */
	private final float threshold;

	/**
	 * Creates a similarity function with a custom threshold.
	 *
	 * @param treshold
	 *            minimum length ratio (shorter / longer) for two strings to be
	 *            compared at all; also scales the returned score by
	 *            {@code 1 / treshold}
	 */
	public StringEndsSimilarityFunction(float treshold) {
		super();
		this.threshold = treshold;
	}

	/**
	 * Creates a similarity function with the default threshold of {@code 0.4}.
	 */
	public StringEndsSimilarityFunction() {
		this(0.4f);
	}

	/**
	 * Computes the similarity of two strings based on their common prefix and
	 * common suffix.
	 *
	 * @param s1
	 *            first string, may be {@code null}
	 * @param s2
	 *            second string, may be {@code null}
	 * @return {@code EQUAL} if the strings are equal;
	 *         {@code UPPER_SIMILARITY_BOUND} if either string is null or empty,
	 *         if the length ratio is below the threshold, or if no leading or
	 *         trailing characters match; otherwise
	 *         {@code (UPPER_SIMILARITY_BOUND - matched / longerLength) / threshold},
	 *         capped at {@code UPPER_SIMILARITY_BOUND}
	 */
	public float similarity(String s1, String s2) {
		if (Strings.isNullOrEmpty(s1) || Strings.isNullOrEmpty(s2)) {
			return ISimilarityFunction.UPPER_SIMILARITY_BOUND;
		}
		if (s1.equals(s2)) {
			return ISimilarityFunction.EQUAL;
		}

		int minChars = Math.min(s1.length(), s2.length());
		int maxChars = Math.max(s1.length(), s2.length());

		// The ratio must be computed in floating point. With integer division the
		// quotient is 0 for every pair of strings of different length, which made
		// this guard reject all such pairs regardless of the threshold.
		if ((float) minChars / maxChars < threshold) {
			return ISimilarityFunction.UPPER_SIMILARITY_BOUND;
		}

		// Count matching characters from the start of both strings.
		int charsInCommon = 0;
		for (int i = 0; i < minChars; i++) {
			if (s1.charAt(i) != s2.charAt(i)) {
				break;
			}
			charsInCommon++;
		}

		// Only the part of the shorter string that was not consumed by the common
		// prefix may count towards the common suffix, so no character is counted
		// twice.
		minChars -= charsInCommon;

		// Count matching characters from the end of both strings.
		for (int i = 1; i <= minChars; i++) {
			if (s1.charAt(s1.length() - i) != s2.charAt(s2.length() - i)) {
				break;
			}
			charsInCommon++;
		}

		if (charsInCommon == 0) {
			return ISimilarityFunction.UPPER_SIMILARITY_BOUND;
		}

		float ratio = ISimilarityFunction.UPPER_SIMILARITY_BOUND - ((float) charsInCommon / maxChars);
		return Math.min(ratio * (1 / threshold), ISimilarityFunction.UPPER_SIMILARITY_BOUND);
	}
}