package fr.inria.diversify.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
/**
* Created by marodrig on 21/12/2014.
*/
/**
 * String comparison based on character-pair similarity: each input is split
 * into whitespace-separated words, every word is broken into its adjacent
 * two-character pairs, and the score is twice the number of shared pairs
 * divided by the total number of pairs in both inputs.
 *
 * <p>Source: http://www.catalysoft.com/articles/StrikeAMatch.html
 */
public class StringSimilarity {

    /** Single whitespace character; compiled once instead of on every call. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    /**
     * Padding appended to a one-character word so that it still contributes
     * exactly one pair (the euro sign, U+20AC, written as an escape so the
     * value does not depend on the source-file encoding).
     */
    private static final String SINGLE_CHAR_PAD = "\u20AC";

    /**
     * Compares the two strings based on letter-pair matches. The comparison is
     * case-insensitive and ignores whitespace between words.
     *
     * @param str1 first string to compare; must not be {@code null}
     * @param str2 second string to compare; must not be {@code null}
     * @return the match ratio from 0.0 to 1.0, where 1.0 is a 100% match;
     *         when neither string contains any non-whitespace character the
     *         result is 1.0
     * @throws NullPointerException if either argument is {@code null}
     */
    public static double CompareStrings(String str1, String str2) {
        // Locale.ROOT keeps the case folding independent of the JVM's default locale.
        List<String> pairs1 = wordLetterPairs(str1.toUpperCase(Locale.ROOT));
        List<String> pairs2 = wordLetterPairs(str2.toUpperCase(Locale.ROOT));

        int totalPairs = pairs1.size() + pairs2.size();
        if (totalPairs == 0) {
            // Neither input produced a pair; avoid 0/0 (NaN) and treat them as identical.
            return 1.0;
        }

        // Count how many times each pair occurs in the second string.
        Map<String, Integer> remaining = new HashMap<String, Integer>();
        for (String pair : pairs2) {
            Integer count = remaining.get(pair);
            remaining.put(pair, count == null ? 1 : count + 1);
        }

        // Each pair of the second string may be matched at most once, which
        // prevents "GGGG" from appearing to match "GG" with 100% success.
        int intersection = 0;
        for (String pair : pairs1) {
            Integer count = remaining.get(pair);
            if (count != null && count > 0) {
                intersection++;
                remaining.put(pair, count - 1);
            }
        }

        return (2.0 * intersection) / totalPairs;
    }

    /**
     * Gets all letter pairs for each individual word in the string.
     *
     * @param str text to tokenize on whitespace
     * @return the letter pairs of every non-empty word, in order of appearance
     */
    private static List<String> wordLetterPairs(String str) {
        List<String> allPairs = new ArrayList<String>();
        for (String word : WHITESPACE.split(str)) {
            // Consecutive whitespace characters produce empty tokens; skip them.
            if (word.isEmpty()) {
                continue;
            }
            for (String pair : letterPairs(word)) {
                allPairs.add(pair);
            }
        }
        return allPairs;
    }

    /**
     * Generates an array containing every two consecutive characters of the
     * input string.
     *
     * @param str a single word
     * @return the adjacent character pairs; a one-character word yields a
     *         single pair made of that character plus a padding character,
     *         and an empty word yields an empty array
     */
    private static String[] letterPairs(String str) {
        if (str.isEmpty()) {
            // Guard: length - 1 would be negative and could not size an array.
            return new String[0];
        }
        int numPairs = str.length() - 1;
        if (numPairs == 0) {
            return new String[] {str + SINGLE_CHAR_PAD};
        }
        String[] pairs = new String[numPairs];
        for (int i = 0; i < numPairs; i++) {
            pairs[i] = str.substring(i, i + 2);
        }
        return pairs;
    }
}