/*******************************************************************************
* Copyright (c) 2015 Pivotal, Inc.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Pivotal, Inc. - initial API and implementation
*******************************************************************************/
package org.springframework.ide.eclipse.editor.support.util;
/**
* @author Kris De Volder
*/
public class FuzzyMatcher {

    /** Value returned by {@link #matchScore(String, String)} when the pattern does not match. */
    private static final double NO_MATCH = 0.0;

    /**
     * Computes how well {@code data} matches {@code pattern} under subsequence
     * ("fuzzy") matching: the data matches when every character of the pattern
     * occurs in the data, in the same order, possibly separated by runs of
     * unrelated characters ("gaps").
     * <p>
     * Each pattern character is matched against its first occurrence in the part
     * of the data that has not been consumed yet.
     *
     * @param pattern the characters to look for, in order
     * @param data the text to search in
     * @return {@code 0.0} when the pattern does not match; otherwise a non-zero
     *         (negative) score, where a higher value means a better match
     */
    public static double matchScore(String pattern, String data) {
        final int patternLength = pattern.length();
        final int dataLength = data.length();
        if (patternLength > dataLength) {
            // A pattern longer than the data can never be a subsequence of it.
            return NO_MATCH;
        }

        int cursor = 0;       // index of the first data char not yet consumed
        int gapCount = 0;     // number of non-empty runs of data chars skipped before a matched char
        int skippedChars = 0; // total number of data chars not used by the match

        for (int i = 0; i < patternLength; i++) {
            // indexOf yields -1 both when the char is absent and when the cursor
            // has already run past the end of the data.
            final int hit = data.indexOf(pattern.charAt(i), cursor);
            if (hit < 0) {
                return NO_MATCH;
            }
            final int jumped = hit - cursor;
            if (jumped > 0) {
                gapCount++;
                skippedChars += jumped;
            }
            cursor = hit + 1;
        }

        // Whole pattern matched. Unused chars at the end of the data are counted
        // as skipped (more leftovers => worse score) but not as a gap; counting
        // them as a gap would favor matches at the end of the string over
        // matches in the middle. The cursor never exceeds the data length, so
        // this adds zero when nothing is left over.
        skippedChars += dataLength - cursor;

        return score(gapCount, skippedChars);
    }

    /**
     * Converts the gap and skip counts of a successful match into a score.
     *
     * @param gaps number of gaps in the match
     * @param skips total number of data characters not used by the match
     * @return the negated "badness" of the match, so that higher is better
     */
    private static double score(int gaps, int skips) {
        // No gaps means the pattern is a prefix of the data. All such matches get
        // the same score regardless of trailing skipped chars, so that a 'stable'
        // sorter keeps them in their incoming (alphabetic) order (see STS-4049).
        // Otherwise the gap count dominates and skipped chars act as a tie-breaker.
        final double badness = (gaps == 0)
                ? 0.1
                : 1 + gaps + skips / 10000.0;
        return -badness;
    }
}