package org.openhab.domain.util;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.inject.Inject;

/**
 * Created by Tony Alpskog in 2014.
 *
 * <p>Regular-expression helpers: collects captured groups from a target string and
 * scores how well a list of source words matches a target string.
 */
public class RegularExpression implements IRegularExpression {

    @Inject
    public RegularExpression() {
    }

    /**
     * Runs {@code regEx} repeatedly over {@code target} and collects every non-null,
     * non-empty capturing group of every match, in the order they are found.
     *
     * @param regEx           the regular expression to compile
     * @param target          the string to search
     * @param caseInsensitive when {@code true} the pattern is compiled with
     *                        {@link Pattern#CASE_INSENSITIVE}; otherwise matching is case-sensitive
     * @return a result holding the matcher (positioned after the last find attempt) and
     *         the list of captured group values
     */
    @Override
    public RegExResult getAllNextMatchAsList(String regEx, String target, boolean caseInsensitive) {
        // Honour the caller's flag; it was previously ignored and matching was always case-insensitive.
        int flags = caseInsensitive ? Pattern.CASE_INSENSITIVE : 0;
        Matcher matcher = Pattern.compile(regEx, flags).matcher(target);

        RegExResult regExResult = new RegExResult();
        regExResult.Matcher = matcher;
        regExResult.GroupList = new ArrayList<String>();

        while (matcher.find()) {
            for (int groupIndex = 1; groupIndex <= matcher.groupCount(); groupIndex++) {
                String group = matcher.group(groupIndex);
                if (group != null && !group.isEmpty()) {
                    regExResult.GroupList.add(group);
                }
            }
        }
        return regExResult;
    }

    /**
     * Scores how well the given source words match {@code target}.
     *
     * <p>The score is the mean of three ratios:
     * <ul>
     *   <li>word count: number of regex matches divided by the number of source words
     *       (repeated matches of the same word are each counted);</li>
     *   <li>order: number of matches whose source-list index is greater than the highest
     *       index seen so far, divided by the number of source words;</li>
     *   <li>length: the ratio between the combined length of the matches (joined by single
     *       separators) and the length of the upper-cased target, smaller over larger.</li>
     * </ul>
     * The score is 0 when {@code sourceWordsList} is empty.
     *
     * @param sourceWordsList the words to look for; each is used as a regex alternative
     * @param target          the string to search; it is upper-cased before matching
     * @return the source words that were matched (in match order) and the accuracy score
     */
    @Override
    public RegExAccuracyResult getStringMatchAccuracy(List<String> sourceWordsList, String target) {
        List<String> matchingWords = new ArrayList<String>();
        int sourceWordCount = sourceWordsList.size();

        String regEx = getRegExStringForMatchAccuracySource(sourceWordsList);
        String upperTarget = target.toUpperCase();
        List<String> matches = getAllNextMatchAsList(regEx, upperTarget, true).GroupList;

        int totalMatchLength = 0;
        int orderedMatchCount = 0;
        int lastListMatchIndex = -1;
        for (String match : matches) {
            //TODO - Call LevenshteinDistance for all non-regex-matching strings
            // +1 accounts for a separator after each matched word; the surplus one is removed below.
            totalMatchLength += match.length() + 1;

            int listMatchIndex = findSourceIndex(sourceWordsList, match);
            if (listMatchIndex == -1) {
                // Source words are used as unescaped regex fragments, so a match may not
                // literally contain any source word. Skip it instead of indexing with -1.
                continue;
            }

            matchingWords.add(sourceWordsList.get(listMatchIndex));
            if (listMatchIndex > lastListMatchIndex) {
                lastListMatchIndex = listMatchIndex;
                orderedMatchCount++;
            }
        }

        // Drop the trailing separator counted for the last match (yields -1 when nothing matched).
        totalMatchLength -= 1;

        double lengthDifferenceAccuracy = 0;
        int targetLength = upperTarget.length();
        if (targetLength > 0) {
            lengthDifferenceAccuracy = totalMatchLength > targetLength
                    ? (double) targetLength / totalMatchLength
                    : (double) totalMatchLength / targetLength;
        }
        if (lengthDifferenceAccuracy < 0) {
            lengthDifferenceAccuracy = 0;
        }

        double accuracy = 0;
        if (sourceWordCount > 0) {
            double wordCountAccuracy = (double) matches.size() / sourceWordCount;
            double orderAccuracy = (double) orderedMatchCount / sourceWordCount;
            accuracy = (wordCountAccuracy + orderAccuracy + lengthDifferenceAccuracy) / 3;
        }
        return new RegExAccuracyResult(matchingWords, accuracy);
    }

    /**
     * Finds the index of the source word that corresponds to a matched string: first by exact
     * equality, then by the first source word that the match contains, ignoring case.
     *
     * @return the index in {@code sourceWordsList}, or -1 when no source word corresponds
     */
    private int findSourceIndex(List<String> sourceWordsList, String match) {
        int exactIndex = sourceWordsList.indexOf(match);
        if (exactIndex != -1) {
            return exactIndex;
        }
        String upperMatch = match.toUpperCase();
        for (int sourceIndex = 0; sourceIndex < sourceWordsList.size(); sourceIndex++) {
            if (upperMatch.contains(sourceWordsList.get(sourceIndex).toUpperCase())) {
                return sourceIndex;
            }
        }
        return -1;
    }

    /**
     * Builds a regex of alternatives, one capturing group per source word, e.g.
     * {@code (FIRST)|(SECOND)}. Words are upper-cased and inserted as-is (not escaped).
     *
     * @param source the words to turn into alternatives
     * @return the regex string; empty when {@code source} is empty
     */
    @Override
    public String getRegExStringForMatchAccuracySource(String[] source) {
        StringBuilder sb = new StringBuilder();
        for (String partOfLabel : source) {
            if (sb.length() > 0) {
                sb.append('|');
            }
            sb.append('(').append(partOfLabel.toUpperCase()).append(')');
        }
        return sb.toString();
    }

    /**
     * List overload of {@link #getRegExStringForMatchAccuracySource(String[])}.
     *
     * @param source the words to turn into alternatives
     * @return the regex string; empty when {@code source} is empty
     */
    @Override
    public String getRegExStringForMatchAccuracySource(List<String> source) {
        return getRegExStringForMatchAccuracySource(source.toArray(new String[0]));
    }
}