// Copyright 2016 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.devtools.build.lib.util; import javax.annotation.Nullable; /** * Class that provides functions to do spell checking, i.e. detect typos * and make suggestions. */ public final class SpellChecker { /** * Computes the edit distance between two strings. The edit distance is * the minimum number of insertions, deletions and replacements to * transform a string into the other string. * * maxEditDistance is the maximum distance the function can return. If * it would be greater, the function returns -1. It is useful for * speeding up the computations. */ public static int editDistance(String s1, String s2, int maxEditDistance) { // This is the Levenshtein distance, as described here: // http://en.wikipedia.org/wiki/Levenshtein_distance // // We don't need to keep the full matrix. To update a cell, we only // need top-left, top, and left values. Using a single array is // sufficient. Top value is still in row[j] from the last iteration. // Top-left value is stored in 'previous'. Left value is row[j - 1]. if (s1.equals(s2)) { return 0; } // Short-circuit based on string length. if (Math.abs(s1.length() - s2.length()) > maxEditDistance) { return -1; } int[] row = new int[s2.length() + 1]; for (int i = 0; i <= s2.length(); i++) { row[i] = i; } for (int i = 1; i <= s1.length(); i++) { row[0] = i; int bestInTheRow = row[0]; int previous = i - 1; for (int j = 1; j <= s2.length(); j++) { int old = row[j]; row[j] = Math.min( previous + (s1.charAt(i - 1) == s2.charAt(j - 1) ? 0 : 1), 1 + Math.min(row[j - 1], row[j])); previous = old; bestInTheRow = Math.min(bestInTheRow, row[j]); } if (bestInTheRow > maxEditDistance) { return -1; } } int result = row[s2.length()]; return result <= maxEditDistance ? result : -1; } /** * Find in words which string is the most similar to input (according to * the edit distance, ignoring case) - or null if no string is similar * enough. In case of equality, the first one in words wins. */ @Nullable public static String suggest(String input, Iterable<String> words) { String best = null; // Heuristic: the expected number of typos depends on the length of the word. int bestDistance = Math.min(5, (input.length() + 1) / 2); input = input.toLowerCase(); for (String candidate : words) { int d = editDistance(input, candidate.toLowerCase(), bestDistance); if (d >= 0 && d < bestDistance) { bestDistance = d; best = candidate; } } return best; } /** * Return a string to be used at the end of an error message. It is either an empty string, or a * spelling suggestion, e.g. " (did you mean 'x'?)". */ public static String didYouMean(String input, Iterable<String> words) { String suggestion = suggest(input, words); if (suggestion == null) { return ""; } else { return " (did you mean '" + suggestion + "'?)"; } } }