package detective.utils; import java.io.IOException; import java.security.SecureRandom; import java.util.Collection; import java.util.Random; import org.elasticsearch.common.Base64; import com.google.common.base.Optional; public class StringUtils { private static class SecureRandomHolder { // class loading is atomic - this is a lazy & safe singleton private static final SecureRandom INSTANCE = new SecureRandom(); } /** * Returns a Base64 encoded version of a Version 4.0 compatible UUID * as defined here: http://www.ietf.org/rfc/rfc4122.txt */ public static String randomBase64UUID() { return randomBase64UUID(SecureRandomHolder.INSTANCE); } /** * Returns a Base64 encoded version of a Version 4.0 compatible UUID * randomly initialized by the given {@link Random} instance * as defined here: http://www.ietf.org/rfc/rfc4122.txt */ public static String randomBase64UUID(Random random) { final byte[] randomBytes = new byte[16]; random.nextBytes(randomBytes); /* Set the version to version 4 (see http://www.ietf.org/rfc/rfc4122.txt) * The randomly or pseudo-randomly generated version. * The version number is in the most significant 4 bits of the time * stamp (bits 4 through 7 of the time_hi_and_version field).*/ randomBytes[6] &= 0x0f; /* clear the 4 most significant bits for the version */ randomBytes[6] |= 0x40; /* set the version to 0100 / 0x40 */ /* Set the variant: * The high field of th clock sequence multiplexed with the variant. * We set only the MSB of the variant*/ randomBytes[8] &= 0x3f; /* clear the 2 most significant bits */ randomBytes[8] |= 0x80; /* set the variant (MSB is set)*/ try { byte[] encoded = Base64.encodeBytesToBytes(randomBytes, 0, randomBytes.length, Base64.URL_SAFE); // we know the bytes are 16, and not a multi of 3, so remove the 2 padding chars that are added assert encoded[encoded.length - 1] == '='; assert encoded[encoded.length - 2] == '='; // we always have padding of two at the end, encode it differently return new String(encoded, 0, encoded.length - 2, Base64.PREFERRED_ENCODING); } catch (IOException e) { throw new RuntimeException("should not be thrown"); } } public static Optional<String> getBestMatch(String wordToMatch, Collection<String> candidates) { if (candidates == null || candidates.size() == 0) return Optional.absent(); String bestMatch = null; int bestScore = Integer.MAX_VALUE; for (String candidate : candidates){ int distance = editDistance(wordToMatch, candidate); if (bestScore > distance){ bestMatch = candidate; bestScore = distance; } } if (bestMatch == null) return Optional.absent(); else return Optional.of(bestMatch); } public static int editDistance(String word1, String word2) { int len1 = word1.length(); int len2 = word2.length(); // len1+1, len2+1, because finally return dp[len1][len2] int[][] dp = new int[len1 + 1][len2 + 1]; for (int i = 0; i <= len1; i++) { dp[i][0] = i; } for (int j = 0; j <= len2; j++) { dp[0][j] = j; } //iterate though, and check last char for (int i = 0; i < len1; i++) { char c1 = word1.charAt(i); for (int j = 0; j < len2; j++) { char c2 = word2.charAt(j); //if last two chars equal if (c1 == c2) { //update dp value for +1 length dp[i + 1][j + 1] = dp[i][j]; } else { int replace = dp[i][j] + 1; int insert = dp[i][j + 1] + 1; int delete = dp[i + 1][j] + 1; int min = replace > insert ? insert : replace; min = delete > min ? min : delete; dp[i + 1][j + 1] = min; } } } return dp[len1][len2]; } }