/* Copyright 2013 Jonatan Jönsson
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package se.softhouse.common.strings;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Strings.repeat;
import static com.google.common.collect.Iterables.isEmpty;
import static se.softhouse.common.strings.StringsUtil.CloseMatch.BY_CLOSEST_MATCH_FIRST;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import javax.annotation.CheckReturnValue;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.Immutable;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.primitives.Ints;
/**
 * Utilities for working with {@link String}s
 */
@Immutable
public final class StringsUtil
{
    private StringsUtil()
    {
        // static utility class, not meant to be instantiated
    }

    /**
     * A suitable string to represent newlines on this specific platform (the value of the
     * {@code line.separator} system property)
     */
    public static final String NEWLINE = System.getProperty("line.separator");

    /**
     * The <a href="http://en.wikipedia.org/wiki/ASCII_tab">ASCII tab</a> (\t) character
     */
    public static final char TAB = '\t';

    /**
     * Creates a string consisting solely of space characters.
     *
     * @param numberOfSpaces to put in the created string
     * @return a string with {@code numberOfSpaces} spaces in it
     * @throws IllegalArgumentException if {@code numberOfSpaces} is negative
     */
    @Nonnull
    @CheckReturnValue
    public static String spaces(final int numberOfSpaces)
    {
        return repeat(" ", numberOfSpaces);
    }

    /**
     * Returns a string consisting of {@code indexToPointAt} spaces followed by a "^". When printed
     * on the line below another string, the "^" points at the character at
     * {@code indexToPointAt}.
     */
    public static String pointingAtIndex(int indexToPointAt)
    {
        return spaces(indexToPointAt) + "^";
    }

    /**
     * Returns <code>true</code> iff {@code input} {@link String#startsWith(String) starts with}
     * {@code toStartWith} and has more characters after that match
     */
    public static boolean startsWithAndHasMore(String input, String toStartWith)
    {
        return input.startsWith(toStartWith) && input.length() > toStartWith.length();
    }

    /**
     * <pre>
     * Returns the {@link String} in {@code validOptions} that {@code input} has the shortest
     * <a href="http://en.wikipedia.org/wiki/Levenshtein_distance">levenshtein distance</a> to.
     * If several options share the shortest distance, the one encountered first wins.
     *
     * For example when given "stats" as input and "status", "help",
     * "action" as validOptions, "status" is returned.
     *
     * Current performance characteristics:
     * n = length of {@code input}
     * m = average string length of the strings in {@code validOptions}
     * s = amount of validOptions
     *
     * complexity = n * m * s = O(n<sup>3</sup>)
     *
     * So try to limit the number of valid options...
     *
     * @throws IllegalArgumentException if {@code validOptions} is empty
     * </pre>
     *
     * @see #closestMatches(String, Iterable, int)
     */
    @Nonnull
    @CheckReturnValue
    public static String closestMatch(final String input, final Iterable<String> validOptions)
    {
        checkNotNull(input);
        checkArgument(!isEmpty(validOptions), "No valid options to match the input against");

        int shortestDistance = Integer.MAX_VALUE;
        String bestGuess = null;
        for(String validOption : validOptions)
        {
            int distance = levenshteinDistance(input, validOption);
            // strict comparison: on ties the earlier option is kept
            if(distance < shortestDistance)
            {
                shortestDistance = distance;
                bestGuess = validOption;
            }
        }
        return bestGuess;
    }

    /**
     * <pre>
     * Returns a sorted {@link List} where the first entry is the {@link String} in {@code validOptions} that's closest in terms of
     * <a href="http://en.wikipedia.org/wiki/Levenshtein_distance">levenshtein distance</a> to {@code input}, or an empty list if no options within distance can be found.
     *
     * For example when given "stats" as input and "status", "staging",
     * "stage" as validOptions, and 4 as maximumDistance, "status", "stage", "staging" is returned.
     *
     * Only values with a distance less than or equal to {@code maximumDistance} will be included in the result.
     * Options with the same distance keep the relative order they had in {@code validOptions}.
     *
     * The returned list is <i>modifiable</i> and is a fresh copy, changes to it don't affect {@code validOptions}.
     *
     * </pre>
     */
    @Nonnull
    @CheckReturnValue
    public static List<String> closestMatches(final String input, final Iterable<String> validOptions, int maximumDistance)
    {
        checkNotNull(input);
        if(isEmpty(validOptions))
            return Lists.newArrayList();

        // Ask for one step more than the limit so that "too far away" (capped) results can be told
        // apart from results exactly at the limit, without overflowing for Integer.MAX_VALUE
        int distanceCutOff = maximumDistance == Integer.MAX_VALUE ? Integer.MAX_VALUE : maximumDistance + 1;

        List<CloseMatch> closeMatches = Lists.newArrayList();
        for(String validOption : validOptions)
        {
            int distance = levenshteinDistance(input, validOption, distanceCutOff);
            if(distance <= maximumDistance)
            {
                closeMatches.add(new CloseMatch(validOption, distance));
            }
        }
        Collections.sort(closeMatches, BY_CLOSEST_MATCH_FIRST);

        // Copy the transformed view so that callers get a list that supports add/set as documented
        List<String> sortedValues = Lists.newArrayList(Lists.transform(closeMatches, CloseMatch.GET_VALUE));
        return sortedValues;
    }

    /**
     * Pairs a valid option with its measured distance to the input, so that options can be sorted
     * by how close they are.
     */
    static final class CloseMatch
    {
        private final int measuredDistance;
        private final String value;

        private CloseMatch(String validOption, int distance)
        {
            measuredDistance = distance;
            value = validOption;
        }

        /**
         * Orders matches by ascending distance, i.e. the closest match comes first
         */
        static final Comparator<CloseMatch> BY_CLOSEST_MATCH_FIRST = new Comparator<CloseMatch>(){
            @Override
            public int compare(CloseMatch left, CloseMatch right)
            {
                return Ints.compare(left.measuredDistance, right.measuredDistance);
            }
        };

        /**
         * Extracts the matched option from a {@link CloseMatch}
         */
        private static final Function<CloseMatch, String> GET_VALUE = new Function<CloseMatch, String>(){
            @Override
            public String apply(@Nonnull CloseMatch input)
            {
                return input.value;
            }
        };
    }

    /**
     * Returns the <a href="http://en.wikipedia.org/wiki/Levenshtein_distance">levenshtein
     * distance</a> between {@code left} and {@code right}.
     *
     * @see #closestMatch(String, Iterable)
     */
    public static int levenshteinDistance(final String left, final String right)
    {
        return levenshteinDistance(left, right, Integer.MAX_VALUE);
    }

    /**
     * Returns the <a href="http://en.wikipedia.org/wiki/Levenshtein_distance">levenshtein
     * distance</a> between {@code left} and {@code right}. If it's greater than maxDistance, maxDistance will be returned.
     *
     * @param left one of the strings to compare
     * @param right the other string to compare
     * @param maxDistance the largest value this method will return
     * @throws IllegalArgumentException if {@code maxDistance} is negative
     * @see #closestMatch(String, Iterable)
     */
    public static int levenshteinDistance(final String left, final String right, final int maxDistance)
    {
        checkNotNull(left);
        checkNotNull(right);
        checkArgument(maxDistance >= 0, "maxDistance must not be negative (was %s)", maxDistance);

        // a "cleaner" version of the org.apache.commons-lang algorithm which in
        // turn was inspired by http://www.merriampark.com/ldjava.htm
        int leftLength = left.length();
        int rightLength = right.length();

        // The difference in length is a lower bound of the distance, so this can be answered
        // without filling in the cost table. It also caps the empty string cases below.
        if(Math.abs(leftLength - rightLength) > maxDistance)
            return maxDistance;
        if(leftLength == 0)
            return rightLength;
        if(rightLength == 0)
            return leftLength;

        // Only two rows of the cost table are needed at any time
        int[] previousRow = new int[leftLength + 1];
        int[] currentRow = new int[leftLength + 1];

        // Turning a prefix of left into the empty string costs one removal per character
        for(int leftIndex = 0; leftIndex <= leftLength; leftIndex++)
        {
            previousRow[leftIndex] = leftIndex;
        }

        for(int rightIndex = 1; rightIndex <= rightLength; rightIndex++)
        {
            char rightChar = right.charAt(rightIndex - 1);
            // Turning the empty string into a prefix of right costs one addition per character
            currentRow[0] = rightIndex;

            for(int leftIndex = 1; leftIndex <= leftLength; leftIndex++)
            {
                int dropLeftChar = currentRow[leftIndex - 1] + 1;
                int addRightChar = previousRow[leftIndex] + 1;
                int replaceOrKeep = previousRow[leftIndex - 1];
                if(left.charAt(leftIndex - 1) != rightChar)
                {
                    replaceOrKeep++;
                }
                currentRow[leftIndex] = Ints.min(dropLeftChar, addRightChar, replaceOrKeep);
            }

            // Swap so that the row just computed becomes the 'previous row' of the next iteration
            int[] swap = previousRow;
            previousRow = currentRow;
            currentRow = swap;
        }

        // The last action in the loop above was the swap, so previousRow holds the most recent costs
        return Math.min(previousRow[leftLength], maxDistance);
    }

    /**
     * Returns {@code number} expressed as a position. For example 0 returns
     * "zeroth", 1 returns "first" and so forth up to "fifth". Higher positions
     * are described as "6th", "7th", "21st", "22nd", "23rd" and so on.
     *
     * @throws IllegalArgumentException if {@code number} is negative
     */
    @Nonnull
    @CheckReturnValue
    public static String numberToPositionalString(int number)
    {
        checkArgument(number >= 0, "Negative numbers don't have positions");
        switch(number)
        {
            case 0:
                return "zeroth";
            case 1:
                return "first";
            case 2:
                return "second";
            case 3:
                return "third";
            case 4:
                return "fourth";
            case 5:
                return "fifth";
        }
        return Integer.toString(number) + ordinalSuffix(number);
    }

    /**
     * Returns the English ordinal suffix ("st", "nd", "rd" or "th") for a non-negative
     * {@code number}
     */
    private static String ordinalSuffix(int number)
    {
        int lastTwoDigits = number % 100;
        // 11th, 12th and 13th are exceptions to the st/nd/rd rule
        if(lastTwoDigits >= 11 && lastTwoDigits <= 13)
            return "th";
        switch(number % 10)
        {
            case 1:
                return "st";
            case 2:
                return "nd";
            case 3:
                return "rd";
            default:
                return "th";
        }
    }

    /**
     * Finds the {@code nth} occurrence of {@code needle} in {@code haystack}. Occurrences may
     * overlap, the search for the next occurrence starts one character after the start of the
     * previous one.
     *
     * @param nth which occurrence of {@code needle} to locate, 1 means the first one
     * @param needle the string to search for
     * @param haystack the string to search within
     * @return the starting index of the {@code nth} occurrence of {@code needle} within
     *         {@code haystack}, -1 if {@code nth} occurrences couldn't be found
     * @throws IllegalArgumentException if {@code nth} is less than 1
     */
    public static int indexOfNth(int nth, String needle, String haystack)
    {
        checkNotNull(haystack);
        checkNotNull(needle);
        checkArgument(nth > 0, "nth must be at least 1 (was %s)", nth);

        int index = -1;
        for(int occurrencesFound = 0; occurrencesFound < nth; occurrencesFound++)
        {
            int searchFrom = index + 1;
            // Only reachable for an empty needle: String#indexOf clamps the start index to the
            // length of the haystack and would otherwise report the same match over and over
            if(searchFrom > haystack.length())
                return -1;

            index = haystack.indexOf(needle, searchFrom);
            if(index == -1)
            {
                break;
            }
        }
        return index;
    }
}