/*
* Copyright (c) 2006, Stephen Kelvin Friedrich, All rights reserved.
*
* This a BSD license. If you use or enhance the code, I'd be pleased if you sent a mail to s.friedrich@eekboom.com
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
* following conditions are met:
* * Redistributions of source code must retain the above copyright notice, this list of conditions and the
* following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with the distribution.
* * Neither the name of the "Stephen Kelvin Friedrich" nor the names of its contributors may be used to endorse
* or promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//package com.eekboom.utils;
package tufts;
import java.text.Collator;
import java.util.Comparator;
/**
 * Utility class for "natural order" string comparison: runs of decimal digits embedded in a string are compared
 * by their numeric value, everything else is compared either with a {@link java.text.Collator} or character by
 * character.
 */
public final class Strings {
    /**
     * <p>A string comparator that does case sensitive comparisons and handles embedded numbers correctly.</p>
     * <p><b>Do not use</b> if your app might ever run on any locale that uses more than 7-bit ascii characters.</p>
     */
    private static final Comparator<String> NATURAL_COMPARATOR_ASCII = new Comparator<String>() {
        public int compare(String o1, String o2) {
            return compareNaturalAscii(o1, o2);
        }
    };

    /**
     * <p>A string comparator that does case insensitive comparisons and handles embedded numbers correctly.</p>
     * <p><b>Do not use</b> if your app might ever run on any locale that uses more than 7-bit ascii characters.</p>
     */
    private static final Comparator<String> IGNORE_CASE_NATURAL_COMPARATOR_ASCII = new Comparator<String>() {
        public int compare(String o1, String o2) {
            return compareNaturalIgnoreCaseAscii(o1, o2);
        }
    };

    /**
     * This is a utility class (static methods only), don't instantiate.
     */
    private Strings() {
    }

    /**
     * Returns a comparator that compares contained numbers based on their numeric values and compares other parts
     * using the current locale's order rules.
     * <p>For example in German locale this will be a comparator that handles umlauts correctly and ignores
     * upper/lower case differences.</p>
     *
     * @return <p>A string comparator that uses the current locale's order rules and handles embedded numbers
     *         correctly.</p>
     * @see #getNaturalComparator(java.text.Collator)
     */
    public static Comparator<String> getNaturalComparator() {
        Collator collator = Collator.getInstance();
        return getNaturalComparator(collator);
    }

    /**
     * Returns a comparator that compares contained numbers based on their numeric values and compares other parts
     * using the given collator.
     *
     * @param collator used for locale specific comparison of text (non-number) subwords - must not be null
     * @return <p>A string comparator that uses the given Collator to compare subwords and handles embedded numbers
     *         correctly.</p>
     * @throws NullPointerException if <code>collator</code> is null
     * @see #getNaturalComparator()
     */
    public static Comparator<String> getNaturalComparator(final Collator collator) {
        if(collator == null) {
            // it's important to explicitly handle this here - else the bug will manifest anytime later in possibly
            // unrelated code that tries to use the comparator
            throw new NullPointerException("collator must not be null");
        }
        return new Comparator<String>() {
            public int compare(String o1, String o2) {
                return compareNatural(collator, o1, o2);
            }
        };
    }

    /**
     * Returns a comparator that compares contained numbers based on their numeric values and compares other parts
     * based on each character's Unicode value.
     * <b>Do not use</b> if your app might ever run on any locale that uses more than 7-bit ascii characters.
     *
     * @return <p>a string comparator that does case sensitive comparisons on pure ascii strings and handles embedded
     *         numbers correctly.</p>
     * @see #getNaturalComparator()
     * @see #getNaturalComparator(java.text.Collator)
     */
    public static Comparator<String> getNaturalComparatorAscii() {
        return NATURAL_COMPARATOR_ASCII;
    }

    /**
     * Returns a comparator that compares contained numbers based on their numeric values and compares other parts
     * based on each character's Unicode value while ignoring upper/lower case differences.
     * <b>Do not use</b> if your app might ever run on any locale that uses more than 7-bit ascii characters.
     *
     * @return <p>a string comparator that does case insensitive comparisons on pure ascii strings and handles embedded
     *         numbers correctly.</p>
     * @see #getNaturalComparator()
     * @see #getNaturalComparator(java.text.Collator)
     */
    public static Comparator<String> getNaturalComparatorIgnoreCaseAscii() {
        return IGNORE_CASE_NATURAL_COMPARATOR_ASCII;
    }

    /**
     * <p>Compares two strings using the current locale's rules and comparing contained numbers based on their numeric
     * values.</p>
     * <p>This is probably the best default comparison to use.</p>
     * <p>If you know that the texts to be compared are in a certain language that differs from the default locale's
     * language, then get a collator for the desired locale ({@link java.text.Collator#getInstance(java.util.Locale)})
     * and pass it to {@link #compareNatural(java.text.Collator, String, String)}</p>
     *
     * @param s first string - must not be null
     * @param t second string - must not be null
     * @return zero if <code>s</code> and <code>t</code> are equivalent under this ordering,
     *         a value less than zero if <code>s</code> precedes <code>t</code>
     *         and a value larger than zero if <code>s</code> follows <code>t</code>
     */
    public static int compareNatural(String s, String t) {
        // The case flag is irrelevant here: it is ignored whenever a collator is supplied.
        return compareNatural(s, t, false, Collator.getInstance());
    }

    /**
     * <p>Compares two strings using the given collator and comparing contained numbers based on their numeric
     * values.</p>
     *
     * @param collator used to compare the text (non-number) subwords; if null, the text parts are compared
     *                 character by character on their Unicode value (case sensitive), exactly like
     *                 {@link #compareNaturalAscii(String, String)}
     * @param s        first string - must not be null
     * @param t        second string - must not be null
     * @return zero if <code>s</code> and <code>t</code> are equivalent under this ordering,
     *         a value less than zero if <code>s</code> precedes <code>t</code>
     *         and a value larger than zero if <code>s</code> follows <code>t</code>
     */
    public static int compareNatural(Collator collator, String s, String t) {
        return compareNatural(s, t, true, collator);
    }

    /**
     * <p>Compares two strings using each character's Unicode value for non-digit characters and the numeric values of
     * any contained numbers.</p>
     * <p>(This will probably make sense only for strings containing 7-bit ascii characters only.)</p>
     *
     * @param s first string - must not be null
     * @param t second string - must not be null
     * @return zero if <code>s</code> and <code>t</code> are equivalent under this ordering,
     *         a value less than zero if <code>s</code> precedes <code>t</code>
     *         and a value larger than zero if <code>s</code> follows <code>t</code>
     */
    public static int compareNaturalAscii(String s, String t) {
        return compareNatural(s, t, true, null);
    }

    /**
     * <p>Compares two strings using each character's Unicode value - ignoring upper/lower case - for non-digit
     * characters and the numeric values of any contained numbers.</p>
     * <p>(This will probably make sense only for strings containing 7-bit ascii characters only.)</p>
     *
     * @param s first string - must not be null
     * @param t second string - must not be null
     * @return zero if <code>s</code> and <code>t</code> are equivalent under this ordering,
     *         a value less than zero if <code>s</code> precedes <code>t</code>
     *         and a value larger than zero if <code>s</code> follows <code>t</code>
     */
    public static int compareNaturalIgnoreCaseAscii(String s, String t) {
        return compareNatural(s, t, false, null);
    }

    /**
     * Tells whether the given character is a decimal digit (of any script) whose numeric value is zero.
     * Using the digit value instead of a comparison with <code>'0'</code> keeps zero detection consistent with
     * {@link Character#isDigit(char)}, which is what delimits number subwords.
     */
    private static boolean isZeroDigit(char c) {
        return Character.digit(c, 10) == 0;
    }

    /**
     * Core natural-order comparison. Both strings are walked in parallel, subword by subword: where both strings
     * have a digit the two digit runs are compared as numbers, otherwise text is compared.
     *
     * <p>Numbers are compared without converting them to an integer type (so arbitrarily long digit runs work):
     * leading zeros are skipped, then the longer remaining digit run is the larger number, and for equal lengths
     * the first differing digit decides. The number of leading zeros is only used as a tie-breaker when both
     * strings end directly after the number; consequently strings such as <code>"a01b"</code> and
     * <code>"a1b"</code>, or <code>"a0"</code> and <code>"a00"</code>, compare as equivalent (result zero).</p>
     *
     * @param s             first string
     * @param t             second string
     * @param caseSensitive if true, characters that differ only in case are treated as different; if false they
     *                      are treated as equal - will be ignored if a collator is given
     * @param collator      used to compare subwords that aren't numbers - if null, characters will be compared
     *                      individually based on their Unicode value
     * @return zero if <code>s</code> and <code>t</code> are equivalent under this ordering,
     *         a value less than zero if <code>s</code> precedes <code>t</code>
     *         and a value larger than zero if <code>s</code> follows <code>t</code>
     */
    private static int compareNatural(String s, String t, boolean caseSensitive, Collator collator) {
        int sIndex = 0;
        int tIndex = 0;

        int sLength = s.length();
        int tLength = t.length();

        while(true) {
            // both character indices are after a subword (or at zero)

            // Check if one string is at end
            if(sIndex == sLength && tIndex == tLength) {
                return 0;
            }
            if(sIndex == sLength) {
                return -1;
            }
            if(tIndex == tLength) {
                return 1;
            }

            // Compare sub word
            char sChar = s.charAt(sIndex);
            char tChar = t.charAt(tIndex);

            boolean sCharIsDigit = Character.isDigit(sChar);
            boolean tCharIsDigit = Character.isDigit(tChar);

            if(sCharIsDigit && tCharIsDigit) {
                // Compare numbers

                // skip leading 0s (zero digits of any script, not just ascii '0')
                int sLeadingZeroCount = 0;
                while(isZeroDigit(sChar)) {
                    ++sLeadingZeroCount;
                    ++sIndex;
                    if(sIndex == sLength) {
                        break;
                    }
                    sChar = s.charAt(sIndex);
                }
                int tLeadingZeroCount = 0;
                while(isZeroDigit(tChar)) {
                    ++tLeadingZeroCount;
                    ++tIndex;
                    if(tIndex == tLength) {
                        break;
                    }
                    tChar = t.charAt(tIndex);
                }

                // A number consisting of zeros only has nothing left after the zeros were skipped
                boolean sAllZero = sIndex == sLength || !Character.isDigit(sChar);
                boolean tAllZero = tIndex == tLength || !Character.isDigit(tChar);
                if(sAllZero && tAllZero) {
                    // both numbers are zero - go on with whatever follows them
                    continue;
                }
                if(sAllZero && !tAllZero) {
                    return -1;
                }
                if(tAllZero) {
                    return 1;
                }

                // Both numbers are non-zero and both indices are at their first significant digit.
                // diff remembers the first differing digit; it only decides if both runs have the same length.
                int diff = 0;
                do {
                    if(diff == 0) {
                        // compare digit values, not code units, so that digits of different scripts compare
                        // numerically (identical to the code unit difference for ascii digits)
                        diff = Character.digit(sChar, 10) - Character.digit(tChar, 10);
                    }
                    ++sIndex;
                    ++tIndex;
                    if(sIndex == sLength && tIndex == tLength) {
                        // same number of digits and nothing follows: digits decide, then leading zero count
                        return diff != 0 ? diff : sLeadingZeroCount - tLeadingZeroCount;
                    }
                    if(sIndex == sLength) {
                        if(diff == 0) {
                            // s is a proper prefix of t as far as compared
                            return -1;
                        }
                        // if t has more digits its number is larger, else the first differing digit decides
                        return Character.isDigit(t.charAt(tIndex)) ? -1 : diff;
                    }
                    if(tIndex == tLength) {
                        if(diff == 0) {
                            return 1;
                        }
                        return Character.isDigit(s.charAt(sIndex)) ? 1 : diff;
                    }
                    sChar = s.charAt(sIndex);
                    tChar = t.charAt(tIndex);
                    sCharIsDigit = Character.isDigit(sChar);
                    tCharIsDigit = Character.isDigit(tChar);
                    if(!sCharIsDigit && !tCharIsDigit) {
                        // both number sub words have the same length
                        if(diff != 0) {
                            return diff;
                        }
                        break;
                    }
                    if(!sCharIsDigit) {
                        // s' number has fewer digits, so it is smaller
                        return -1;
                    }
                    if(!tCharIsDigit) {
                        return 1;
                    }
                } while(true);
            }
            else {
                // Compare words
                if(collator != null) {
                    // To use the collator the whole subwords have to be compared - character-by-character
                    // comparison is not possible. So find the two subwords first. Each subword always contains
                    // at least the current character and extends up to (excluding) the next digit.
                    int aw = sIndex;
                    int bw = tIndex;
                    do {
                        ++sIndex;
                    } while(sIndex < sLength && !Character.isDigit(s.charAt(sIndex)));
                    do {
                        ++tIndex;
                    } while(tIndex < tLength && !Character.isDigit(t.charAt(tIndex)));

                    String as = s.substring(aw, sIndex);
                    String bs = t.substring(bw, tIndex);
                    int subwordResult = collator.compare(as, bs);
                    if(subwordResult != 0) {
                        return subwordResult;
                    }
                }
                else {
                    // No collator specified. All characters should be ascii only. Compare character-by-character.
                    do {
                        if(sChar != tChar) {
                            if(caseSensitive) {
                                return sChar - tChar;
                            }
                            // Case insensitive: compare upper case first, then lower case as well
                            sChar = Character.toUpperCase(sChar);
                            tChar = Character.toUpperCase(tChar);
                            if(sChar != tChar) {
                                sChar = Character.toLowerCase(sChar);
                                tChar = Character.toLowerCase(tChar);
                                if(sChar != tChar) {
                                    return sChar - tChar;
                                }
                            }
                        }
                        ++sIndex;
                        ++tIndex;
                        if(sIndex == sLength && tIndex == tLength) {
                            return 0;
                        }
                        if(sIndex == sLength) {
                            return -1;
                        }
                        if(tIndex == tLength) {
                            return 1;
                        }
                        sChar = s.charAt(sIndex);
                        tChar = t.charAt(tIndex);
                        sCharIsDigit = Character.isDigit(sChar);
                        tCharIsDigit = Character.isDigit(tChar);
                        // leave the text loop as soon as either side reaches a digit
                    } while(!sCharIsDigit && !tCharIsDigit);
                }
            }
        }
    }
}