package ua.stu.scplib.tools; import java.text.Collator; import java.util.StringTokenizer; import java.util.Vector; /** * <p>Various static methods helpful for comparing and manipulating strings.</p> * * @author dclunie */ public class StringUtilities { private static Collator ourCollator = Collator.getInstance(); static { ourCollator.setStrength(Collator.IDENTICAL); ourCollator.setDecomposition(Collator.FULL_DECOMPOSITION); } private StringUtilities() {} /** * <p>Does a string contain any one of an array of strings ?</p> * * @param string the string to test * @param substrings an array of strings to look for within string * @return true if any string in substrings is found within string */ static public final boolean contains(String string,String[] substrings) { if (string != null && substrings != null) { for (String substring : substrings) { if (string.contains(substring)) { return true; } } } return false; } /** * <p>Does a string contain any one of an array of strings regardless of case ?</p> * * @param string the string to test * @param substrings an array of strings to look for within string * @return true if any string in substrings is found within string */ static public final boolean containsRegardlessOfCase(String string,String[] substrings) { if (string != null && substrings != null) { string = string.toLowerCase(); for (String substring : substrings) { if (string.contains(substring.toLowerCase())) { return true; } } } return false; } /** * <p>Get delimited values from a string.</p> * * <p>Consecutive delimiters result in an empty (zero length not null) string value.</p> * * <p>Hence always returns a Vector one longer than the number of delimiters present.</p> * * @param string the string containing the delimited values * @param delimiter the delimiter * @return a Vector of String values */ static public final Vector getDelimitedValues(String string,String delimiter) { Vector r = new Vector(); StringTokenizer t = new StringTokenizer(string,delimiter,true); boolean lastWasDelimiter = true; while (t.hasMoreTokens()) { String v = t.nextToken(); if (v.equals(delimiter)) { // consecutive delimiters (and start) means null value if (lastWasDelimiter) { r.add(""); } lastWasDelimiter = true; } else { r.add(v); lastWasDelimiter = false; } } if (lastWasDelimiter) { // handle empty last value r.add(""); } return r; } /** * <p>Remove any trailing instances of a particular character from a string.</p> * * @param src the string that may have trailing characters * @param rmchar the character, all trailing instances of which are to be removed * @return the value of the string argument with any instances of the trailing character removed */ static public final String removeTrailingCharacter(String src,char rmchar) { char [] c = src.toCharArray(); int l = c.length; int n = l; while (n > 0 && c[n-1] == rmchar) --n; return n == l ? src : (n > 0 ? new String(c,0,n) : ""); } /** * <p>Remove any trailing instances of whitespace or control characters from a string.</p> * * @param src the string that may have trailing characters * @return the value of the string argument with any instances of trailing whitespace or control characters removed */ static public final String removeTrailingWhitespaceOrISOControl(String src) { char [] c = src.toCharArray(); int l = c.length; int n = l; while (n > 0 && (Character.isWhitespace(c[n-1]) || Character.isISOControl(c[n-1]))) --n; return n == l ? src : (n > 0 ? new String(c,0,n) : ""); } /** * <p>Remove any leading instances of a particular character from a string.</p> * * @param src the string that may have leading characters * @param rmchar the character, all leading instances of which are to be removed * @return the value of the string argument with any instances of the leading character removed */ static public final String removeLeadingCharacter(String src,char rmchar) { char [] c = src.toCharArray(); int l = c.length; int i = 0; while (i < l && c[i] == rmchar) ++i; return i == 0 ? src : (i < l ? new String(c,i,l-i) : ""); } /** * <p>Remove any leading instances of whitespace or control characters from a string.</p> * * @param src the string that may have trailing characters * @return the value of the string argument with any instances of trailing whitespace or control characters removed */ static public final String removeLeadingWhitespaceOrISOControl(String src) { char [] c = src.toCharArray(); int l = c.length; int i = 0; while (i < l && (Character.isWhitespace(c[i]) || Character.isISOControl(c[i]))) ++i; return i == 0 ? src : (i < l ? new String(c,i,l-i) : ""); } /** * <p>Remove any trailing spaces from a string.</p> * * @param src the string that may have trailing spaces * @return the value of the string argument with any trailing spaces removed */ static public final String removeTrailingSpaces(String src) { return removeTrailingCharacter(src,' '); } /** * <p>Remove any leading spaces from a string.</p> * * @param src the string that may have leading spaces * @return the value of the string argument with any leading spaces removed */ static public final String removeLeadingSpaces(String src) { return removeLeadingCharacter(src,' '); } /** * <p>Remove any leading or trailing padding from a string.</p> * * <p>Padding in this context means leading or trailing white space of any kind or null characters.</p> * * @param src the string that may have padding * @return the value of the string argument with any padding removed */ static public final String removeLeadingOrTrailingWhitespaceOrISOControl(String src) { return removeTrailingWhitespaceOrISOControl(removeLeadingWhitespaceOrISOControl(src)); } /** * <p>Compare strings based on their integer value of they are both integers, * otherwise their lexicographic order.</p> * * <p>For example, * <code>"001"</code> and<code>"1"</code> would be treated as equal, whilst * <code>"1"</code> would be considered as occuring before <code>"10"</code>, * which would not be the case with a simple lexicographic ordering. * </p> * * @param s1 the first of two strings to be compared * @param s2 the first of two strings to be compared * @return the value 0 if the first string is equal to the second string; a value less than 0 if the first string * is less than the second string; and a value greater than 0 if the first string * is greater than the second string */ static public final int compareStringsThatMayBeIntegers(String s1,String s2) { try { return Integer.parseInt(s1) - Integer.parseInt(s2); } catch (NumberFormatException e) { //System.err.println("compareStringsThatMayBeIntegers: falling back to string"); return ourCollator.compare(s1,s2); //return s1.compareTo(s2); } } /** * <p>Compare strings based on the lexicographic order of their values, but accounting for non-zero padded integers.</p> * * <p>Note that the comparison is more complex than a simple lexicographic comparison * of strings (as described in the definition of {@link java.lang.String#compareTo(String) java.lang.String.compareTo(String)} * but rather accounts for embedded non-zero padded integers by treating occurrences of space * delimited integers as integer values rather than strings. For example, * <code>"a 001 b"</code> and<code>"a 1 b"</code> would be treated as equal, whilst * <code>"a 1 b"</code> would be considered as occuring before <code>"a 10 b"</code>, * which would not be the case with a simple lexicographic ordering. * </p> * * @param s1 the first of two strings to be compared * @param s2 the first of two strings to be compared * @return the value 0 if the first string is equal to the second string; a value less than 0 if the first string * is lexicographically less than the second string; and a value greater than 0 if the first string * is lexicographically greater than the second string */ static public final int compareStringsWithEmbeddedNonZeroPaddedIntegers(String s1,String s2) { StringTokenizer st1 = new StringTokenizer(s1); StringTokenizer st2 = new StringTokenizer(s2); int c = 0; while (st1.hasMoreElements() && st2.hasMoreElements()) { String t1 = st1.nextToken(); String t2 = st2.nextToken(); c = compareStringsThatMayBeIntegers(t1,t2); //System.err.println("compareStringsWithEmbeddedNonZeroPaddedIntegers: looping with <"+t1+"> and <"+t2+"> c="+c); if (c != 0) return c; } c = st1.hasMoreElements() ? 1 : (st1.hasMoreElements() ? -1 : 0); return c; } /** * <p>Create a dump of the decimal offset, hexadecimal values and printable string values of a String.</p> * * @param s the String to be dumped as if it were an array of char * @return a string containing the multiline result */ public static String dump(String s) { return dump(s.toCharArray()); } /** * <p>Create a dump of the decimal offset, hexadecimal values and printable string values of an array of char.</p> * * @param chars the array of char to be dumped * @return a string containing the multiline result */ public static String dump(char[] chars) { int offset = 0; int lng = chars == null ? 0 : chars.length; StringBuffer sb = new StringBuffer(); if (chars != null && lng > 0) { int i=0; int stringStart=0; int stringCount=0; while (i < lng) { int position = i+offset; if (i%16 == 0) { if (i != 0) sb.append("\n"); sb.append(HexDump.intToPaddedDecimalString(position)); sb.append(" ("); sb.append(HexDump.intToPaddedHexStringWith0x(position)); sb.append("):"); stringStart=position; stringCount=0; } sb.append(" "); sb.append(HexDump.shortToPaddedHexString((short)(chars[position]))); ++i; ++stringCount; if (i%16 == 0 || i == lng) { sb.append(" "); sb.append(new String(chars,stringStart,stringCount)); } } } sb.append("\n"); return sb.toString(); } /* * <p>Dump an array as a human-readable string.</p> * * @param doubleArray to dump * @return a string representation */ public static String toString(double[] doubleArray) { if (doubleArray == null) { return null; } else { if (doubleArray.length == 0) { return ""; } else { String delimiter=""; StringBuffer strbuf = new StringBuffer(); for (int i=0; i<doubleArray.length; ++i) { strbuf.append(delimiter); strbuf.append("["); strbuf.append(i); strbuf.append("]="); strbuf.append(doubleArray[i]); delimiter=" "; } return strbuf.toString(); } } } /* * <p>Dump an array as a human-readable string.</p> * * @param stringArray to dump * @return a string representation */ public static String toString(String[] stringArray) { if (stringArray == null) { return null; } else { if (stringArray.length == 0) { return ""; } else { String delimiter=""; StringBuffer strbuf = new StringBuffer(); for (int i=0; i<stringArray.length; ++i) { strbuf.append(delimiter); strbuf.append("["); strbuf.append(i); strbuf.append("]="); strbuf.append(stringArray[i]); delimiter=" "; } return strbuf.toString(); } } } /* * <p>Dump an array of arrays as a human-readable string.</p> * * @param stringArrays to dump * @return a string representation */ public static String toString(String[][] stringArrays) { if (stringArrays == null) { return null; } else { if (stringArrays.length == 0) { return ""; } else { String delimiter=""; StringBuffer strbuf = new StringBuffer(); for (int i=0; i<stringArrays.length; ++i) { strbuf.append(delimiter); strbuf.append("["); strbuf.append(i); strbuf.append("]={"); strbuf.append(toString(stringArrays[i])); strbuf.append("}"); delimiter=" "; } return strbuf.toString(); } } } /** * <p>Unit testing.</p> * * @param arg ignored */ public static void main(String arg[]) { String s; s="1234"; System.err.println("src <"+s+"> dst <"+removeTrailingSpaces(s)+">"); s="1234 "; System.err.println("src <"+s+"> dst <"+removeTrailingSpaces(s)+">"); s="12 34 "; System.err.println("src <"+s+"> dst <"+removeTrailingSpaces(s)+">"); s=" 1234"; System.err.println("src <"+s+"> dst <"+removeTrailingSpaces(s)+">"); s=" 1234 "; System.err.println("src <"+s+"> dst <"+removeTrailingSpaces(s)+">"); s="1"; System.err.println("src <"+s+"> dst <"+removeTrailingSpaces(s)+">"); s=" "; System.err.println("src <"+s+"> dst <"+removeTrailingSpaces(s)+">"); s=" "; System.err.println("src <"+s+"> dst <"+removeTrailingSpaces(s)+">"); s=""; System.err.println("src <"+s+"> dst <"+removeTrailingSpaces(s)+">"); String s1; String s2; s1 = "this is 2 way"; s2 = "this is 2 way"; System.err.println("s1 <"+s1+"> s2 <"+s2+"> : compare ="+compareStringsWithEmbeddedNonZeroPaddedIntegers(s1,s2)); s1 = "this is 2 way"; s2 = "this is 10 way"; System.err.println("s1 <"+s1+"> s2 <"+s2+"> : compare ="+compareStringsWithEmbeddedNonZeroPaddedIntegers(s1,s2)); s1 = "this is 10 way"; s2 = "this is 2 way"; System.err.println("s1 <"+s1+"> s2 <"+s2+"> : compare ="+compareStringsWithEmbeddedNonZeroPaddedIntegers(s1,s2)); s1 = "this is 2 way"; s2 = "this is 2 way plus"; System.err.println("s1 <"+s1+"> s2 <"+s2+"> : compare ="+compareStringsWithEmbeddedNonZeroPaddedIntegers(s1,s2)); s1 = "this is 2 way"; s2 = "this is 10 way plus"; System.err.println("s1 <"+s1+"> s2 <"+s2+"> : compare ="+compareStringsWithEmbeddedNonZeroPaddedIntegers(s1,s2)); s1 = "this is 10 way"; s2 = "this is 2 way plus"; System.err.println("s1 <"+s1+"> s2 <"+s2+"> : compare ="+compareStringsWithEmbeddedNonZeroPaddedIntegers(s1,s2)); } }