/* * EuroCarbDB, a framework for carbohydrate bioinformatics * * Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as * indicated by the @author tags or express copyright attribution * statements applied by the authors. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * A copy of this license accompanies this distribution in the file LICENSE.txt. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * Last commit: $Rev: 1593 $ by $Author: hirenj $ on $Date:: 2009-08-14 #$ */ package org.eurocarbdb.util; import java.util.Map; import java.util.List; import java.util.Arrays; import java.util.ArrayList; import java.util.Collection; import java.net.URI; import java.util.Date; import java.text.DateFormat; import org.apache.log4j.Logger; /** * A utility class to provide various string operations that really * ought to already have been included in Java in the first place. * * @author mjh */ public final class StringUtils { /** logging handle */ static Logger log = Logger.getLogger( StringUtils.class ); /** Shortcut to the java system property "line.separator" for * platform-independant carriage returns. */ public static final String CR = System.getProperty("line.separator"); /** The DateFormat used for parsing {@link Date}s in the {@link coerce} * method. The default is to use a date/time format consistent with the * current default locale. To reset to the default after changing, set to null. */ public static DateFormat defaultDateFormat = null; /** class has static methods only */ private StringUtils() {} /** * Attempts to coerce or derive the given {@link String} value * to the given {@link Class}. For example, most primitive classes * can be coerced, eg: *<pre> * int i = coerce( "234", Integer.class ); * long l = coerce( "234", Long.class ); * float f = coerce( "234.567", Float.class ); * double f = coerce( "234.567", Double.class ); * boolean b = coerce( "false", Boolean.class ); *</pre> * * Certain additional non-primitive classes are also supported. *<pre> * Date d = coerce("Dec 15, 2008", Date.class ); // kinda broken atm... * URI u = coerce("http://www.eurocarbdb.org", URI.class ); *</pre> * * @throws IllegalArgumentException * If the property value could not be coerced to the given class * (ie: there was an exception), or if the passed String was null. * @throws UnsupportedOperationException * If there was no support for coercion to the given class. */ public static final <T> T coerce( String value, Class<T> to_class ) { if ( value == null ) throw new IllegalArgumentException( "Passed String value was null"); try { // primitives if ( to_class == Integer.class ) return (T) new Integer( value ); if ( to_class == Double.class ) return (T) new Double( value ); if ( to_class == Boolean.class ) return (T) new Boolean( value ); if ( to_class == Long.class ) return (T) new Long( value ); if ( to_class == Float.class ) return (T) new Float( value ); if ( to_class == Character.class ) return (T) new Character( value.charAt( 0 ) ); // other classes... // Date parsing is wtf broken, commenting out for now... /* else if ( to_class == Date.class ) { if ( defaultDateFormat == null ) { defaultDateFormat = DateFormat.getDateTimeInstance( DateFormat.LONG, DateFormat.LONG ); defaultDateFormat.setLenient( true ); } synchronized ( defaultDateFormat ) { return (T) defaultDateFormat.parse( value ); } } */ else if ( to_class == URI.class ) { return (T) new URI( value ); } } catch ( Exception ex ) { throw new IllegalArgumentException( "Couldn't coerce '" + value + "' to " + to_class , ex ); } // fallback if not coerceable throw new UnsupportedOperationException( "Coercion to " + to_class + " is not (yet?) supported" ); } /** * Attempts to derive an abbreviation for the given phrase, minus * prepositional words like 'and', 'the', 'of', 'from', etc. A phrase * consisting of a single word once prepositions are removed is * returned unchanged. *<br/> * Examples: *<ul> * <li>"Journal of Biological Chemistry" becomes "J.B.C."</li> * <li>"The house of the rising sun" becomes "H.R.S."</li> * <li>"The quick brown fox jumped over the lazy hare" becomes "Q.B.F.J.O.L.H."</li> * <li>"The doctor" becomes "The doctor" (unchanged) * <li>"Medicine" becomes "Medicine" (unchanged) *</ul> */ public static final String guessAbbreviationFor( String phrase ) { String[] words = phrase.toLowerCase().split("((\\b(and|of|the|to|a|from|in|&)\\b)\\s*)|\\s+"); if ( words.length == 0 ) return phrase; if ( words.length == 1 ) return phrase; char[] letters = new char[ words.length * 2 ]; int count = 0; for ( String s : words ) { if ( s.length() == 0 ) continue; letters[count++] = s.charAt( 0 ); letters[count++] = '.'; } if ( count == 2 ) return phrase; return new String( letters ).toUpperCase(); } /* join *//**************************************************** * * Joins a list of strings (objects) by the given join string, * as per the Perl function 'join'. Why this method isn't already * in the language is a mystery. This method has the unusual form * <tt>(String, Object, Object... )</tt> to eliminate compiler ambiguity * when encountering code that uses <tt>join( String, Object[] )</tt>. * * @param join_string * The string to be used for joining. * @param item * The first object in the list of items to be joined. * @param other_items * A list of additional objects/strings to join. * @return * The joined string. */ public static final String join( String join_string, Object item, Object... other_items ) { if ( other_items.length == 0 ) return item.toString(); if ( other_items.length == 1 ) return item.toString() + join_string + other_items[0]; StringBuilder sb = new StringBuilder(); sb.append( item ); for ( int i = 0; i < other_items.length; i++ ) { sb.append( join_string ); sb.append( other_items[i] ); } return sb.toString(); } /* join *//**************************************************** * * @see #join(String, Object, Object[]) */ public static final String join( String join_string, Object[] a ) { //return join( join_string, c.toArray() ); if ( a == null ) return null; if ( a.length == 0 ) return ""; StringBuilder sb = new StringBuilder(); sb.append( a[0] ); for ( int i = 1; i < a.length; i++ ) { sb.append( join_string ); sb.append( a[i] ); } return sb.toString(); } /* public static final <Arg,Res> List<Res> map( List<Arg> list, MapFunc<Arg,Res> func ) { if ( list == null ) throw new IllegalArgumentException( "Argument 'list' can't be null"); List<Res> newlist = new ArrayList<Res>( list.size() ); for ( Arg a : list ) newlist.add( func.process( a ) ); return newlist; } public static final <A> List<A> grep( List<A> list, MapFunc<A,Boolean> func ) { if ( list == null ) throw new IllegalArgumentException( "Argument 'list' can't be null"); List<A> newlist = new ArrayList<A>( list.size() ); for ( A a : list ) if ( func.process( a ) ) newlist.add( a ); return newlist; } public interface MapFunc<Arg,Res> { public Res process( Arg a ); } public interface GrepFunc<Arg> { public boolean process( Arg a ); } */ /* join *//**************************************************** * * @see #join(String, Object[]) */ public static final String join( String join_string, Collection c ) { return join( join_string, c.toArray() ); } /* join *//**************************************************** * * Joins the list of strings given by the objects in the passed Map * by the given join strings, as per the Perl function 'join'. * * For example, the Map - * <pre> * Map m = { // java should have a declarative syntax for Maps... * name => "Matt" * age => 32 * } * </pre> * * when called as <code> join( m, ": ", ", " ) </code> would return * <code> "name: Matt, age: 32" </code> as a result. Note that the order * of key-value pairs in the final string is arbitrary -- use the 4-argument * form of this method if you want the keys to appear in a specific order * or if you want only certain keys/values printed. * * @param map * The map. * @param join_string1 * The string to use to join key-value pairs from the Map. * @param join_string2 * The string to use between joined key-value pairs from the Map. * @return * The joined string. * @see #join(String, Object[]) */ public static final String join( Map map, String join_string1, String join_string2 ) { return join( map, join_string1, join_string2, map.keySet().toArray() ); } /* join *//**************************************************** * * Same as the other join method for a Map, except the keys to be * joined are explicitly given. No, this method doesn't check that * the given keys actually exist in the Map, that's your problem. * * @param map * The map. * @param join_string1 * The string to use to join key-value pairs from the Map. * @param join_string2 * The string to use between joined key-value pairs from the Map. * @param keys * An array which specifies which keys to be joined and in what order. * @return * The joined string. * @see #join(Map, String, String) */ public static final String join( Map map, String join_string1, String join_string2, Object[] keys ) { if ( map == null || map.isEmpty() ) return ""; StringBuilder sb = new StringBuilder( keys[0] + join_string1 + map.get( keys[0] ) ); for ( int i = 1; i < map.size(); i++ ) { sb.append( join_string2 + keys[i] + join_string1 + map.get( keys[i] ) ); } return sb.toString(); } /* repeat *//************************************************** * * Returns a string that is formed by the repetition of a provided * string a given number of times. * * @param string * The string to be copied. * @param repeat * The number of times to repeat the given string. * @return * The joined string. * @throws IllegalArgumentException * if argument <tt>times</tt> is < 0 or passed string is null */ public static final String repeat( String string, int times ) { if ( times < 0 ) throw new IllegalArgumentException( "Argument 'times' cannot be less than 0"); if ( string == null ) throw new IllegalArgumentException( "Argument 'string' cannot be null"); if ( times == 1 ) return string; if ( times == 0 || string.length() == 0 ) return ""; StringBuilder sb = new StringBuilder( string.length() * times ); while ( times-- > 0 ) sb.append( string ); return sb.toString(); } /* repeat *//************************************************** * * Returns a string that is formed by the repetition of the given * <tt>char</tt> the given number of times, eg: *<pre> * // returns "aaaaa" * repeat('a', 5 ); </pre> */ public static final String repeat( char c, int times ) { if ( times < 0 ) throw new IllegalArgumentException( "Argument 'times' cannot be less than 0"); if ( times == 0 ) return ""; if ( times == 1 ) return Character.toString( c ); char[] chars = new char[times]; Arrays.fill( chars, c ); return new String( chars ); } /* split *//*************************************************** * * Splits the given String by the given char, vaguely similar to the * <a href="http://perldoc.perl.org/functions/split.html"> * Perl function 'split'</a>. *<pre> * // returns [ "abc", "def", "", "ghi" ] * split(';', "abc;def;;ghi") </pre> */ public static final List<String> split( char c, String s ) { List<String> strings = new ArrayList<String>(); int start = 0; for ( int i = 0; i < s.length(); i++ ) { if ( s.charAt( i ) != c ) continue; strings.add( s.substring( start, i ) ); start = i + 1; } if ( start <= s.length() ) strings.add( s.substring( start ) ); return strings; } /* toCamelCase *//********************************************* *<p> * Converts underscore_case_like_this to camelCaseStrings. *</p> *<p> * <code>"a_name_like_this"</code> becomes <code>"aNameLikeThis"</code><br/> * <code>"an_ame_like_this"</code> becomes <code>"anAmeLikeThis"</code><br/> * <code>"aname_like_this"</code> becomes <code>"anameLikeThis"</code><br/> * <code>"named_like_this"</code> becomes <code>"namedLikeThis"</code><br/> * <code>"abba_was_great"</code> becomes <code>"abbaWasGreat"</code><br/> * <code>"abcd"</code> becomes <code>"abcd"</code><br/> * <code>"the_abba_collection"</code> becomes <code>"theAbbaCollection"</code><br/> * <code>"the_abba_collection"</code> becomes <code>"theAbbaCollection"</code><br/> * <code>"anamelikethis"</code> becomes <code>"anamelikethis"</code><br/> * <code>"png_ase_f"</code> becomes <code>"pngAseF"</code><br/> * <code>"po_li_ce_ma_n"</code> becomes <code>"poLiCeMaN"</code><br/> * <code>"xml_parser"</code> becomes <code>"xmlParser"</code><br/> * <code>"the_xml_parser"</code> becomes <code>"theXmlParser"</code><br/> * <code>"ecma_script"</code> becomes <code>"ecmaScript"</code><br/> *</p> *<p> * Note that the conversion from under_score to camelCase is not * necessarily reversible by the method toUnderscoreCase() although * in most cases, it will be. *</p> */ public static final String toCamelCase( String underscore_case ) { if ( underscore_case == null ) return null; if ( underscore_case.length() == 0 ) return ""; String[] pieces = underscore_case.split("_"); if ( pieces.length == 1 ) return underscore_case; for ( int i = 1; i < pieces.length; i++ ) pieces[i] = ucfirst( pieces[i] ); return join( "", (Object[]) pieces ); } /* toUnderscoreCase *//**************************************** *<p> * Converts camelCaseStrings to underscore_case_like_this. *</p> *<p> * <code>"aNameLikeThis"</code> becomes <code>"a_name_like_this"</code><br/> * <code>"ANameLikeThis"</code> becomes <code>"an_ame_like_this"</code><br/> * <code>"AnameLikeThis"</code> becomes <code>"aname_like_this"</code><br/> * <code>"NamedLikeThis"</code> becomes <code>"named_like_this"</code><br/> * <code>"ABBA_wasGreat"</code> becomes <code>"abba_was_great"</code><br/> * <code>"ABCD"</code> becomes <code>"abcd"</code><br/> * <code>"TheABBAcollection"</code> becomes <code>"the_abba_collection"</code><br/> * <code>"The_ABBA_Collection"</code> becomes <code>"the_abba_collection"</code><br/> * <code>"anamelikethis"</code> becomes <code>"anamelikethis"</code><br/> * <code>"PNGaseF"</code> becomes <code>"png_ase_f"</code><br/> * <code>"PoLiCeMaN"</code> becomes <code>"po_li_ce_ma_n"</code><br/> * <code>"XMLparser"</code> becomes <code>"xml_parser"</code><br/> * <code>"theXMLparser"</code> becomes <code>"the_xml_parser"</code><br/> * <code>"ECMAscript"</code> becomes <code>"ecma_script"</code><br/> *</p> *<p> * Note that the conversion from camelCase to under_score is not * necessarily reversible by the method toCamelCase(), although * in most cases, it will be. *</p> */ public static final String toUnderscoreCase( String camelCase ) { if ( camelCase == null ) return null; if ( camelCase.length() == 0 ) return ""; String[] pieces = camelCase.split( "(?<=[^A-Z_])(?=[A-Z])|(?:(?<=[A-Z][A-Z])(?=[^A-Z_]))"); return join( "_", pieces ).toLowerCase(); } /* lcfirst *//************************************************* * * Lower-cases the first character of the passed string. */ public static final String lcfirst( String s ) { if ( s == null ) return null; if ( s.length() == 0 ) return ""; if ( Character.isLowerCase( s.charAt( 0 ) ) ) return s; char[] chars = s.toCharArray(); chars[0] = Character.toLowerCase( chars[0] ); return new String( chars ); } /* ucfirst *//************************************************* * * Upper-cases the first character of the passed string. */ public static final String ucfirst( String s ) { if ( s == null ) return null; if ( s.length() == 0 ) return ""; if ( Character.isUpperCase( s.charAt( 0 ) ) ) return s; char[] chars = s.toCharArray(); chars[0] = Character.toUpperCase( chars[0] ); return new String( chars ); } /* paramToInt * Convert a parameter object (either a single String, or an array of Strings) * into an int */ public static final int paramToInt(Object input) { if (input.getClass().isArray()) { input = ((Object[]) input)[0]; } if (input instanceof String) { if (((String) input).length() == 0) { return 0; } return Integer.parseInt((String) input); } throw new IllegalArgumentException( "Argument 'input' is neither an array of strings, or a String"); } } // end class