/**
 * Copyright (c) 2005-2017, KoLmafia development team
 * http://kolmafia.sourceforge.net/
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  [1] Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  [2] Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *  [3] Neither the name "KoLmafia" nor the names of its contributors may
 *      be used to endorse or promote products derived from this software
 *      without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package net.sourceforge.kolmafia.utilities;

import java.io.UnsupportedEncodingException;

import java.net.URLDecoder;
import java.net.URLEncoder;

import java.text.ParseException;
import java.text.SimpleDateFormat;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.WeakHashMap;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.sourceforge.kolmafia.RequestLogger;

/**
 * Static helpers for string manipulation: cached entity/URL encoding and
 * decoding, name canonicalization and fuzzy name lookup, in-place and
 * copying search-and-replace, lenient number parsing, simple text wrapping
 * and a few miscellaneous transforms.
 */
public class StringUtilities
{
	// Memoization tables for the various encode/decode operations below.
	private static final HashMap<String, String> entityEncodeCache = new HashMap<String, String>();
	private static final HashMap<String, String> entityDecodeCache = new HashMap<String, String>();
	private static final HashMap<String, String> urlEncodeCache = new HashMap<String, String>();
	private static final HashMap<String, String> urlDecodeCache = new HashMap<String, String>();
	private static final HashMap<String, String> displayNameCache = new HashMap<String, String>();
	private static final HashMap<String, String> canonicalNameCache = new HashMap<String, String>();

	// Maps a text with every preposition replaced by "@" back to the
	// originally registered text.
	private static final HashMap<String, String> prepositionsMap = new HashMap<String, String>();

	// Per-array cache of character-set bitmasks used by getMatchingNames.
	// Arrays hash by identity, so an entry belongs to one specific array
	// instance and is dropped once that array is no longer referenced.
	private static final WeakHashMap<String[], int[]> hashCache = new WeakHashMap<String[], int[]>();

	// Matches every run of characters that is neither a digit nor '-'.
	private static final Pattern NONINTEGER_PATTERN = Pattern.compile( "[^0-9\\-]+" );

	private static final Pattern PREPOSITIONS_PATTERN =
		Pattern.compile( "\\b(?:about|above|across|after|against|along|among|around|at|before|behind|" +
				 "below|beneath|beside|between|beyond|by|down|during|except|for|from|in|inside|" +
				 "into|like|near|of|off|on|onto|out|outside|over|past|through|throughout|to|" +
				 "under|up|upon|with|within|without)\\b" );

	// The pattern contains textual day and month names ("EEE", "MMM"), which
	// are locale-sensitive. Pin the locale so that English dates parse and
	// format identically regardless of the JVM's default locale.
	// SimpleDateFormat is not thread-safe; every use below is synchronized.
	private static final SimpleDateFormat DATE_FORMAT =
		new SimpleDateFormat( "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US );

	static
	{
		DATE_FORMAT.setTimeZone( TimeZone.getTimeZone( "GMT" ) );
	}

	/**
	 * Parses a date of the form "EEE, dd MMM yyyy HH:mm:ss zzz".
	 *
	 * @param dateString the text to parse; may be null
	 * @return milliseconds since the epoch, or 0 if the input is null or
	 *         cannot be parsed
	 */
	public static synchronized final long parseDate( final String dateString )
	{
		if ( dateString != null )
		{
			try
			{
				return StringUtilities.DATE_FORMAT.parse( dateString ).getTime();
			}
			catch ( Exception e )
			{
				// Best effort: an unparseable date is reported as 0.
			}
		}

		return 0;
	}

	/**
	 * Formats a timestamp, given in milliseconds since the epoch, using
	 * {@link #formatDate(Date)}.
	 */
	public static final String formatDate( final long date )
	{
		return StringUtilities.formatDate( new Date( date ) );
	}

	/**
	 * Formats a date as "EEE, dd MMM yyyy HH:mm:ss zzz" in GMT.
	 *
	 * @return the formatted date, or the empty string if formatting fails
	 */
	public static synchronized final String formatDate( final Date date )
	{
		try
		{
			return StringUtilities.DATE_FORMAT.format( date );
		}
		catch ( Exception e )
		{
			// Best effort: a date that cannot be formatted yields "".
			return "";
		}
	}

	/**
	 * Decodes a byte array using the named character encoding.
	 *
	 * @return the decoded string, or the empty string if the encoding is
	 *         not supported
	 */
	public static final String getEncodedString( final byte [] bytes, final String encoding )
	{
		try
		{
			return new String( bytes, encoding );
		}
		catch ( UnsupportedEncodingException e )
		{
			return "";
		}
	}

	/**
	 * Returns the entity-encoded version of the provided UTF-8 string,
	 * consulting and updating the cache.
	 */
	public static final String getEntityEncode( final String utf8String )
	{
		return StringUtilities.getEntityEncode( utf8String, true );
	}

	/**
	 * Returns the entity-encoded version of the provided UTF-8 string.
	 *
	 * @param utf8String the string to encode; null is returned unchanged
	 * @param cache whether to look up and (for inputs shorter than 100
	 *        characters) store the result in the cache
	 */
	public static final String getEntityEncode( String utf8String, final boolean cache )
	{
		if ( utf8String == null )
		{
			return utf8String;
		}

		String entityString = null;

		if ( cache )
		{
			entityString = StringUtilities.entityEncodeCache.get( utf8String );
		}

		if ( entityString == null )
		{
			// If this string is a substring of a longer string, make sure
			// we aren't keeping a reference to the longer string.
			utf8String = new String( utf8String );

			// A string containing both '&' and ';' may already hold
			// entities; unescape first, presumably so that they are not
			// encoded a second time.
			if ( utf8String.contains( "&" ) && utf8String.contains( ";" ) )
			{
				entityString = CharacterEntities.escape( CharacterEntities.unescape( utf8String ) );
			}
			else
			{
				entityString = CharacterEntities.escape( utf8String );
			}

			// Whitespace is deliberately left untouched: the Hodgman
			// journals have a double space after the colon, and altering
			// it makes them unsearchable in the Mall.

			if ( cache && utf8String.length() < 100 )
			{
				StringUtilities.entityEncodeCache.put( utf8String, entityString );
			}
		}

		return entityString;
	}

	/**
	 * Returns the UTF-8 version of the provided character entity string,
	 * consulting and updating the cache.
	 */
	public static final String getEntityDecode( final String entityString )
	{
		return StringUtilities.getEntityDecode( entityString, true );
	}

	/**
	 * Returns the UTF-8 version of the provided character entity string.
	 *
	 * @param entityString the string to decode; null is returned unchanged
	 * @param cache whether to look up and (for inputs shorter than 100
	 *        characters) store the result in the cache
	 */
	public static final String getEntityDecode( String entityString, final boolean cache )
	{
		if ( entityString == null )
		{
			return entityString;
		}

		String utf8String = null;

		if ( cache )
		{
			utf8String = StringUtilities.entityDecodeCache.get( entityString );
		}

		if ( utf8String == null )
		{
			// If this string is a substring of a longer string, make sure
			// we aren't keeping a reference to the longer string.
			entityString = new String( entityString );
			utf8String = CharacterEntities.unescape( entityString );

			if ( cache && entityString.length() < 100 )
			{
				StringUtilities.entityDecodeCache.put( entityString, utf8String );
			}
		}

		return utf8String;
	}

	/**
	 * Returns the URL-encoded (UTF-8) version of the provided string. Results
	 * are cached.
	 *
	 * @param url the string to encode; null is returned unchanged
	 */
	public static final String getURLEncode( final String url )
	{
		if ( url == null )
		{
			return url;
		}

		String encodedURL = StringUtilities.urlEncodeCache.get( url );

		if ( encodedURL == null )
		{
			try
			{
				encodedURL = URLEncoder.encode( url, "UTF-8" );
			}
			catch ( UnsupportedEncodingException e )
			{
				// Fall back to the input if UTF-8 is unavailable.
				encodedURL = url;
			}

			StringUtilities.urlEncodeCache.put( url, encodedURL );
		}

		return encodedURL;
	}

	/**
	 * Returns the URL-decoded (UTF-8) version of the provided string. Results
	 * are cached.
	 *
	 * @param url the string to decode; null is returned unchanged
	 */
	public static final String getURLDecode( final String url )
	{
		if ( url == null )
		{
			return url;
		}

		String decodedURL = StringUtilities.urlDecodeCache.get( url );

		if ( decodedURL == null )
		{
			try
			{
				decodedURL = URLDecoder.decode( url, "UTF-8" );
			}
			catch ( UnsupportedEncodingException e )
			{
				// Fall back to the input if UTF-8 is unavailable.
				decodedURL = url;
			}

			StringUtilities.urlDecodeCache.put( url, decodedURL );
		}

		return decodedURL;
	}

	/**
	 * Returns the display name for the provided canonical name, i.e. the
	 * name with character entities decoded. Results are cached.
	 *
	 * @param name the canonical name; null is returned unchanged
	 */
	public static final String getDisplayName( String name )
	{
		if ( name == null )
		{
			return name;
		}

		String displayName = StringUtilities.displayNameCache.get( name );

		if ( displayName == null )
		{
			// If this string is a substring of a longer string, make sure
			// we aren't keeping a reference to the longer string.
			name = new String( name );
			displayName = StringUtilities.getEntityDecode( name );
			StringUtilities.displayNameCache.put( name, displayName );
		}

		return displayName;
	}

	/**
	 * Returns the canonicalized name for the provided display name, i.e. the
	 * entity-encoded name converted to lower case. Results for names shorter
	 * than 100 characters are cached.
	 *
	 * @param name the display name; null yields null
	 */
	public static final String getCanonicalName( String name )
	{
		if ( name == null )
		{
			return null;
		}

		String canonicalName = StringUtilities.canonicalNameCache.get( name );

		if ( canonicalName == null )
		{
			// If this string is a substring of a longer string, make sure
			// we aren't keeping a reference to the longer string.
			name = new String( name );
			canonicalName = StringUtilities.getEntityEncode( name ).toLowerCase();

			if ( name.length() < 100 )
			{
				StringUtilities.canonicalNameCache.put( name, canonicalName );
			}
		}

		return canonicalName;
	}

	/**
	 * Returns a list of all names which match the given search string.
	 *
	 * Matching is attempted in stages, and the first stage which produces
	 * any result wins: an exact match, a substring match starting at a word
	 * boundary, a substring match anywhere, and finally a fuzzy match. A
	 * search string starting with a double quote requests an exact match
	 * only.
	 *
	 * @param names The names to search. Looked up with a binary search, so
	 *        the array must be sorted.
	 * @param searchString The string for which to search; null is treated
	 *        as the empty string
	 * @return the matching names; empty if nothing matches or the search
	 *         string is empty
	 */
	public static final List<String> getMatchingNames( final String[] names, String searchString )
	{
		if ( searchString == null )
		{
			searchString = "";
		}

		searchString = searchString.trim();

		boolean isExactMatch = searchString.startsWith( "\"" );
		List<String> matchList = new ArrayList<String>();

		if ( isExactMatch )
		{
			// The quotes might be part of the name itself
			String fullString = StringUtilities.getCanonicalName( searchString );
			if ( Arrays.binarySearch( names, fullString ) >= 0 )
			{
				matchList.add( fullString );
				return matchList;
			}

			// Strip the opening quote and, if present, the closing quote.
			// A lone quote character is both the first and the last
			// character; it must not be stripped twice.
			int length = searchString.length();
			int end = ( length > 1 && searchString.endsWith( "\"" ) ) ? length - 1 : length;
			searchString = searchString.substring( 1, end );
		}

		searchString = StringUtilities.getCanonicalName( searchString );

		if ( searchString.length() == 0 )
		{
			return matchList;
		}

		if ( Arrays.binarySearch( names, searchString ) >= 0 )
		{
			matchList.add( searchString );
			return matchList;
		}

		if ( isExactMatch )
		{
			return matchList;
		}

		int nameCount = names.length;
		int[] hashes = StringUtilities.hashCache.get( names );

		if ( hashes == null )
		{
			hashes = new int[ nameCount ];
			for ( int i = 0; i < nameCount; ++i )
			{
				hashes[ i ] = StringUtilities.stringHash( names[ i ] );
			}
			StringUtilities.hashCache.put( names, hashes );
		}

		// A name can only contain the search string if every bit set in
		// the search string's hash is also set in the name's hash. This
		// cheap test filters candidates before the real comparison.
		int hash = StringUtilities.stringHash( searchString );

		// First pass: substring starting at a word boundary
		for ( int i = 0; i < nameCount; ++i )
		{
			if ( ( hashes[ i ] & hash ) == hash &&
			     StringUtilities.substringMatches( names[ i ], searchString, true ) )
			{
				matchList.add( names[ i ] );
			}
		}

		if ( !matchList.isEmpty() )
		{
			return matchList;
		}

		// Second pass: substring anywhere
		for ( int i = 0; i < nameCount; ++i )
		{
			if ( ( hashes[ i ] & hash ) == hash &&
			     StringUtilities.substringMatches( names[ i ], searchString, false ) )
			{
				matchList.add( names[ i ] );
			}
		}

		if ( !matchList.isEmpty() )
		{
			return matchList;
		}

		// There is an oddball special case here: a search string containing
		// spaces can successfully fuzzy-match an item name with no spaces,
		// for example "in the box" will match "chef-in-the-box". However,
		// the hash check would prevent us from even trying such a match.
		// Therefore, strip out the bit representing a space in the hash:

		hash &= ~StringUtilities.stringHash( " " );

		for ( int i = 0; i < nameCount; ++i )
		{
			if ( ( hashes[ i ] & hash ) == hash &&
			     StringUtilities.fuzzyMatches( names[ i ], searchString ) )
			{
				matchList.add( names[ i ] );
			}
		}

		return matchList;
	}

	/**
	 * Computes a 32-bit mask with one bit set for each distinct value of the
	 * low five bits of the characters in the string.
	 */
	private static final int stringHash( final String s )
	{
		int hash = 0;
		for ( int i = s.length() - 1; i >= 0; --i )
		{
			hash |= 1 << ( s.charAt( i ) & 0x1F );
		}
		return hash;
	}

	/**
	 * Tests whether the source contains the substring.
	 *
	 * Only the first occurrence of the substring is examined.
	 *
	 * @param source the string to search; null never matches
	 * @param substring the text to look for; null or empty always matches
	 * @param checkBoundaries if true, the first occurrence must be at the
	 *        start of the source or be preceded by a character which is
	 *        neither a letter nor a digit
	 */
	public static final boolean substringMatches( final String source, final String substring,
						      final boolean checkBoundaries )
	{
		if ( source == null )
		{
			return false;
		}

		if ( substring == null || substring.length() == 0 )
		{
			return true;
		}

		int index = source.indexOf( substring );
		if ( index == -1 )
		{
			return false;
		}

		if ( !checkBoundaries || index == 0 )
		{
			return true;
		}

		return !Character.isLetterOrDigit( source.charAt( index - 1 ) );
	}

	/**
	 * Tests whether the non-whitespace characters of the search string
	 * appear, in order, in the source string, where each matched character
	 * is at the start of the source, directly follows the previously
	 * matched character, or follows a word boundary.
	 *
	 * @param sourceString the string to search; null never matches
	 * @param searchString the abbreviation to look for; null or empty
	 *        always matches
	 */
	public static final boolean fuzzyMatches( final String sourceString, final String searchString )
	{
		if ( sourceString == null )
		{
			return false;
		}

		if ( searchString == null || searchString.length() == 0 )
		{
			return true;
		}

		return StringUtilities.fuzzyMatches( sourceString, searchString, -1, -1 );
	}

	/**
	 * Recursive worker for {@link #fuzzyMatches(String, String)}.
	 *
	 * @param lastSourceIndex index in the source of the previously matched
	 *        character, or -1 if none
	 * @param lastSearchIndex index in the search string of the previously
	 *        matched character, or -1 if none
	 */
	private static final boolean fuzzyMatches( final String sourceString, final String searchString,
						   final int lastSourceIndex, final int lastSearchIndex )
	{
		int maxSearchIndex = searchString.length() - 1;

		if ( lastSearchIndex == maxSearchIndex )
		{
			return true;
		}

		// Skip over any whitespace in the search string, since it
		// holds no meaning. Running off the end counts as a match.

		char searchChar;
		int searchIndex = lastSearchIndex;

		do
		{
			if ( ++searchIndex > maxSearchIndex )
			{
				return true;
			}

			searchChar = searchString.charAt( searchIndex );
		}
		while ( Character.isWhitespace( searchChar ) );

		// If it matched the first character in the source string, the
		// character right after the last search, or the match is on a
		// word boundary, continue searching.

		int sourceIndex = sourceString.indexOf( searchChar, lastSourceIndex + 1 );

		while ( sourceIndex != -1 )
		{
			if ( sourceIndex == 0 ||
			     sourceIndex == lastSourceIndex + 1 ||
			     isWordBoundary( sourceString.charAt( sourceIndex - 1 ) ) )
			{
				if ( StringUtilities.fuzzyMatches( sourceString, searchString, sourceIndex, searchIndex ) )
				{
					return true;
				}
			}

			sourceIndex = sourceString.indexOf( searchChar, sourceIndex + 1 );
		}

		return false;
	}

	/**
	 * A character separates words if it is neither a letter, a digit nor '#'.
	 */
	private static final boolean isWordBoundary( char ch )
	{
		return ch != '#' && !Character.isLetterOrDigit( ch );
	}

	/**
	 * Inserts a string immediately before the first occurrence of the search
	 * string. The buffer is unchanged if the search string is not found.
	 */
	public static final void insertBefore( final StringBuffer buffer, final String searchString,
					       final String insertString )
	{
		int searchIndex = buffer.indexOf( searchString );
		if ( searchIndex == -1 )
		{
			return;
		}

		buffer.insert( searchIndex, insertString );
	}

	/**
	 * Inserts a string immediately after the first occurrence of the search
	 * string. The buffer is unchanged if the search string is not found.
	 */
	public static final void insertAfter( final StringBuffer buffer, final String searchString,
					      final String insertString )
	{
		int searchIndex = buffer.indexOf( searchString );
		if ( searchIndex == -1 )
		{
			return;
		}

		buffer.insert( searchIndex + searchString.length(), insertString );
	}

	/**
	 * Returns the original string with the first occurrence of the search
	 * string removed.
	 */
	public static final String singleStringDelete( final String originalString, final String searchString )
	{
		return StringUtilities.singleStringReplace( originalString, searchString, "" );
	}

	/**
	 * Returns the original string with the first occurrence of the search
	 * string replaced. The search string is taken literally, not as a
	 * regular expression.
	 *
	 * @param originalString the text to search; null yields null
	 * @return the modified text, or the original string itself if the search
	 *         string does not occur
	 */
	public static final String singleStringReplace( final String originalString, final String searchString,
							final String replaceString )
	{
		if ( originalString == null )
		{
			return null;
		}

		int lastIndex = originalString.indexOf( searchString );
		if ( lastIndex == -1 )
		{
			return originalString;
		}

		StringBuilder buffer = new StringBuilder();
		buffer.append( originalString.substring( 0, lastIndex ) );
		buffer.append( replaceString );
		buffer.append( originalString.substring( lastIndex + searchString.length() ) );
		return buffer.toString();
	}

	/**
	 * Removes the first occurrence of the search string from the buffer.
	 */
	public static final void singleStringDelete( final StringBuffer buffer, final String searchString )
	{
		StringUtilities.singleStringReplace( buffer, searchString, "" );
	}

	/**
	 * Replaces the first occurrence of the search string in the buffer. The
	 * buffer is unchanged if the search string is not found.
	 */
	public static final void singleStringReplace( final StringBuffer buffer, final String searchString,
						      final String replaceString )
	{
		int index = buffer.indexOf( searchString );
		if ( index != -1 )
		{
			buffer.replace( index, index + searchString.length(), replaceString );
		}
	}

	/**
	 * Returns the original string with every occurrence of the search string
	 * removed.
	 */
	public static final String globalStringDelete( final String originalString, final String searchString )
	{
		return StringUtilities.globalStringReplace( originalString, searchString, "" );
	}

	/**
	 * Returns the original string with every occurrence of the search string
	 * replaced. The search string is taken literally, not as a regular
	 * expression, and replaced text is not rescanned.
	 *
	 * @param originalString the text to search; null yields null
	 * @param searchString the text to look for; if empty, the original
	 *        string is returned unchanged
	 * @param replaceString the replacement; null is treated as the empty
	 *        string, as in the StringBuffer variant
	 * @return the modified text, or the original string itself if the search
	 *         string does not occur
	 */
	public static final String globalStringReplace( final String originalString, final String searchString,
							final String replaceString )
	{
		if ( originalString == null )
		{
			return null;
		}

		if ( searchString.equals( "" ) )
		{
			return originalString;
		}

		int lastIndex = originalString.indexOf( searchString );
		if ( lastIndex == -1 )
		{
			return originalString;
		}

		String replacement = replaceString == null ? "" : replaceString;

		StringBuilder buffer = new StringBuilder( originalString );
		while ( lastIndex != -1 )
		{
			buffer.replace( lastIndex, lastIndex + searchString.length(), replacement );
			// Resume after the inserted text so that it is not rescanned
			lastIndex = buffer.indexOf( searchString, lastIndex + replacement.length() );
		}

		return buffer.toString();
	}

	/**
	 * Replaces every occurrence of the tag in the buffer with the decimal
	 * representation of the given integer.
	 */
	public static final void globalStringReplace( final StringBuffer buffer, final String tag,
						      final int replaceWith )
	{
		StringUtilities.globalStringReplace( buffer, tag, String.valueOf( replaceWith ) );
	}

	/**
	 * Removes every occurrence of the tag from the buffer.
	 */
	public static final void globalStringDelete( final StringBuffer buffer, final String tag )
	{
		StringUtilities.globalStringReplace( buffer, tag, "" );
	}

	/**
	 * Replaces every occurrence of the tag in the buffer, in place. The tag
	 * is taken literally, not as a regular expression, and replaced text is
	 * not rescanned.
	 *
	 * @param buffer the buffer to modify; null is ignored
	 * @param tag the text to look for; if empty, nothing is done
	 * @param replaceWith the replacement; null is treated as the empty string
	 */
	public static final void globalStringReplace( final StringBuffer buffer, final String tag,
						      String replaceWith )
	{
		if ( buffer == null )
		{
			return;
		}

		if ( tag.equals( "" ) )
		{
			return;
		}

		if ( replaceWith == null )
		{
			replaceWith = "";
		}

		int lastIndex = buffer.indexOf( tag );
		while ( lastIndex != -1 )
		{
			buffer.replace( lastIndex, lastIndex + tag.length(), replaceWith );
			// Resume after the inserted text so that it is not rescanned
			lastIndex = buffer.indexOf( tag, lastIndex + replaceWith.length() );
		}
	}

	/**
	 * Tests whether the string looks like an integer: a sign or digit
	 * followed by any number of digits and commas.
	 *
	 * Note that a lone sign is accepted; callers which go on to parse the
	 * string must be prepared for a NumberFormatException.
	 */
	public static final boolean isNumeric( String string )
	{
		if ( string == null || string.length() == 0 )
		{
			return false;
		}

		char ch = string.charAt( 0 );

		if ( ( ch != '-' ) && ( ch != '+' ) && !Character.isDigit( ch ) )
		{
			return false;
		}

		for ( int i = 1; i < string.length(); ++i )
		{
			ch = string.charAt( i );

			if ( ( ch != ',' ) && !Character.isDigit( ch ) )
			{
				return false;
			}
		}

		return true;
	}

	/**
	 * Tests whether the string looks like a decimal number: an optional
	 * leading sign, then digits and commas with at most one decimal point.
	 * At least one digit is required, so "-", "+" and "." are rejected.
	 */
	public static final boolean isFloat( String string )
	{
		if ( string == null || string.length() == 0 )
		{
			return false;
		}

		char ch = string.charAt( 0 );

		if ( ( ch != '-' ) && ( ch != '+' ) && ( ch != '.' ) && !Character.isDigit( ch ) )
		{
			return false;
		}

		// A leading decimal point counts towards the limit of one
		boolean hasDecimalSeparator = ( ch == '.' );
		boolean hasDigit = Character.isDigit( ch );

		for ( int i = 1; i < string.length(); ++i )
		{
			ch = string.charAt( i );

			if ( ch == '.' )
			{
				if ( hasDecimalSeparator )
				{
					return false;
				}
				hasDecimalSeparator = true;
			}
			else if ( Character.isDigit( ch ) )
			{
				hasDigit = true;
			}
			else if ( ch != ',' )
			{
				return false;
			}
		}

		return hasDigit;
	}

	/**
	 * Leniently parses an integer; see
	 * {@link #parseIntInternal1(String, boolean)}. Text which cannot be
	 * understood is stripped of everything but digits and '-' and parsed
	 * again.
	 */
	public static final int parseInt( String string )
	{
		return StringUtilities.parseIntInternal1( string, false );
	}

	/**
	 * Leniently parses an integer.
	 *
	 * Commas, surrounding whitespace and a leading '+' are ignored. A
	 * decimal number is truncated toward zero. A decimal number followed by
	 * 'k' or 'm' (in either case) is multiplied by one thousand or one
	 * million; any other single trailing character is ignored.
	 *
	 * @param string the text to parse; null or blank yields 0
	 * @param throwException if true, text matching none of the above forms
	 *        raises an exception instead of being handed to
	 *        {@link #parseIntInternal2(String)}
	 * @return the parsed value, or 0 if an integer is out of range
	 * @throws NumberFormatException if the text cannot be understood and
	 *         throwException is true
	 */
	public static final int parseIntInternal1( String string, boolean throwException )
		throws NumberFormatException
	{
		if ( string == null )
		{
			return 0;
		}

		// Remove commas anywhere in the string
		string = StringUtilities.globalStringDelete( string, "," );

		// Remove whitespace from front and end of string
		string = string.trim();

		// Remove + sign from start of string
		if ( string.startsWith( "+" ) )
		{
			string = string.substring( 1 );
		}

		if ( string.length() == 0 )
		{
			return 0;
		}

		if ( StringUtilities.isNumeric( string ) )
		{
			try
			{
				return Integer.parseInt( string );
			}
			catch ( NumberFormatException e )
			{
				RequestLogger.printLine( string + " is out of range, returning 0" );
				return 0;
			}
		}

		// A plain decimal number: discard the fraction
		if ( StringUtilities.isFloat( string ) )
		{
			return (int) StringUtilities.parseFloat( string );
		}

		// A decimal number followed by a one-character suffix
		String fstring = string.substring( 0, string.length() - 1 );

		if ( StringUtilities.isFloat( fstring ) )
		{
			char ch = string.charAt( string.length() - 1 );
			float base = StringUtilities.parseFloat( fstring );
			float multiplier = 1.0f;

			switch ( ch )
			{
			case 'k':
			case 'K':
				multiplier = 1000.0f;
				break;
			case 'm':
			case 'M':
				multiplier = 1000000.0f;
				break;
			}

			return (int) ( base * multiplier );
		}

		if ( throwException )
		{
			throw new NumberFormatException( string );
		}

		return StringUtilities.parseIntInternal2( string );
	}

	/**
	 * Parses an integer after discarding every character which is neither a
	 * digit nor '-'.
	 *
	 * @return the parsed value, or 0 if nothing remains or the remainder is
	 *         not a valid integer
	 */
	public static final int parseIntInternal2( String string )
		throws NumberFormatException
	{
		string = NONINTEGER_PATTERN.matcher( string ).replaceAll( "" );

		if ( string.length() == 0 )
		{
			return 0;
		}

		try
		{
			return Integer.parseInt( string );
		}
		catch ( NumberFormatException e )
		{
			RequestLogger.printLine( string + " is out of range, returning 0" );
			return 0;
		}
	}

	/**
	 * Leniently parses a long; see
	 * {@link #parseLongInternal1(String, boolean)}. Text which cannot be
	 * understood is stripped of everything but digits and '-' and parsed
	 * again.
	 */
	public static final long parseLong( String string )
	{
		return StringUtilities.parseLongInternal1( string, false );
	}

	/**
	 * Leniently parses a long.
	 *
	 * Commas, surrounding whitespace and a leading '+' are ignored. A
	 * decimal number is truncated toward zero. A decimal number followed by
	 * 'k' or 'm' (in either case) is multiplied by one thousand or one
	 * million; any other single trailing character is ignored.
	 *
	 * @param string the text to parse; null or blank yields 0
	 * @param throwException if true, text matching none of the above forms
	 *        raises an exception instead of being handed to
	 *        {@link #parseLongInternal2(String)}
	 * @return the parsed value, or 0 if an integer is out of range
	 * @throws NumberFormatException if the text cannot be understood and
	 *         throwException is true
	 */
	public static final long parseLongInternal1( String string, boolean throwException )
		throws NumberFormatException
	{
		if ( string == null )
		{
			return 0L;
		}

		// Remove commas anywhere in the string
		string = StringUtilities.globalStringDelete( string, "," );

		// Remove whitespace from front and end of string
		string = string.trim();

		// Remove + sign from start of string
		if ( string.startsWith( "+" ) )
		{
			string = string.substring( 1 );
		}

		if ( string.length() == 0 )
		{
			return 0L;
		}

		if ( StringUtilities.isNumeric( string ) )
		{
			try
			{
				return Long.parseLong( string );
			}
			catch ( NumberFormatException e )
			{
				RequestLogger.printLine( string + " is out of range, returning 0" );
				return 0L;
			}
		}

		// A plain decimal number: discard the fraction
		if ( StringUtilities.isFloat( string ) )
		{
			return (long) StringUtilities.parseDouble( string );
		}

		// A decimal number followed by a one-character suffix
		String fstring = string.substring( 0, string.length() - 1 );

		if ( StringUtilities.isFloat( fstring ) )
		{
			char ch = string.charAt( string.length() - 1 );
			double base = StringUtilities.parseDouble( fstring );
			double multiplier = 1.0;

			switch ( ch )
			{
			case 'k':
			case 'K':
				multiplier = 1000.0;
				break;
			case 'm':
			case 'M':
				multiplier = 1000000.0;
				break;
			}

			return (long) ( base * multiplier );
		}

		if ( throwException )
		{
			throw new NumberFormatException( string );
		}

		return StringUtilities.parseLongInternal2( string );
	}

	/**
	 * Parses a long after discarding every character which is neither a
	 * digit nor '-'.
	 *
	 * @return the parsed value, or 0 if nothing remains or the remainder is
	 *         not a valid long
	 */
	public static final long parseLongInternal2( String string )
		throws NumberFormatException
	{
		string = NONINTEGER_PATTERN.matcher( string ).replaceAll( "" );

		if ( string.length() == 0 )
		{
			return 0L;
		}

		try
		{
			return Long.parseLong( string );
		}
		catch ( NumberFormatException e )
		{
			RequestLogger.printLine( string + " is out of range, returning 0" );
			return 0L;
		}
	}

	/**
	 * Leniently parses a float. A leading '+', commas and spaces are
	 * ignored.
	 *
	 * @return the parsed value, or 0 if the input is null, empty or does not
	 *         satisfy {@link #isFloat(String)}
	 */
	public static final float parseFloat( String string )
	{
		if ( string == null )
		{
			return 0.0f;
		}

		if ( string.startsWith( "+" ) )
		{
			string = string.substring( 1 );
		}

		string = StringUtilities.globalStringDelete( string, "," );
		string = StringUtilities.globalStringDelete( string, " " );

		if ( string.length() == 0 )
		{
			return 0.0f;
		}

		if ( !StringUtilities.isFloat( string ) )
		{
			return 0.0f;
		}

		return Float.parseFloat( string );
	}

	/**
	 * Leniently parses a double. A leading '+', commas and spaces are
	 * ignored.
	 *
	 * @return the parsed value, or 0 if the input is null, empty or does not
	 *         satisfy {@link #isFloat(String)}
	 */
	public static final double parseDouble( String string )
	{
		if ( string == null )
		{
			return 0.0;
		}

		if ( string.startsWith( "+" ) )
		{
			string = string.substring( 1 );
		}

		string = StringUtilities.globalStringDelete( string, "," );
		string = StringUtilities.globalStringDelete( string, " " );

		if ( string.length() == 0 )
		{
			return 0.0;
		}

		if ( !StringUtilities.isFloat( string ) )
		{
			return 0.0;
		}

		return Double.parseDouble( string );
	}

	/**
	 * Inserts line breaks into a long text so that lines are broken at or
	 * before column 80. Text shorter than 80 characters, or starting with
	 * "&lt;html&gt;", is returned unchanged.
	 */
	public static final String basicTextWrap( String text )
	{
		if ( text.length() < 80 || text.startsWith( "<html>" ) )
		{
			return text;
		}

		StringBuilder result = new StringBuilder();

		while ( text.length() > 0 )
		{
			if ( text.length() < 80 )
			{
				result.append( text );
				break;
			}

			// Prefer an existing line break, then the last space at or
			// before column 80, and as a last resort break mid-word.
			int spaceIndex = text.lastIndexOf( " ", 80 );
			int breakIndex = text.lastIndexOf( "\n", spaceIndex );

			if ( breakIndex != -1 )
			{
				result.append( text.substring( 0, breakIndex ) );
				result.append( "\n" );
				text = text.substring( breakIndex ).trim();
			}
			else if ( spaceIndex != -1 )
			{
				result.append( text.substring( 0, spaceIndex ).trim() );
				result.append( "\n" );
				text = text.substring( spaceIndex ).trim();
			}
			else
			{
				result.append( text.substring( 0, 80 ).trim() );
				result.append( "\n" );
				text = text.substring( 80 ).trim();
			}
		}

		return result.toString();
	}

	/**
	 * Remembers a text under a key formed by replacing each of its
	 * prepositions with "@", for later retrieval by
	 * {@link #lookupPrepositions(String)}. Text without any preposition is
	 * not registered.
	 */
	public static final void registerPrepositions( final String text )
	{
		Matcher m = StringUtilities.PREPOSITIONS_PATTERN.matcher( text );
		if ( !m.find() )
		{
			return;
		}

		StringUtilities.prepositionsMap.put( m.replaceAll( "@" ), text );
	}

	/**
	 * Looks up the registered text which differs from the given text only in
	 * its prepositions.
	 *
	 * @return the registered text, or the given text if it contains no
	 *         preposition or nothing matching was registered
	 */
	public static final String lookupPrepositions( final String text )
	{
		Matcher m = StringUtilities.PREPOSITIONS_PATTERN.matcher( text );
		if ( !m.find() )
		{
			return text;
		}

		String rv = StringUtilities.prepositionsMap.get( m.replaceAll( "@" ) );
		return rv == null ? text : rv;
	}

	/**
	 * Converts text to "leet speak" by replacing the letters O, I, L, E, A,
	 * S and T (in either case) with the digits 0, 1, 1, 3, 4, 5 and 7.
	 */
	public static final String leetify( final String text )
	{
		// It makes no sense to leetify character entities, so convert
		// them to UTF-8 characters.
		String decoded = StringUtilities.getEntityDecode( text );

		StringBuilder b = new StringBuilder();
		for ( int i = 0; i < decoded.length(); ++i )
		{
			char c = decoded.charAt( i );
			switch ( c )
			{
			case 'O':
			case 'o':
				b.append( "0" );
				break;
			case 'I':
			case 'i':
			case 'L':
			case 'l':
				b.append( "1" );
				break;
			case 'E':
			case 'e':
				b.append( "3" );
				break;
			case 'A':
			case 'a':
				b.append( "4" );
				break;
			case 'S':
			case 's':
				b.append( "5" );
				break;
			case 'T':
			case 't':
				b.append( "7" );
				break;
			default:
				b.append( c );
			}
		}

		return b.toString();
	}
}