/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.core.util;
import java.text.DateFormat;
import java.text.DateFormatSymbols;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowMetaInterface;
/**
* A collection of utilities to manipulate strings.
*
* @author wdeclerc
*/
public class StringUtil {
  /** Opening delimiter of a Unix-style variable reference. */
  public static final String UNIX_OPEN = "${";

  /** Closing delimiter of a Unix-style variable reference. */
  public static final String UNIX_CLOSE = "}";

  /** Opening delimiter of a Windows-style variable reference. */
  public static final String WINDOWS_OPEN = "%%";

  /** Closing delimiter of a Windows-style variable reference. */
  public static final String WINDOWS_CLOSE = "%%";

  /** Opening delimiter of a hex-encoded character sequence. */
  public static final String HEX_OPEN = "$[";

  /** Closing delimiter of a hex-encoded character sequence. */
  public static final String HEX_CLOSE = "]";

  /** Opening delimiter of a field reference. */
  public static final String FIELD_OPEN = "?{";

  /** Closing delimiter of a field reference. */
  public static final String FIELD_CLOSE = "}";

  public static final String CRLF = "\r\n";

  /** Characters that {@link #getIndent(int)} takes its result from. */
  public static final String INDENTCHARS = " ";

  public static final String EMPTY_STRING = "";

  /** Names that {@link #isSystemVariable(String)} treats as system variables. */
  public static final String[] SYSTEM_PROPERTIES = new String[] {
    "java.version", "java.vendor", "java.vendor.url", "java.home", "java.vm.specification.version",
    "java.vm.specification.vendor", "java.vm.specification.name", "java.vm.version", "java.vm.vendor",
    "java.vm.name", "java.specification.version", "java.specification.vendor", "java.specification.name",
    "java.class.version", "java.class.path", "java.library.path", "java.io.tmpdir", "java.compiler",
    "java.ext.dirs",
    "os.name", "os.arch", "os.version",
    "file.separator", "path.separator", "line.separator",
    "user.name", "user.home", "user.dir", "user.country", "user.language", "user.timezone",
    "org.apache.commons.logging.Log", "org.apache.commons.logging.simplelog.log.org.apache.commons.httpclient",
    "org.apache.commons.logging.simplelog.showdatetime", "org.eclipse.swt.browser.XULRunnerInitialized",
    "org.eclipse.swt.browser.XULRunnerPath",
    "sun.arch.data.model", "sun.boot.class.path", "sun.boot.library.path", "sun.cpu.endian", "sun.cpu.isalist",
    "sun.io.unicode.encoding", "sun.java.launcher", "sun.jnu.encoding", "sun.management.compiler",
    "sun.os.patch.level", };

  /** Pattern used by the date checks when the caller supplies no mask. */
  // NOTE(review): in SimpleDateFormat "mm" means minutes, not months ("MM"); kept unchanged for compatibility.
  private static final String DEFAULT_DATE_MASK = "yy-mm-dd";

  /**
   * Substitutes variables in <code>aString</code>. Variable names are delimited by open and close strings. The values
   * are retrieved from the given map.
   *
   * @param aString
   *          the string on which to apply the substitution.
   * @param variablesValues
   *          a map containing the variable values. The keys are the variable names, the values are the variable values.
   * @param open
   *          the open delimiter for variables.
   * @param close
   *          the close delimiter for variables.
   * @return the string with the substitution applied, or null when <code>aString</code> is null.
   */
  public static String substitute( String aString, Map<String, String> variablesValues, String open, String close ) {
    return substitute( aString, variablesValues, open, close, 0 );
  }

  /**
   * Substitutes variables in <code>aString</code>. Variable names are delimited by open and close strings. The values
   * are retrieved from the given map. A variable that has no value in the map is left in the result untouched,
   * delimiters included. A value that itself contains the open delimiter is substituted recursively.
   *
   * @param aString
   *          the string on which to apply the substitution.
   * @param variablesValues
   *          a map containing the variable values. The keys are the variable names, the values are the variable values.
   * @param open
   *          the open delimiter for variables.
   * @param close
   *          the close delimiter for variables.
   * @param recursion
   *          the current nesting depth (internal counter to avoid endless loops)
   * @return the string with the substitution applied, or null when <code>aString</code> is null.
   * @throws RuntimeException
   *           when values keep referring to other variables more than 50 levels deep
   */
  public static String substitute( String aString, Map<String, String> variablesValues, String open, String close,
    int recursion ) {
    if ( aString == null ) {
      return null;
    }

    StringBuilder buffer = new StringBuilder( aString.length() );
    // Start of the part of aString that has not been copied to the buffer yet.
    int copiedUpTo = 0;
    int openAt = aString.indexOf( open );
    while ( openAt > -1 ) {
      int nameStart = openAt + open.length();
      int closeAt = aString.indexOf( close, nameStart );
      if ( closeAt < 0 ) {
        // no closing tag found; the remainder is copied verbatim below
        break;
      }

      String varName = aString.substring( nameStart, closeAt );
      String value = variablesValues.get( varName );
      if ( value == null ) {
        // unknown variable: keep the reference as it was written
        value = open + varName + close;
      } else if ( value.indexOf( open ) > -1 ) {
        // The value refers to another variable. Guard against self-referencing definitions.
        if ( recursion > 50 ) {
          throw new RuntimeException( "Endless loop detected for substitution of variable: " + value );
        }
        // Pass depth + 1 without touching the local counter, so that sibling variables in the same string do not
        // add up to a false "endless loop".
        value = substitute( value, variablesValues, open, close, recursion + 1 );
      }

      buffer.append( aString, copiedUpTo, openAt );
      buffer.append( value );
      copiedUpTo = closeAt + close.length();

      // keep searching
      openAt = aString.indexOf( open, copiedUpTo );
    }
    buffer.append( aString, copiedUpTo, aString.length() );
    return buffer.toString();
  }

  /**
   * Substitutes hex values in <code>aString</code> and convert them to operating system char equivalents in the return
   * string. Format is $[01] or $[6F,FF,00,1F] Example:
   * "This is a hex encoded six digits number 123456 in this string: $[31,32,33,34,35,36]"
   * <p>
   * Each comma separated element is parsed as a base 16 integer, narrowed to one byte and decoded with the platform
   * default charset. An element that is not a valid hex number is treated as the value 0.
   *
   * @param aString
   *          the string on which to apply the substitution.
   * @return the string with the substitution applied, or null when <code>aString</code> is null.
   */
  public static String substituteHex( String aString ) {
    if ( aString == null ) {
      return null;
    }

    StringBuilder buffer = new StringBuilder();
    byte[] singleByte = new byte[1];
    int copiedUpTo = 0;
    int openAt = aString.indexOf( HEX_OPEN );
    while ( openAt > -1 ) {
      int listStart = openAt + HEX_OPEN.length();
      int closeAt = aString.indexOf( HEX_CLOSE, listStart );
      if ( closeAt < 0 ) {
        // no closing tag found; the remainder is copied verbatim below
        break;
      }

      buffer.append( aString, copiedUpTo, openAt );
      String[] hexCodes = aString.substring( listStart, closeAt ).split( "," );
      for ( int pos = 0; pos < hexCodes.length; pos++ ) {
        int code;
        try {
          code = Integer.parseInt( hexCodes[pos], 16 );
        } catch ( NumberFormatException e ) {
          code = 0; // in case we get an invalid hex value, fall back to 0: we can not log here
        }
        singleByte[0] = (byte) code;
        buffer.append( new String( singleByte ) );
      }
      copiedUpTo = closeAt + HEX_CLOSE.length();

      // keep searching
      openAt = aString.indexOf( HEX_OPEN, copiedUpTo );
    }
    buffer.append( aString, copiedUpTo, aString.length() );
    return buffer.toString();
  }

  /**
   * Substitutes variables in <code>aString</code> with the environment values in the system properties. Windows style
   * variables are replaced first, then Unix style variables, then hex sequences.
   *
   * @param aString
   *          the string on which to apply the substitution.
   * @param systemProperties
   *          the system properties to use
   * @return the string with the substitution applied.
   */
  public static final synchronized String environmentSubstitute( String aString,
    Map<String, String> systemProperties ) {
    // Work on a private copy so the substitutions see one consistent set of values.
    // NOTE(review): the synchronizedMap wrapper only locks on itself; it does not protect the copy against other
    // threads that modify systemProperties directly.
    Map<String, String> sysMap = new HashMap<String, String>();
    sysMap.putAll( Collections.synchronizedMap( systemProperties ) );

    String result = substituteWindows( aString, sysMap );
    result = substituteUnix( result, sysMap );
    return substituteHex( result );
  }

  /**
   * Substitutes variables in <code>aString</code>. Variables are of the form "${<variable name>}", following the Unix
   * scripting convention. The values are retrieved from the given map.
   *
   * @param aString
   *          the string on which to apply the substitution.
   * @param variables
   *          a map containing the variable values. The keys are the variable names, the values are the variable values.
   * @return the string with the substitution applied.
   */
  public static String substituteUnix( String aString, Map<String, String> variables ) {
    return substitute( aString, variables, UNIX_OPEN, UNIX_CLOSE );
  }

  /**
   * Substitutes variables in <code>aString</code>. Variables are of the form "%%<variable name>%%", following the
   * Windows convention. The values are retrieved from the given map.
   *
   * @param aString
   *          the string on which to apply the substitution.
   * @param variables
   *          a map containing the variable values. The keys are the variable names, the values are the variable values.
   * @return the string with the substitution applied.
   */
  public static String substituteWindows( String aString, Map<String, String> variables ) {
    return substitute( aString, variables, WINDOWS_OPEN, WINDOWS_CLOSE );
  }

  /**
   * Substitutes field values in <code>aString</code>. Field values are of the form "?{<field name>}". The values are
   * retrieved from the specified row. Please note that the getString() method is used to convert to a String, for all
   * values in the row.
   *
   * @param aString
   *          the string on which to apply the substitution.
   * @param rowMeta
   *          The row metadata to use.
   * @param rowData
   *          The row data to use
   *
   * @return the string with the substitution applied.
   * @throws KettleValueException
   *           In case there is a String conversion error
   */
  public static String substituteField( String aString, RowMetaInterface rowMeta, Object[] rowData ) throws KettleValueException {
    Map<String, String> variables = new HashMap<String, String>();
    for ( int i = 0; i < rowMeta.size(); i++ ) {
      variables.put( rowMeta.getValueMeta( i ).getName(), rowMeta.getString( rowData, i ) );
    }
    return substitute( aString, variables, FIELD_OPEN, FIELD_CLOSE );
  }

  /**
   * Search the string and report back on the variables used. A variable name must be at least one character long:
   * the close delimiter is only searched for after the first character that follows the open delimiter.
   *
   * @param aString
   *          The string to search
   * @param open
   *          the open or "start of variable" characters ${ or %%
   * @param close
   *          the close or "end of variable" characters } or %%
   * @param list
   *          the list of variables to add to; names already present are not added again
   * @param includeSystemVariables
   *          also check for system variables.
   */
  private static void getUsedVariables( String aString, String open, String close, List<String> list,
    boolean includeSystemVariables ) {
    if ( aString == null ) {
      return;
    }

    int p = 0;
    while ( p < aString.length() ) {
      // OK, we found something... : start of a variable
      if ( aString.startsWith( open, p ) ) {
        // See if it's closed...
        int from = p + open.length();
        int to = aString.indexOf( close, from + 1 );

        if ( to >= 0 ) {
          String variable = aString.substring( from, to );

          if ( Const.indexOfString( variable, list ) < 0 ) {
            // Either we include the system variables (all)
            // Or the variable is not a system variable
            // Or it's a system variable but the value has not been set (and we offer the user the option to set it)
            //
            if ( includeSystemVariables || !isSystemVariable( variable ) || System.getProperty( variable ) == null ) {
              list.add( variable );
            }
          }
          // Resume right behind the close delimiter. No extra step here: the next variable may start immediately.
          p = to + close.length();
          continue;
        }
      }
      p++;
    }
  }

  /**
   * Checks whether the given name is one of the {@link #SYSTEM_PROPERTIES}.
   *
   * @param aString
   *          the variable name to check
   * @return true if the name is listed in {@link #SYSTEM_PROPERTIES}
   */
  public static boolean isSystemVariable( String aString ) {
    return Const.indexOfString( aString, SYSTEM_PROPERTIES ) >= 0;
  }

  /**
   * Collects the names of the Unix style and Windows style variables used in a string.
   *
   * @param aString
   *          The string to search
   * @param list
   *          the list of variables to add to
   * @param includeSystemVariables
   *          when false, system variables that have a value in the system properties are left out
   */
  public static void getUsedVariables( String aString, List<String> list, boolean includeSystemVariables ) {
    getUsedVariables( aString, UNIX_OPEN, UNIX_CLOSE, list, includeSystemVariables );
    getUsedVariables( aString, WINDOWS_OPEN, WINDOWS_CLOSE, list, includeSystemVariables );
  }

  /**
   * Generates a string of random letters in the range 'a' to 'z', surrounded by an optional prefix and postfix.
   *
   * @param length
   *          the number of random letters to generate
   * @param prefix
   *          text to put in front of the random letters, skipped when empty
   * @param postfix
   *          text to put behind the random letters, skipped when empty
   * @param uppercase
   *          true to convert the complete result, prefix and postfix included, to upper case
   * @return the generated string
   */
  public static final String generateRandomString( int length, String prefix, String postfix, boolean uppercase ) {
    StringBuilder buffer = new StringBuilder();

    if ( !Utils.isEmpty( prefix ) ) {
      buffer.append( prefix );
    }

    for ( int i = 0; i < length; i++ ) {
      int c = 'a' + (int) ( Math.random() * 26 );
      buffer.append( (char) c );
    }
    if ( !Utils.isEmpty( postfix ) ) {
      buffer.append( postfix );
    }

    String generated = buffer.toString();
    return uppercase ? generated.toUpperCase() : generated;
  }

  /**
   * Converts the first character of a string to upper case.
   *
   * @param st
   *          the string to capitalize
   * @return the empty string when <code>st</code> is null or contains only whitespace, otherwise <code>st</code> with
   *         its first character in upper case
   */
  public static String initCap( String st ) {
    if ( st == null || st.trim().length() == 0 ) {
      return "";
    }

    String first = st.substring( 0, 1 );
    String firstUpper = first.toUpperCase();
    if ( first.equals( firstUpper ) ) {
      // Already initially capitalized.
      return st;
    }
    // Capitalize first character
    return firstUpper + st.substring( 1 );
  }

  /**
   * Parses a string into a number, using the default number format with the given settings applied on top.
   *
   * @param pattern
   *          the decimal format pattern, ignored when empty
   * @param decimal
   *          the decimal separator (first character is used), ignored when empty
   * @param grouping
   *          the grouping separator (first character is used), ignored when empty
   * @param currency
   *          the currency symbol, ignored when empty
   * @param value
   *          the string to parse
   * @return the parsed number as a double
   * @throws KettleValueException
   *           when the string can not be converted
   */
  public static double str2num( String pattern, String decimal, String grouping, String currency, String value ) throws KettleValueException {
    NumberFormat nf = NumberFormat.getInstance();
    DecimalFormat df = (DecimalFormat) nf;
    DecimalFormatSymbols dfs = new DecimalFormatSymbols();

    if ( !Utils.isEmpty( pattern ) ) {
      df.applyPattern( pattern );
    }
    if ( !Utils.isEmpty( decimal ) ) {
      dfs.setDecimalSeparator( decimal.charAt( 0 ) );
    }
    if ( !Utils.isEmpty( grouping ) ) {
      dfs.setGroupingSeparator( grouping.charAt( 0 ) );
    }
    if ( !Utils.isEmpty( currency ) ) {
      dfs.setCurrencySymbol( currency );
    }
    try {
      df.setDecimalFormatSymbols( dfs );
      return df.parse( value ).doubleValue();
    } catch ( Exception e ) {
      // Report the settings that were in effect, so the failing conversion can be reproduced.
      String message = "Couldn't convert string to number " + e.toString();
      if ( !isEmpty( pattern ) ) {
        message += " pattern=" + pattern;
      }
      if ( !isEmpty( decimal ) ) {
        message += " decimal=" + decimal;
      }
      if ( !isEmpty( grouping ) ) {
        message += " grouping=" + grouping.charAt( 0 );
      }
      if ( !isEmpty( currency ) ) {
        message += " currency=" + currency;
      }
      throw new KettleValueException( message );
    }
  }

  /**
   * Check if the string supplied is empty. A String is empty when it is null or when the length is 0
   *
   * @param string
   *          The string to check
   * @return true if the string supplied is empty
   */
  public static final boolean isEmpty( String string ) {
    return string == null || string.length() == 0;
  }

  /**
   * Check if the StringBuilder supplied is empty. A StringBuilder is empty when it is null or when the length is 0
   *
   * @param string
   *          The StringBuilder to check
   * @return true if the StringBuilder supplied is empty
   */
  public static final boolean isEmpty( StringBuilder string ) {
    return string == null || string.length() == 0;
  }

  /**
   * Parses a string into a date.
   *
   * @param arg0
   *          the date pattern to apply; when null the default pattern of <code>SimpleDateFormat</code> is used
   * @param arg1
   *          localized pattern characters, see the note in the method body
   * @param val
   *          the string to parse
   * @return the parsed date
   * @throws KettleValueException
   *           when the string can not be converted
   */
  public static Date str2dat( String arg0, String arg1, String val ) throws KettleValueException {
    SimpleDateFormat df = new SimpleDateFormat();

    // NOTE(review): these symbols are filled in but never handed to the formatter, so arg1 currently has no effect
    // on the parse result. Left as is to keep the behavior unchanged.
    DateFormatSymbols dfs = new DateFormatSymbols();
    if ( arg1 != null ) {
      dfs.setLocalPatternChars( arg1 );
    }
    if ( arg0 != null ) {
      df.applyPattern( arg0 );
    }

    try {
      return df.parse( val );
    } catch ( Exception e ) {
      throw new KettleValueException( "TO_DATE Couldn't convert String to Date " + e.toString() );
    }
  }

  /**
   * Builds an indentation string of the requested length out of {@link #INDENTCHARS}.
   *
   * @param indentLevel
   *          the number of indentation characters wanted
   * @return the first <code>indentLevel</code> characters of {@link #INDENTCHARS}, which is repeated as often as
   *         needed when it is shorter than <code>indentLevel</code>
   * @throws StringIndexOutOfBoundsException
   *           when <code>indentLevel</code> is negative
   */
  public static String getIndent( int indentLevel ) {
    if ( indentLevel <= INDENTCHARS.length() ) {
      // Also the path for negative levels, which keep failing the way they always did.
      return INDENTCHARS.substring( 0, indentLevel );
    }

    StringBuilder indent = new StringBuilder( indentLevel );
    while ( indent.length() < indentLevel ) {
      int missing = indentLevel - indent.length();
      indent.append( INDENTCHARS, 0, Math.min( INDENTCHARS.length(), missing ) );
    }
    return indent.toString();
  }

  /**
   * Giving back a date/time string in the format following the rule from the most to the least significant
   *
   * @param date
   *          the date to convert
   * @return the date formatted with <code>Const.GENERALIZED_DATE_TIME_FORMAT</code>
   */
  public static String getFormattedDateTime( Date date ) {
    return getFormattedDateTime( date, false );
  }

  /**
   * Giving back a date/time string in the format following the rule from the most to the least significant
   *
   * @param date
   *          the date to convert
   * @param milliseconds
   *          true when milliseconds should be added
   * @return the date formatted with <code>Const.GENERALIZED_DATE_TIME_FORMAT_MILLIS</code> when milliseconds are
   *         requested, with <code>Const.GENERALIZED_DATE_TIME_FORMAT</code> otherwise
   */
  public static String getFormattedDateTime( Date date, boolean milliseconds ) {
    String pattern = milliseconds ? Const.GENERALIZED_DATE_TIME_FORMAT_MILLIS : Const.GENERALIZED_DATE_TIME_FORMAT;
    // SimpleDateFormat is not thread safe, so a fresh instance is used for every call.
    DateFormat dateFormat = new SimpleDateFormat( pattern );
    return dateFormat.format( date );
  }

  /**
   * Giving back the actual time as a date/time string in the format following the rule from the most to the least
   * significant
   *
   * @return the current time formatted with <code>Const.GENERALIZED_DATE_TIME_FORMAT</code>
   */
  public static String getFormattedDateTimeNow() {
    return getFormattedDateTime( new Date(), false );
  }

  /**
   * Giving back the actual time as a date/time string in the format following the rule from the most to the least
   * significant
   *
   * @param milliseconds
   *          true when milliseconds should be added
   * @return the current time, formatted like {@link #getFormattedDateTime(Date, boolean)} does
   */
  public static String getFormattedDateTimeNow( boolean milliseconds ) {
    return getFormattedDateTime( new Date(), milliseconds );
  }

  /**
   * Checks whether a string can be parsed with <code>Integer.parseInt</code>.
   *
   * @param str
   *          the string to check
   * @return true if the string is a valid int, false otherwise (including for null)
   */
  public static boolean IsInteger( String str ) {
    try {
      Integer.parseInt( str );
    } catch ( NumberFormatException e ) {
      return false;
    }
    return true;
  }

  /**
   * Checks whether a string can be parsed with <code>Double.valueOf</code>.
   *
   * @param str
   *          the string to check
   * @return true if the string is a valid double, false otherwise (including for null)
   */
  public static boolean IsNumber( String str ) {
    try {
      Double.valueOf( str ).doubleValue();
    } catch ( Exception e ) {
      return false;
    }
    return true;
  }

  /**
   * Checks whether a string can be parsed as a date with the default mask "yy-mm-dd".
   *
   * @param str
   *          the string to check
   * @return true if the string can be parsed, false otherwise
   */
  public static boolean IsDate( String str ) {
    return IsDate( str, DEFAULT_DATE_MASK );
  }

  /**
   * Checks whether a string can be parsed as a date with the given mask.
   *
   * @param str
   *          the string to check
   * @param mask
   *          the <code>SimpleDateFormat</code> pattern to parse with; the default mask "yy-mm-dd" is used when this is
   *          null or empty
   * @return true if the string can be parsed, false otherwise (including for a null string or an invalid mask)
   */
  public static boolean IsDate( String str, String mask ) {
    String pattern = isEmpty( mask ) ? DEFAULT_DATE_MASK : mask;
    try {
      new SimpleDateFormat( pattern ).parse( str );
    } catch ( Exception e ) {
      return false;
    }
    return true;
  }

  /**
   * remove specification from variable
   *
   * @param variable
   *          the variable to look for, with the $ or % variable specification. Must not be null.
   * @return the variable name
   */
  public static final String getVariableName( String variable ) {
    variable = variable.trim();
    // All three open delimiters are two characters long.
    if ( variable.startsWith( UNIX_OPEN ) || variable.startsWith( WINDOWS_OPEN ) || variable.startsWith( HEX_OPEN ) ) {
      variable = variable.substring( 2, variable.length() );
    }
    // Unix and hex close delimiters are one character long, the Windows one takes two.
    if ( variable.endsWith( UNIX_CLOSE ) || variable.endsWith( HEX_CLOSE ) ) {
      variable = variable.substring( 0, variable.length() - 1 );
    }
    if ( variable.endsWith( WINDOWS_CLOSE ) ) {
      variable = variable.substring( 0, variable.length() - 2 );
    }

    return variable;
  }

  /**
   * Checks whether the trimmed input starts and ends with a matching pair of Unix, Windows or hex delimiters.
   *
   * @param variable
   *          the variable to look for, with the $ or % variable specification.
   * @return true if the input is a variable, false otherwise
   */
  public static boolean isVariable( String variable ) {
    if ( variable == null ) {
      return false;
    }
    variable = variable.trim();
    return ( variable.startsWith( UNIX_OPEN ) && variable.endsWith( UNIX_CLOSE ) )
      || ( variable.startsWith( WINDOWS_OPEN ) && variable.endsWith( WINDOWS_CLOSE ) )
      || ( variable.startsWith( HEX_OPEN ) && variable.endsWith( HEX_CLOSE ) );
  }
}