/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.stringoperations;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Apply certain operations to strings.
*
* @author Samatar Hassan
* @since 02 April 2009
*/
public class StringOperations extends BaseStep implements StepInterface {
private static Class<?> PKG = StringOperationsMeta.class; // for i18n purposes, needed by Translator2!!
private StringOperationsMeta meta;
private StringOperationsData data;
public StringOperations( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
private String processString( String string, int trimType, int lowerUpper, int padType, String padChar, int padLen,
int iniCap, int maskHTML, int digits, int removeSpecialCharacters ) {
String rcode = string;
// Trim ?
if ( !Utils.isEmpty( rcode ) ) {
switch ( trimType ) {
case StringOperationsMeta.TRIM_RIGHT:
rcode = Const.rtrim( rcode );
break;
case StringOperationsMeta.TRIM_LEFT:
rcode = Const.ltrim( rcode );
break;
case StringOperationsMeta.TRIM_BOTH:
rcode = Const.trim( rcode );
break;
default:
break;
}
}
// Lower/Upper ?
if ( !Utils.isEmpty( rcode ) ) {
switch ( lowerUpper ) {
case StringOperationsMeta.LOWER_UPPER_LOWER:
rcode = rcode.toLowerCase();
break;
case StringOperationsMeta.LOWER_UPPER_UPPER:
rcode = rcode.toUpperCase();
break;
default:
break;
}
}
// pad String?
if ( !Utils.isEmpty( rcode ) ) {
switch ( padType ) {
case StringOperationsMeta.PADDING_LEFT:
rcode = Const.Lpad( rcode, padChar, padLen );
break;
case StringOperationsMeta.PADDING_RIGHT:
rcode = Const.Rpad( rcode, padChar, padLen );
break;
default:
break;
}
}
// InitCap ?
if ( !Utils.isEmpty( rcode ) ) {
switch ( iniCap ) {
case StringOperationsMeta.INIT_CAP_NO:
break;
case StringOperationsMeta.INIT_CAP_YES:
rcode = Const.initCap( rcode );
break;
default:
break;
}
}
// escape ?
if ( !Utils.isEmpty( rcode ) ) {
switch ( maskHTML ) {
case StringOperationsMeta.MASK_ESCAPE_XML:
rcode = Const.escapeXml( rcode );
break;
case StringOperationsMeta.MASK_CDATA:
rcode = Const.protectXMLCDATA( rcode );
break;
case StringOperationsMeta.MASK_UNESCAPE_XML:
rcode = Const.unEscapeXml( rcode );
break;
case StringOperationsMeta.MASK_ESCAPE_HTML:
rcode = Const.escapeHtml( rcode );
break;
case StringOperationsMeta.MASK_UNESCAPE_HTML:
rcode = Const.unEscapeHtml( rcode );
break;
case StringOperationsMeta.MASK_ESCAPE_SQL:
rcode = Const.escapeSQL( rcode );
break;
default:
break;
}
}
// digits only or remove digits ?
if ( !Utils.isEmpty( rcode ) ) {
switch ( digits ) {
case StringOperationsMeta.DIGITS_NONE:
break;
case StringOperationsMeta.DIGITS_ONLY:
rcode = Const.getDigitsOnly( rcode );
break;
case StringOperationsMeta.DIGITS_REMOVE:
rcode = Const.removeDigits( rcode );
break;
default:
break;
}
}
// remove special characters ?
if ( !Utils.isEmpty( rcode ) ) {
switch ( removeSpecialCharacters ) {
case StringOperationsMeta.REMOVE_SPECIAL_CHARACTERS_NONE:
break;
case StringOperationsMeta.REMOVE_SPECIAL_CHARACTERS_CR:
rcode = Const.removeCR( rcode );
break;
case StringOperationsMeta.REMOVE_SPECIAL_CHARACTERS_LF:
rcode = Const.removeLF( rcode );
break;
case StringOperationsMeta.REMOVE_SPECIAL_CHARACTERS_CRLF:
rcode = Const.removeCRLF( rcode );
break;
case StringOperationsMeta.REMOVE_SPECIAL_CHARACTERS_TAB:
rcode = Const.removeTAB( rcode );
break;
case StringOperationsMeta.REMOVE_SPECIAL_CHARACTERS_ESPACE:
rcode = rcode.replace( " ", "" );
break;
default:
break;
}
}
return rcode;
}
private Object[] processRow( RowMetaInterface rowMeta, Object[] row ) throws KettleException {
Object[] RowData = new Object[data.outputRowMeta.size()];
// Copy the input fields.
System.arraycopy( row, 0, RowData, 0, rowMeta.size() );
int j = 0; // Index into "new fields" area, past the first {data.inputFieldsNr} records
for ( int i = 0; i < data.nrFieldsInStream; i++ ) {
if ( data.inStreamNrs[i] >= 0 ) {
// Get source value
String value = getInputRowMeta().getString( row, data.inStreamNrs[i] );
// Apply String operations and return result value
value =
processString( value, data.trimOperators[i], data.lowerUpperOperators[i], data.padType[i], data.padChar[i],
data.padLen[i], data.initCap[i], data.maskHTML[i], data.digits[i], data.removeSpecialCharacters[i] );
if ( Utils.isEmpty( data.outStreamNrs[i] ) ) {
// Update field
RowData[data.inStreamNrs[i]] = value;
data.outputRowMeta.getValueMeta( data.inStreamNrs[i] )
.setStorageType( ValueMetaInterface.STORAGE_TYPE_NORMAL );
} else {
// create a new Field
RowData[data.inputFieldsNr + j] = value;
j++;
}
}
}
return RowData;
}
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
meta = (StringOperationsMeta) smi;
data = (StringOperationsData) sdi;
Object[] r = getRow(); // Get row from input rowset & set row busy!
if ( r == null ) {
// no more input to be expected...
setOutputDone();
return false;
}
if ( first ) {
first = false;
// What's the format of the output row?
data.outputRowMeta = getInputRowMeta().clone();
data.inputFieldsNr = data.outputRowMeta.size();
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
data.nrFieldsInStream = meta.getFieldInStream().length;
data.inStreamNrs = new int[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.inStreamNrs[i] = getInputRowMeta().indexOfValue( meta.getFieldInStream()[i] );
if ( data.inStreamNrs[i] < 0 ) { // couldn't find field!
throw new KettleStepException( BaseMessages.getString( PKG, "StringOperations.Exception.FieldRequired", meta
.getFieldInStream()[i] ) );
}
// check field type
if ( !getInputRowMeta().getValueMeta( data.inStreamNrs[i] ).isString() ) {
throw new KettleStepException( BaseMessages.getString( PKG, "StringOperations.Exception.FieldTypeNotString",
meta.getFieldInStream()[i] ) );
}
}
data.outStreamNrs = new String[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.outStreamNrs[i] = meta.getFieldOutStream()[i];
}
// Keep track of the trim operators locally for a very small
// optimization.
data.trimOperators = new int[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.trimOperators[i] = meta.getTrimType()[i];
}
// lower Upper
data.lowerUpperOperators = new int[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.lowerUpperOperators[i] = meta.getLowerUpper()[i];
}
// padding type?
data.padType = new int[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.padType[i] = meta.getPaddingType()[i];
}
// padding char
data.padChar = new String[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.padChar[i] = environmentSubstitute( meta.getPadChar()[i] );
}
// padding len
data.padLen = new int[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.padLen[i] = Const.toInt( environmentSubstitute( meta.getPadLen()[i] ), 0 );
}
// InitCap?
data.initCap = new int[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.initCap[i] = meta.getInitCap()[i];
}
// MaskXML?
data.maskHTML = new int[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.maskHTML[i] = meta.getMaskXML()[i];
}
// digits?
data.digits = new int[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.digits[i] = meta.getDigits()[i];
}
// remove special characters?
data.removeSpecialCharacters = new int[data.nrFieldsInStream];
for ( int i = 0; i < meta.getFieldInStream().length; i++ ) {
data.removeSpecialCharacters[i] = meta.getRemoveSpecialCharacters()[i];
}
} // end if first
try {
Object[] output = processRow( getInputRowMeta(), r );
putRow( data.outputRowMeta, output );
if ( checkFeedback( getLinesRead() ) ) {
if ( log.isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "StringOperations.Log.LineNumber" ) + getLinesRead() );
}
}
} catch ( KettleException e ) {
boolean sendToErrorRow = false;
String errorMessage = null;
if ( getStepMeta().isDoingErrorHandling() ) {
sendToErrorRow = true;
errorMessage = e.toString();
} else {
logError( BaseMessages.getString( PKG, "StringOperations.Log.ErrorInStep", e.getMessage() ) );
setErrors( 1 );
stopAll();
setOutputDone(); // signal end to receiver(s)
return false;
}
if ( sendToErrorRow ) {
// Simply add this row to the error row
putError( getInputRowMeta(), r, 1, errorMessage, null, "StringOperations001" );
}
}
return true;
}
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
boolean rCode = true;
meta = (StringOperationsMeta) smi;
data = (StringOperationsData) sdi;
if ( super.init( smi, sdi ) ) {
return rCode;
}
return false;
}
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (StringOperationsMeta) smi;
data = (StringOperationsData) sdi;
super.dispose( smi, sdi );
}
}