/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.replacestring; import java.util.List; import org.pentaho.di.core.CheckResult; import org.pentaho.di.core.CheckResultInterface; import org.pentaho.di.core.Const; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.database.DatabaseMeta; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleStepException; import org.pentaho.di.core.exception.KettleXMLException; import org.pentaho.di.core.injection.Injection; import org.pentaho.di.core.injection.InjectionSupported; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.row.value.ValueMetaString; import org.pentaho.di.core.variables.VariableSpace; import org.pentaho.di.core.xml.XMLHandler; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.repository.ObjectId; import org.pentaho.di.repository.Repository; import org.pentaho.di.shared.SharedObjectInterface; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.BaseStepMeta; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; import org.pentaho.metastore.api.IMetaStore; import org.w3c.dom.Node; @InjectionSupported( localizationPrefix = "ReplaceString.Injection.", groups = { "FIELDS" } ) public class ReplaceStringMeta extends BaseStepMeta implements StepMetaInterface { private static Class<?> PKG = ReplaceStringMeta.class; // for i18n purposes, needed by Translator2!! @Injection( name = "FIELD_IN_STREAM", group = "FIELDS" ) private String[] fieldInStream; @Injection( name = "FIELD_OUT_STREAM", group = "FIELDS" ) private String[] fieldOutStream; @Injection( name = "USE_REGEX", group = "FIELDS" ) private int[] useRegEx; @Injection( name = "REPLACE_STRING", group = "FIELDS" ) private String[] replaceString; @Injection( name = "REPLACE_BY", group = "FIELDS" ) private String[] replaceByString; /** Flag : set empty string **/ @Injection( name = "EMPTY_STRING", group = "FIELDS" ) private boolean[] setEmptyString; @Injection( name = "REPLACE_WITH_FIELD", group = "FIELDS" ) private String[] replaceFieldByString; @Injection( name = "REPLACE_WHOLE_WORD", group = "FIELDS" ) private int[] wholeWord; @Injection( name = "CASE_SENSITIVE", group = "FIELDS" ) private int[] caseSensitive; public static final String[] caseSensitiveCode = { "no", "yes" }; public static final String[] caseSensitiveDesc = new String[] { BaseMessages.getString( PKG, "System.Combo.No" ), BaseMessages.getString( PKG, "System.Combo.Yes" ) }; public static final int CASE_SENSITIVE_NO = 0; public static final int CASE_SENSITIVE_YES = 1; public static final String[] wholeWordDesc = new String[] { BaseMessages.getString( PKG, "System.Combo.No" ), BaseMessages.getString( PKG, "System.Combo.Yes" ) }; public static final String[] wholeWordCode = { "no", "yes" }; public static final int WHOLE_WORD_NO = 0; public static final int WHOLE_WORD_YES = 1; public static final String[] useRegExDesc = new String[] { BaseMessages.getString( PKG, "System.Combo.No" ), BaseMessages.getString( PKG, "System.Combo.Yes" ) }; public static final String[] useRegExCode = { "no", "yes" }; public static final int USE_REGEX_NO = 0; public static final int USE_REGEX_YES = 1; public ReplaceStringMeta() { super(); // allocate BaseStepMeta } /** * @return Returns the fieldInStream. */ public String[] getFieldInStream() { return fieldInStream; } /** * @param keyStream * The fieldInStream to set. */ public void setFieldInStream( String[] keyStream ) { this.fieldInStream = keyStream; } public int[] getCaseSensitive() { return caseSensitive; } public int[] getWholeWord() { return wholeWord; } public void setWholeWord( int[] wholeWord ) { this.wholeWord = wholeWord; } public int[] getUseRegEx() { return useRegEx; } public void setUseRegEx( int[] useRegEx ) { this.useRegEx = useRegEx; } /** * @return the setEmptyString */ public boolean[] isSetEmptyString() { return setEmptyString; } /** * @param setEmptyString * the setEmptyString to set */ public void setEmptyString( boolean[] setEmptyString ) { this.setEmptyString = setEmptyString; } /** * @return Returns the fieldOutStream. */ public String[] getFieldOutStream() { return fieldOutStream; } /** * @param keyStream * The fieldOutStream to set. */ public void setFieldOutStream( String[] keyStream ) { this.fieldOutStream = keyStream; } public String[] getReplaceString() { return replaceString; } public void setReplaceString( String[] replaceString ) { this.replaceString = replaceString; } public String[] getReplaceByString() { return replaceByString; } public void setReplaceByString( String[] replaceByString ) { this.replaceByString = replaceByString; } public String[] getFieldReplaceByString() { return replaceFieldByString; } public void setFieldReplaceByString( String[] replaceFieldByString ) { this.replaceFieldByString = replaceFieldByString; } public void setCaseSensitive( int[] caseSensitive ) { this.caseSensitive = caseSensitive; } public void loadXML( Node stepnode, List<DatabaseMeta> databases, IMetaStore metaStore ) throws KettleXMLException { readData( stepnode, databases ); } public void allocate( int nrkeys ) { fieldInStream = new String[nrkeys]; fieldOutStream = new String[nrkeys]; useRegEx = new int[nrkeys]; replaceString = new String[nrkeys]; replaceByString = new String[nrkeys]; setEmptyString = new boolean[nrkeys]; replaceFieldByString = new String[nrkeys]; wholeWord = new int[nrkeys]; caseSensitive = new int[nrkeys]; } public Object clone() { ReplaceStringMeta retval = (ReplaceStringMeta) super.clone(); int nrkeys = fieldInStream.length; retval.allocate( nrkeys ); System.arraycopy( fieldInStream, 0, retval.fieldInStream, 0, nrkeys ); System.arraycopy( fieldOutStream, 0, retval.fieldOutStream, 0, nrkeys ); System.arraycopy( useRegEx, 0, retval.useRegEx, 0, nrkeys ); System.arraycopy( replaceString, 0, retval.replaceString, 0, nrkeys ); System.arraycopy( replaceByString, 0, retval.replaceByString, 0, nrkeys ); System.arraycopy( setEmptyString, 0, retval.setEmptyString, 0, nrkeys ); System.arraycopy( replaceFieldByString, 0, retval.replaceFieldByString, 0, nrkeys ); System.arraycopy( wholeWord, 0, retval.wholeWord, 0, nrkeys ); System.arraycopy( caseSensitive, 0, retval.caseSensitive, 0, nrkeys ); return retval; } private void readData( Node stepnode, List<? extends SharedObjectInterface> databases ) throws KettleXMLException { try { int nrkeys; Node lookup = XMLHandler.getSubNode( stepnode, "fields" ); nrkeys = XMLHandler.countNodes( lookup, "field" ); allocate( nrkeys ); for ( int i = 0; i < nrkeys; i++ ) { Node fnode = XMLHandler.getSubNodeByNr( lookup, "field", i ); fieldInStream[i] = Const.NVL( XMLHandler.getTagValue( fnode, "in_stream_name" ), "" ); fieldOutStream[i] = Const.NVL( XMLHandler.getTagValue( fnode, "out_stream_name" ), "" ); useRegEx[i] = getCaseSensitiveByCode( Const.NVL( XMLHandler.getTagValue( fnode, "use_regex" ), "" ) ); replaceString[i] = Const.NVL( XMLHandler.getTagValue( fnode, "replace_string" ), "" ); replaceByString[i] = Const.NVL( XMLHandler.getTagValue( fnode, "replace_by_string" ), "" ); String emptyString = XMLHandler.getTagValue( fnode, "set_empty_string" ); setEmptyString[i] = !Utils.isEmpty( emptyString ) && "Y".equalsIgnoreCase( emptyString ); replaceFieldByString[i] = Const.NVL( XMLHandler.getTagValue( fnode, "replace_field_by_string" ), "" ); wholeWord[i] = getWholeWordByCode( Const.NVL( XMLHandler.getTagValue( fnode, "whole_word" ), "" ) ); caseSensitive[i] = getCaseSensitiveByCode( Const.NVL( XMLHandler.getTagValue( fnode, "case_sensitive" ), "" ) ); } } catch ( Exception e ) { throw new KettleXMLException( BaseMessages.getString( PKG, "ReplaceStringMeta.Exception.UnableToReadStepInfoFromXML" ), e ); } } public void setDefault() { fieldInStream = null; fieldOutStream = null; int nrkeys = 0; allocate( nrkeys ); } public String getXML() { StringBuilder retval = new StringBuilder( 500 ); retval.append( " <fields>" ).append( Const.CR ); for ( int i = 0; i < fieldInStream.length; i++ ) { retval.append( " <field>" ).append( Const.CR ); retval.append( " " ).append( XMLHandler.addTagValue( "in_stream_name", fieldInStream[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "out_stream_name", fieldOutStream[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "use_regex", getUseRegExCode( useRegEx[i] ) ) ); retval.append( " " ).append( XMLHandler.addTagValue( "replace_string", replaceString[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "replace_by_string", replaceByString[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "set_empty_string", setEmptyString[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "replace_field_by_string", replaceFieldByString[i] ) ); retval .append( " " ).append( XMLHandler.addTagValue( "whole_word", getWholeWordCode( wholeWord[i] ) ) ); retval.append( " " ).append( XMLHandler.addTagValue( "case_sensitive", getCaseSensitiveCode( caseSensitive[i] ) ) ); retval.append( " </field>" ).append( Const.CR ); } retval.append( " </fields>" ).append( Const.CR ); return retval.toString(); } public void readRep( Repository rep, IMetaStore metaStore, ObjectId id_step, List<DatabaseMeta> databases ) throws KettleException { try { int nrkeys = rep.countNrStepAttributes( id_step, "in_stream_name" ); allocate( nrkeys ); for ( int i = 0; i < nrkeys; i++ ) { fieldInStream[i] = Const.NVL( rep.getStepAttributeString( id_step, i, "in_stream_name" ), "" ); fieldOutStream[i] = Const.NVL( rep.getStepAttributeString( id_step, i, "out_stream_name" ), "" ); useRegEx[i] = getCaseSensitiveByCode( Const.NVL( rep.getStepAttributeString( id_step, i, "use_regex" ), "" ) ); replaceString[i] = Const.NVL( rep.getStepAttributeString( id_step, i, "replace_string" ), "" ); replaceByString[i] = Const.NVL( rep.getStepAttributeString( id_step, i, "replace_by_string" ), "" ); setEmptyString[i] = rep.getStepAttributeBoolean( id_step, i, "set_empty_string", false ); replaceFieldByString[i] = Const.NVL( rep.getStepAttributeString( id_step, i, "replace_field_by_string" ), "" ); wholeWord[i] = getWholeWordByCode( Const.NVL( rep.getStepAttributeString( id_step, i, "whole_world" ), "" ) ); caseSensitive[i] = getCaseSensitiveByCode( Const.NVL( rep.getStepAttributeString( id_step, i, "case_sensitive" ), "" ) ); } } catch ( Exception e ) { throw new KettleException( BaseMessages.getString( PKG, "ReplaceStringMeta.Exception.UnexpectedErrorInReadingStepInfo" ), e ); } } public void saveRep( Repository rep, IMetaStore metaStore, ObjectId id_transformation, ObjectId id_step ) throws KettleException { try { for ( int i = 0; i < fieldInStream.length; i++ ) { rep.saveStepAttribute( id_transformation, id_step, i, "in_stream_name", fieldInStream[i] ); rep.saveStepAttribute( id_transformation, id_step, i, "out_stream_name", fieldOutStream[i] ); rep.saveStepAttribute( id_transformation, id_step, i, "use_regex", getUseRegExCode( useRegEx[i] ) ); rep.saveStepAttribute( id_transformation, id_step, i, "replace_string", replaceString[i] ); rep.saveStepAttribute( id_transformation, id_step, i, "replace_by_string", replaceByString[i] ); rep.saveStepAttribute( id_transformation, id_step, i, "set_empty_string", setEmptyString[i] ); rep.saveStepAttribute( id_transformation, id_step, i, "replace_field_by_string", replaceFieldByString[i] ); rep.saveStepAttribute( id_transformation, id_step, i, "whole_world", getWholeWordCode( wholeWord[i] ) ); rep.saveStepAttribute( id_transformation, id_step, i, "case_sensitive", getCaseSensitiveCode( caseSensitive[i] ) ); } } catch ( Exception e ) { throw new KettleException( BaseMessages.getString( PKG, "ReplaceStringMeta.Exception.UnableToSaveStepInfo" ) + id_step, e ); } } public void getFields( RowMetaInterface inputRowMeta, String name, RowMetaInterface[] info, StepMeta nextStep, VariableSpace space, Repository repository, IMetaStore metaStore ) throws KettleStepException { int nrFields = fieldInStream == null ? 0 : fieldInStream.length; for ( int i = 0; i < nrFields; i++ ) { String fieldName = space.environmentSubstitute( fieldOutStream[i] ); ValueMetaInterface valueMeta; if ( !Utils.isEmpty( fieldOutStream[i] ) ) { // We have a new field valueMeta = new ValueMetaString( fieldName ); valueMeta.setOrigin( name ); //set encoding to new field from source field http://jira.pentaho.com/browse/PDI-11839 ValueMetaInterface sourceField = inputRowMeta.searchValueMeta( fieldInStream[i] ); if ( sourceField != null ) { valueMeta.setStringEncoding( sourceField.getStringEncoding() ); } inputRowMeta.addValueMeta( valueMeta ); } else { valueMeta = inputRowMeta.searchValueMeta( fieldInStream[i] ); if ( valueMeta == null ) { continue; } valueMeta.setStorageType( ValueMetaInterface.STORAGE_TYPE_NORMAL ); } } } public void check( List<CheckResultInterface> remarks, TransMeta transMeta, StepMeta stepinfo, RowMetaInterface prev, String[] input, String[] output, RowMetaInterface info, VariableSpace space, Repository repository, IMetaStore metaStore ) { CheckResult cr; String error_message = ""; boolean first = true; boolean error_found = false; if ( prev == null ) { error_message += BaseMessages.getString( PKG, "ReplaceStringMeta.CheckResult.NoInputReceived" ) + Const.CR; cr = new CheckResult( CheckResult.TYPE_RESULT_ERROR, error_message, stepinfo ); remarks.add( cr ); } else { for ( int i = 0; i < fieldInStream.length; i++ ) { String field = fieldInStream[i]; ValueMetaInterface v = prev.searchValueMeta( field ); if ( v == null ) { if ( first ) { first = false; error_message += BaseMessages.getString( PKG, "ReplaceStringMeta.CheckResult.MissingInStreamFields" ) + Const.CR; } error_found = true; error_message += "\t\t" + field + Const.CR; } } if ( error_found ) { cr = new CheckResult( CheckResult.TYPE_RESULT_ERROR, error_message, stepinfo ); } else { cr = new CheckResult( CheckResult.TYPE_RESULT_OK, BaseMessages.getString( PKG, "ReplaceStringMeta.CheckResult.FoundInStreamFields" ), stepinfo ); } remarks.add( cr ); // Check whether all are strings first = true; error_found = false; for ( int i = 0; i < fieldInStream.length; i++ ) { String field = fieldInStream[i]; ValueMetaInterface v = prev.searchValueMeta( field ); if ( v != null ) { if ( v.getType() != ValueMetaInterface.TYPE_STRING ) { if ( first ) { first = false; error_message += BaseMessages.getString( PKG, "ReplaceStringMeta.CheckResult.OperationOnNonStringFields" ) + Const.CR; } error_found = true; error_message += "\t\t" + field + Const.CR; } } } if ( error_found ) { cr = new CheckResult( CheckResult.TYPE_RESULT_ERROR, error_message, stepinfo ); } else { cr = new CheckResult( CheckResult.TYPE_RESULT_OK, BaseMessages.getString( PKG, "ReplaceStringMeta.CheckResult.AllOperationsOnStringFields" ), stepinfo ); } remarks.add( cr ); if ( fieldInStream.length > 0 ) { for ( int idx = 0; idx < fieldInStream.length; idx++ ) { if ( Utils.isEmpty( fieldInStream[idx] ) ) { cr = new CheckResult( CheckResult.TYPE_RESULT_ERROR, BaseMessages.getString( PKG, "ReplaceStringMeta.CheckResult.InStreamFieldMissing", new Integer( idx + 1 ) .toString() ), stepinfo ); remarks.add( cr ); } } } // Check if all input fields are distinct. for ( int idx = 0; idx < fieldInStream.length; idx++ ) { for ( int jdx = 0; jdx < fieldInStream.length; jdx++ ) { if ( fieldInStream[idx].equals( fieldInStream[jdx] ) && idx != jdx && idx < jdx ) { error_message = BaseMessages.getString( PKG, "ReplaceStringMeta.CheckResult.FieldInputError", fieldInStream[idx] ); cr = new CheckResult( CheckResult.TYPE_RESULT_ERROR, error_message, stepinfo ); remarks.add( cr ); } } } } } public StepInterface getStep( StepMeta stepMeta, StepDataInterface stepDataInterface, int cnr, TransMeta transMeta, Trans trans ) { return new ReplaceString( stepMeta, stepDataInterface, cnr, transMeta, trans ); } public StepDataInterface getStepData() { return new ReplaceStringData(); } public boolean supportsErrorHandling() { return true; } private static String getCaseSensitiveCode( int i ) { if ( i < 0 || i >= caseSensitiveCode.length ) { return caseSensitiveCode[0]; } return caseSensitiveCode[i]; } private static String getWholeWordCode( int i ) { if ( i < 0 || i >= wholeWordCode.length ) { return wholeWordCode[0]; } return wholeWordCode[i]; } private static String getUseRegExCode( int i ) { if ( i < 0 || i >= useRegExCode.length ) { return useRegExCode[0]; } return useRegExCode[i]; } public static String getCaseSensitiveDesc( int i ) { if ( i < 0 || i >= caseSensitiveDesc.length ) { return caseSensitiveDesc[0]; } return caseSensitiveDesc[i]; } public static String getWholeWordDesc( int i ) { if ( i < 0 || i >= wholeWordDesc.length ) { return wholeWordDesc[0]; } return wholeWordDesc[i]; } public static String getUseRegExDesc( int i ) { if ( i < 0 || i >= useRegExDesc.length ) { return useRegExDesc[0]; } return useRegExDesc[i]; } private static int getCaseSensitiveByCode( String tt ) { if ( tt == null ) { return 0; } for ( int i = 0; i < caseSensitiveCode.length; i++ ) { if ( caseSensitiveCode[i].equalsIgnoreCase( tt ) ) { return i; } } return 0; } private static int getWholeWordByCode( String tt ) { if ( tt == null ) { return 0; } for ( int i = 0; i < wholeWordCode.length; i++ ) { if ( wholeWordCode[i].equalsIgnoreCase( tt ) ) { return i; } } return 0; } private static int getRegExByCode( String tt ) { if ( tt == null ) { return 0; } for ( int i = 0; i < useRegExCode.length; i++ ) { if ( useRegExCode[i].equalsIgnoreCase( tt ) ) { return i; } } return 0; } public static int getCaseSensitiveByDesc( String tt ) { if ( tt == null ) { return 0; } for ( int i = 0; i < caseSensitiveDesc.length; i++ ) { if ( caseSensitiveDesc[i].equalsIgnoreCase( tt ) ) { return i; } } // If this fails, try to match using the code. return getCaseSensitiveByCode( tt ); } public static int getWholeWordByDesc( String tt ) { if ( tt == null ) { return 0; } for ( int i = 0; i < wholeWordDesc.length; i++ ) { if ( wholeWordDesc[i].equalsIgnoreCase( tt ) ) { return i; } } // If this fails, try to match using the code. return getWholeWordByCode( tt ); } public static int getUseRegExByDesc( String tt ) { if ( tt == null ) { return 0; } for ( int i = 0; i < useRegExDesc.length; i++ ) { if ( useRegExDesc[i].equalsIgnoreCase( tt ) ) { return i; } } // If this fails, try to match using the code. return getRegExByCode( tt ); } }