/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.sort;
import java.io.File;
import java.io.Serializable;
import java.text.Collator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.pentaho.di.core.CheckResult;
import org.pentaho.di.core.CheckResultInterface;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.injection.Injection;
import org.pentaho.di.core.injection.InjectionSupported;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStepMeta;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.metastore.api.IMetaStore;
import org.w3c.dom.Node;
/*
* Created on 02-jun-2003
*/
@InjectionSupported( localizationPrefix = "SortRows.Injection.", groups = { "FIELDS" } )
public class SortRowsMeta extends BaseStepMeta implements StepMetaInterface, Serializable {
private static final long serialVersionUID = -9075883720765645655L;
private static Class<?> PKG = SortRowsMeta.class; // for i18n purposes, needed by Translator2!!
/** order by which fields? */
@Injection( name = "NAME", group = "FIELDS" )
private String[] fieldName;
/** false : descending, true=ascending */
@Injection( name = "SORT_ASCENDING", group = "FIELDS" )
private boolean[] ascending;
/** false : case insensitive, true=case sensitive */
@Injection( name = "IGNORE_CASE", group = "FIELDS" )
private boolean[] caseSensitive;
/** false : collator disabled, true=collator enabled */
@Injection( name = "COLLATOR_ENABLED", group = "FIELDS" )
private boolean[] collatorEnabled;
// collator strength, 0,1,2,3
@Injection( name = "COLLATOR_STRENGTH", group = "FIELDS" )
private int[] collatorStrength;
/** false : not a presorted field, true=presorted field */
@Injection( name = "PRESORTED", group = "FIELDS" )
private boolean[] preSortedField;
private List<String> groupFields;
/** Directory to store the temp files */
@Injection( name = "SORT_DIRECTORY" )
private String directory;
/** Temp files prefix... */
@Injection( name = "SORT_FILE_PREFIX" )
private String prefix;
/** The sort size: number of rows sorted and kept in memory */
@Injection( name = "SORT_SIZE_ROWS" )
private String sortSize;
/** The free memory limit in percentages in case we don't use the sort size */
@Injection( name = "FREE_MEMORY_TRESHOLD" )
private String freeMemoryLimit;
/** only pass unique rows to the output stream(s) */
@Injection( name = "ONLY_PASS_UNIQUE_ROWS" )
private boolean onlyPassingUniqueRows;
/**
* Compress files: if set to true, temporary files are compressed, thus reducing I/O at the cost of slightly higher
* CPU usage
*/
@Injection( name = "COMPRESS_TEMP_FILES" )
private boolean compressFiles;
/** The variable to use to set the compressFiles option boolean */
private String compressFilesVariable;
public SortRowsMeta() {
super(); // allocate BaseStepMeta
}
/**
 * @return the per-field sort direction flags ({@code true} = ascending, {@code false} = descending)
 */
public boolean[] getAscending() {
  return this.ascending;
}
/**
 * Replaces the per-field sort direction flags.
 *
 * @param ascending
 *          one flag per sort field: {@code true} = ascending, {@code false} = descending
 */
public void setAscending( boolean[] ascending ) {
  this.ascending = ascending;
}
/**
 * @return the directory in which the temporary files are stored
 */
public String getDirectory() {
  return this.directory;
}
/**
 * Sets the directory in which the temporary files are stored.
 *
 * @param directory
 *          the temp-file directory to use
 */
public void setDirectory( String directory ) {
  this.directory = directory;
}
/**
 * @return the names of the fields to order by
 */
public String[] getFieldName() {
  return this.fieldName;
}
/**
 * Replaces the names of the fields to order by.
 * <p>
 * The list returned by {@link #getGroupFields()} is built from these names and then cached, so the cache is dropped
 * here; it is rebuilt on the next request instead of returning entries that belong to the previous names.
 *
 * @param fieldName
 *          The fieldName to set.
 */
public void setFieldName( String[] fieldName ) {
  this.fieldName = fieldName;
  // Invalidate the lazily built group-field cache, just like allocate() does.
  this.groupFields = null;
}
/**
 * @return the prefix used for the temporary files
 */
public String getPrefix() {
  return this.prefix;
}
/**
 * Sets the prefix used for the temporary files.
 *
 * @param prefix
 *          the temp-file prefix to use
 */
public void setPrefix( String prefix ) {
  this.prefix = prefix;
}
/**
 * Loads this step's settings from the given XML node by handing it to {@code readData}. The {@code databases} and
 * {@code metaStore} arguments are not used here.
 *
 * @param stepnode
 *          the XML node holding the step settings
 * @throws KettleXMLException
 *           when the settings cannot be read from the node
 */
@Override
public void loadXML( Node stepnode, List<DatabaseMeta> databases, IMetaStore metaStore ) throws KettleXMLException {
  this.readData( stepnode );
}
/**
 * Creates fresh per-field arrays, each sized for the given number of sort fields, and discards the cached
 * group-field list.
 *
 * @param nrfields
 *          the number of sort fields to make room for
 */
public void allocate( int nrfields ) {
  // Per-field sort options.
  preSortedField = new boolean[nrfields];
  collatorStrength = new int[nrfields];
  collatorEnabled = new boolean[nrfields];
  caseSensitive = new boolean[nrfields];
  ascending = new boolean[nrfields];
  // The "order by" field names themselves.
  fieldName = new String[nrfields];
  // Whatever was cached belongs to the previous arrays.
  groupFields = null;
}
/**
 * Creates a copy of this step definition whose per-field arrays are independent of the original's.
 * <p>
 * The clone always gets arrays sized to the number of field names. Source arrays that have not been allocated yet
 * ({@code null}) or that are shorter than the field-name array are tolerated: as many leading elements as are
 * available are copied and the remainder keeps the Java default value.
 *
 * @return the cloned {@code SortRowsMeta}
 */
@Override
public Object clone() {
  SortRowsMeta retval = (SortRowsMeta) super.clone();
  int nrfields = ( fieldName == null ) ? 0 : fieldName.length;
  // allocate() hands the clone its own arrays and resets its cached group-field list.
  retval.allocate( nrfields );
  copyAvailable( fieldName, retval.fieldName, nrfields );
  copyAvailable( ascending, retval.ascending, nrfields );
  copyAvailable( caseSensitive, retval.caseSensitive, nrfields );
  copyAvailable( collatorEnabled, retval.collatorEnabled, nrfields );
  copyAvailable( collatorStrength, retval.collatorStrength, nrfields );
  copyAvailable( preSortedField, retval.preSortedField, nrfields );
  return retval;
}

/**
 * Copies at most {@code limit} leading elements of the array {@code source} into the array {@code target}; a
 * {@code null} source copies nothing.
 */
private static void copyAvailable( Object source, Object target, int limit ) {
  if ( source == null ) {
    return;
  }
  int available = Math.min( java.lang.reflect.Array.getLength( source ), limit );
  System.arraycopy( source, 0, target, 0, available );
}
/**
 * Reads the step-level settings and the list of sort fields from the given XML node into this object.
 * <p>
 * A missing or empty {@code case_sensitive} tag counts as case sensitive, a missing {@code collator_enabled} tag is
 * read as "N", and a missing {@code collator_strength} tag falls back to the default collation strength.
 *
 * @param stepnode
 *          the XML node holding the step settings
 * @throws KettleXMLException
 *           when anything goes wrong while reading; the original exception is attached as the cause
 */
private void readData( Node stepnode ) throws KettleXMLException {
  try {
    // Step-level settings.
    directory = XMLHandler.getTagValue( stepnode, "directory" );
    prefix = XMLHandler.getTagValue( stepnode, "prefix" );
    sortSize = XMLHandler.getTagValue( stepnode, "sort_size" );
    freeMemoryLimit = XMLHandler.getTagValue( stepnode, "free_memory" );
    String compressFlag = XMLHandler.getTagValue( stepnode, "compress" );
    compressFiles = "Y".equalsIgnoreCase( compressFlag );
    compressFilesVariable = XMLHandler.getTagValue( stepnode, "compress_variable" );
    String uniqueFlag = XMLHandler.getTagValue( stepnode, "unique_rows" );
    onlyPassingUniqueRows = "Y".equalsIgnoreCase( uniqueFlag );

    // One <field> element per sort key.
    Node fieldsNode = XMLHandler.getSubNode( stepnode, "fields" );
    int fieldCount = XMLHandler.countNodes( fieldsNode, "field" );
    allocate( fieldCount );
    String fallbackStrength = Integer.toString( this.getDefaultCollationStrength() );

    for ( int fieldNr = 0; fieldNr < fieldCount; fieldNr++ ) {
      Node fieldNode = XMLHandler.getSubNodeByNr( fieldsNode, "field", fieldNr );

      fieldName[fieldNr] = XMLHandler.getTagValue( fieldNode, "name" );
      ascending[fieldNr] = "Y".equalsIgnoreCase( XMLHandler.getTagValue( fieldNode, "ascending" ) );

      String sensitiveFlag = XMLHandler.getTagValue( fieldNode, "case_sensitive" );
      String collatorFlag = Const.NVL( XMLHandler.getTagValue( fieldNode, "collator_enabled" ), "N" );
      // No value at all means case sensitive.
      caseSensitive[fieldNr] = Utils.isEmpty( sensitiveFlag ) || "Y".equalsIgnoreCase( sensitiveFlag );
      collatorEnabled[fieldNr] = "Y".equalsIgnoreCase( collatorFlag );

      String strengthText = Const.NVL( XMLHandler.getTagValue( fieldNode, "collator_strength" ), fallbackStrength );
      collatorStrength[fieldNr] = Integer.parseInt( strengthText );

      preSortedField[fieldNr] = "Y".equalsIgnoreCase( XMLHandler.getTagValue( fieldNode, "presorted" ) );
    }
  } catch ( Exception e ) {
    throw new KettleXMLException( "Unable to load step info from XML", e );
  }
}
/**
 * Resets this step to its default settings: temp files go to {@code %%java.io.tmpdir%%} with prefix {@code out},
 * the sort size is {@code 1000000}, no free-memory limit, no compression, no unique-row filtering and no sort
 * fields.
 */
@Override
public void setDefault() {
  directory = "%%java.io.tmpdir%%";
  prefix = "out";
  sortSize = "1000000";
  freeMemoryLimit = null;
  compressFiles = false;
  compressFilesVariable = null;
  onlyPassingUniqueRows = false;

  // The defaults contain no sort fields, so empty arrays are all that is needed. The former per-field
  // initialisation loop iterated over zero elements and could never execute.
  allocate( 0 );
}
/**
 * Serialises the step-level settings followed by one {@code <field>} element per sort field.
 *
 * @return the XML fragment describing this step
 */
@Override
public String getXML() {
  StringBuilder xml = new StringBuilder( 256 );

  // Step-level settings.
  xml.append( " " );
  xml.append( XMLHandler.addTagValue( "directory", directory ) );
  xml.append( " " );
  xml.append( XMLHandler.addTagValue( "prefix", prefix ) );
  xml.append( " " );
  xml.append( XMLHandler.addTagValue( "sort_size", sortSize ) );
  xml.append( " " );
  xml.append( XMLHandler.addTagValue( "free_memory", freeMemoryLimit ) );
  xml.append( " " );
  xml.append( XMLHandler.addTagValue( "compress", compressFiles ) );
  xml.append( " " );
  xml.append( XMLHandler.addTagValue( "compress_variable", compressFilesVariable ) );
  xml.append( " " );
  xml.append( XMLHandler.addTagValue( "unique_rows", onlyPassingUniqueRows ) );

  // Sort fields.
  xml.append( " <fields>" );
  xml.append( Const.CR );
  for ( int fieldNr = 0; fieldNr < fieldName.length; fieldNr++ ) {
    xml.append( " <field>" );
    xml.append( Const.CR );
    xml.append( " " );
    xml.append( XMLHandler.addTagValue( "name", fieldName[fieldNr] ) );
    xml.append( " " );
    xml.append( XMLHandler.addTagValue( "ascending", ascending[fieldNr] ) );
    xml.append( " " );
    xml.append( XMLHandler.addTagValue( "case_sensitive", caseSensitive[fieldNr] ) );
    xml.append( " " );
    xml.append( XMLHandler.addTagValue( "collator_enabled", collatorEnabled[fieldNr] ) );
    xml.append( " " );
    xml.append( XMLHandler.addTagValue( "collator_strength", collatorStrength[fieldNr] ) );
    xml.append( " " );
    xml.append( XMLHandler.addTagValue( "presorted", preSortedField[fieldNr] ) );
    xml.append( " </field>" );
    xml.append( Const.CR );
  }
  xml.append( " </fields>" );
  xml.append( Const.CR );

  return xml.toString();
}
/**
 * Reads the step-level settings and the sort fields of the given step from the repository into this object. The
 * number of sort fields is the number of stored {@code field_name} attributes. The {@code metaStore} and
 * {@code databases} arguments are not used here.
 *
 * @param rep
 *          the repository to read from
 * @param id_step
 *          the id of the step whose attributes are read
 * @throws KettleException
 *           when anything goes wrong while reading; the original exception is attached as the cause
 */
@Override
public void readRep( Repository rep, IMetaStore metaStore, ObjectId id_step, List<DatabaseMeta> databases )
  throws KettleException {
  try {
    // Step-level settings.
    directory = rep.getStepAttributeString( id_step, "directory" );
    prefix = rep.getStepAttributeString( id_step, "prefix" );
    sortSize = rep.getStepAttributeString( id_step, "sort_size" );
    freeMemoryLimit = rep.getStepAttributeString( id_step, "free_memory" );
    compressFiles = rep.getStepAttributeBoolean( id_step, "compress" );
    compressFilesVariable = rep.getStepAttributeString( id_step, "compress_variable" );
    onlyPassingUniqueRows = rep.getStepAttributeBoolean( id_step, "unique_rows" );

    // Per-field settings.
    int fieldCount = rep.countNrStepAttributes( id_step, "field_name" );
    allocate( fieldCount );
    String fallbackStrength = Integer.toString( this.getDefaultCollationStrength() );

    for ( int fieldNr = 0; fieldNr < fieldCount; fieldNr++ ) {
      fieldName[fieldNr] = rep.getStepAttributeString( id_step, fieldNr, "field_name" );
      ascending[fieldNr] = rep.getStepAttributeBoolean( id_step, fieldNr, "field_ascending" );
      caseSensitive[fieldNr] = rep.getStepAttributeBoolean( id_step, fieldNr, "field_case_sensitive", true );
      collatorEnabled[fieldNr] = rep.getStepAttributeBoolean( id_step, fieldNr, "field_collator_enabled", false );

      // A field stored without a strength gets the default collation strength.
      String storedStrength = rep.getStepAttributeString( id_step, fieldNr, "field_collator_strength" );
      collatorStrength[fieldNr] = Integer.parseInt( Const.NVL( storedStrength, fallbackStrength ) );

      preSortedField[fieldNr] = rep.getStepAttributeBoolean( id_step, fieldNr, "field_presorted", false );
    }
  } catch ( Exception e ) {
    throw new KettleException( "Unexpected error reading step information from the repository", e );
  }
}
// Looks up the JVM's current default locale and returns the collation strength that belongs to it.
// Package protected for testing purposes
int getDefaultCollationStrength() {
  Locale userLocale = Locale.getDefault();
  return getDefaultCollationStrength( userLocale );
}
// Returns the strength of the collator obtained for the given locale, or Collator.IDENTICAL when the locale
// is null or no collator instance is returned for it.
// Package protected for testing purposes
int getDefaultCollationStrength( Locale aLocale ) {
  if ( aLocale == null ) {
    return Collator.IDENTICAL;
  }
  Collator localeCollator = Collator.getInstance( aLocale );
  return ( localeCollator == null ) ? Collator.IDENTICAL : localeCollator.getStrength();
}
/**
 * Stores the step-level settings and then, for every sort field, its name and options as indexed step attributes
 * in the repository. The {@code metaStore} argument is not used here.
 *
 * @param rep
 *          the repository to write to
 * @param id_transformation
 *          the id of the transformation the step belongs to
 * @param id_step
 *          the id of the step being saved
 * @throws KettleException
 *           when anything goes wrong while saving; the original exception is attached as the cause
 */
@Override
public void saveRep( Repository rep, IMetaStore metaStore, ObjectId id_transformation, ObjectId id_step )
  throws KettleException {
  try {
    // Step-level settings.
    rep.saveStepAttribute( id_transformation, id_step, "directory", directory );
    rep.saveStepAttribute( id_transformation, id_step, "prefix", prefix );
    rep.saveStepAttribute( id_transformation, id_step, "sort_size", sortSize );
    rep.saveStepAttribute( id_transformation, id_step, "free_memory", freeMemoryLimit );
    rep.saveStepAttribute( id_transformation, id_step, "compress", compressFiles );
    rep.saveStepAttribute( id_transformation, id_step, "compress_variable", compressFilesVariable );
    rep.saveStepAttribute( id_transformation, id_step, "unique_rows", onlyPassingUniqueRows );

    // Per-field settings, indexed by field position.
    for ( int fieldNr = 0; fieldNr < fieldName.length; fieldNr++ ) {
      rep.saveStepAttribute( id_transformation, id_step, fieldNr, "field_name", fieldName[fieldNr] );
      rep.saveStepAttribute( id_transformation, id_step, fieldNr, "field_ascending", ascending[fieldNr] );
      rep.saveStepAttribute( id_transformation, id_step, fieldNr, "field_case_sensitive", caseSensitive[fieldNr] );
      rep.saveStepAttribute( id_transformation, id_step, fieldNr, "field_collator_enabled",
        collatorEnabled[fieldNr] );
      rep.saveStepAttribute( id_transformation, id_step, fieldNr, "field_collator_strength",
        collatorStrength[fieldNr] );
      rep.saveStepAttribute( id_transformation, id_step, fieldNr, "field_presorted", preSortedField[fieldNr] );
    }
  } catch ( Exception e ) {
    throw new KettleException( "Unable to save step information to the repository for id_step=" + id_step, e );
  }
}
/**
 * Marks the sort-key fields of the incoming row layout with this step's sort settings by delegating to
 * {@link #assignSortingCriteria(RowMetaInterface)}. Only {@code inputRowMeta} is used; the other arguments are
 * ignored here.
 *
 * @param inputRowMeta
 *          the row layout whose sort-key fields are updated in place
 */
@Override
public void getFields( RowMetaInterface inputRowMeta, String name, RowMetaInterface[] info, StepMeta nextStep,
  VariableSpace space, Repository repository, IMetaStore metaStore ) throws KettleStepException {
  // Sorted properties (direction, case handling, collator) are applied to the key fields.
  this.assignSortingCriteria( inputRowMeta );
}
/**
 * Applies this step's sort settings to every sort field that is present in the given row layout. Sort fields that
 * cannot be found in the layout are skipped. Each matching field is also switched to normal storage with its
 * storage metadata cleared.
 *
 * @param inputRowMeta
 *          the row layout whose value metadata is updated in place
 */
@SuppressWarnings( "WeakerAccess" )
public void assignSortingCriteria( RowMetaInterface inputRowMeta ) {
  for ( int keyNr = 0; keyNr < fieldName.length; keyNr++ ) {
    int position = inputRowMeta.indexOfValue( fieldName[keyNr] );
    if ( position < 0 ) {
      // This sort key is not part of the row layout.
      continue;
    }

    ValueMetaInterface sortKeyMeta = inputRowMeta.getValueMeta( position );

    // Each option array may be shorter than the list of field names, so every access is
    // guarded by its own length check.
    if ( keyNr < ascending.length ) {
      sortKeyMeta.setSortedDescending( !ascending[keyNr] );
    }
    if ( keyNr < caseSensitive.length ) {
      sortKeyMeta.setCaseInsensitive( !caseSensitive[keyNr] );
    }
    if ( keyNr < collatorEnabled.length ) {
      sortKeyMeta.setCollatorDisabled( !collatorEnabled[keyNr] );
    }
    if ( keyNr < collatorStrength.length ) {
      sortKeyMeta.setCollatorStrength( collatorStrength[keyNr] );
    }

    // Key fields that use lazy conversion are converted to the normal storage type
    // automatically; this improves performance, see also: PDI-346
    sortKeyMeta.setStorageType( ValueMetaInterface.STORAGE_TYPE_NORMAL );
    sortKeyMeta.setStorageMetadata( null );
  }
}
/**
 * Verifies this step's configuration and appends the findings to {@code remarks}.
 * <p>
 * When the previous step delivers fields, three things are reported: how many fields were received, whether all
 * sort keys can be found among them (and that at least one sort key is defined), and whether the sort directory
 * exists and is a directory. When no fields are delivered a single error is reported instead. Finally it is
 * reported whether there are input streams leading to this step.
 * <p>
 * A sort directory that resolves to nothing (null or empty) is reported as a non-existing directory instead of
 * causing a {@code NullPointerException}.
 *
 * @param remarks
 *          the list the check results are added to
 * @param transMeta
 *          the transformation; used to resolve variables in the sort directory
 * @param stepMeta
 *          the step the results are attached to
 * @param prev
 *          the fields coming from the previous step(s); may be {@code null}
 * @param input
 *          the names of the input streams
 */
@Override
public void check( List<CheckResultInterface> remarks, TransMeta transMeta, StepMeta stepMeta, RowMetaInterface prev,
  String[] input, String[] output, RowMetaInterface info, VariableSpace space, Repository repository,
  IMetaStore metaStore ) {
  CheckResult cr;

  if ( prev != null && prev.size() > 0 ) {
    cr =
      new CheckResult( CheckResultInterface.TYPE_RESULT_OK, BaseMessages.getString( PKG,
        "SortRowsMeta.CheckResult.FieldsReceived", "" + prev.size() ), stepMeta );
    remarks.add( cr );

    // Collect every sort key that is missing from the incoming fields.
    StringBuilder missingKeys = new StringBuilder();
    for ( int i = 0; i < fieldName.length; i++ ) {
      if ( prev.indexOfValue( fieldName[i] ) < 0 ) {
        missingKeys.append( "\t\t" ).append( fieldName[i] ).append( Const.CR );
      }
    }

    if ( missingKeys.length() > 0 ) {
      String errorMessage =
        BaseMessages.getString( PKG, "SortRowsMeta.CheckResult.SortKeysNotFound", missingKeys.toString() );
      cr = new CheckResult( CheckResultInterface.TYPE_RESULT_ERROR, errorMessage, stepMeta );
      remarks.add( cr );
    } else if ( fieldName.length > 0 ) {
      cr =
        new CheckResult( CheckResultInterface.TYPE_RESULT_OK, BaseMessages.getString( PKG,
          "SortRowsMeta.CheckResult.AllSortKeysFound" ), stepMeta );
      remarks.add( cr );
    } else {
      cr =
        new CheckResult( CheckResultInterface.TYPE_RESULT_ERROR, BaseMessages.getString( PKG,
          "SortRowsMeta.CheckResult.NoSortKeysEntered" ), stepMeta );
      remarks.add( cr );
    }

    // Check the sort directory. new File( null ) throws a NullPointerException, so an unresolved
    // directory is treated like a directory that does not exist.
    String realDirectory = transMeta.environmentSubstitute( directory );
    File sortDirectory = Utils.isEmpty( realDirectory ) ? null : new File( realDirectory );
    if ( sortDirectory != null && sortDirectory.exists() ) {
      if ( sortDirectory.isDirectory() ) {
        cr =
          new CheckResult( CheckResultInterface.TYPE_RESULT_OK, BaseMessages.getString( PKG,
            "SortRowsMeta.CheckResult.DirectoryExists", realDirectory ), stepMeta );
        remarks.add( cr );
      } else {
        cr =
          new CheckResult( CheckResultInterface.TYPE_RESULT_ERROR, BaseMessages.getString( PKG,
            "SortRowsMeta.CheckResult.ExistsButNoDirectory", realDirectory ), stepMeta );
        remarks.add( cr );
      }
    } else {
      cr =
        new CheckResult( CheckResultInterface.TYPE_RESULT_ERROR, BaseMessages.getString( PKG,
          "SortRowsMeta.CheckResult.DirectoryNotExists", Const.NVL( realDirectory, "" ) ), stepMeta );
      remarks.add( cr );
    }
  } else {
    cr =
      new CheckResult( CheckResultInterface.TYPE_RESULT_ERROR, BaseMessages.getString( PKG,
        "SortRowsMeta.CheckResult.NoFields" ), stepMeta );
    remarks.add( cr );
  }

  // See if we have input streams leading to this step!
  if ( input.length > 0 ) {
    cr =
      new CheckResult( CheckResultInterface.TYPE_RESULT_OK, BaseMessages.getString( PKG,
        "SortRowsMeta.CheckResult.ExpectedInputOk" ), stepMeta );
    remarks.add( cr );
  } else {
    cr =
      new CheckResult( CheckResultInterface.TYPE_RESULT_ERROR, BaseMessages.getString( PKG,
        "SortRowsMeta.CheckResult.ExpectedInputError" ), stepMeta );
    remarks.add( cr );
  }
}
/**
 * Creates the {@link SortRows} step instance for this metadata, passing all arguments through unchanged.
 *
 * @return a new {@code SortRows} step
 */
@Override
public StepInterface getStep( StepMeta stepMeta, StepDataInterface stepDataInterface, int cnr, TransMeta transMeta,
  Trans trans ) {
  SortRows sortStep = new SortRows( stepMeta, stepDataInterface, cnr, transMeta, trans );
  return sortStep;
}
/**
 * @return a new, empty {@link SortRowsData} instance
 */
@Override
public StepDataInterface getStepData() {
  SortRowsData stepData = new SortRowsData();
  return stepData;
}
/**
 * @return the sort size (number of rows sorted and kept in memory), as text
 */
public String getSortSize() {
  return this.sortSize;
}
/**
 * Sets the sort size, i.e. the number of rows sorted and kept in memory.
 *
 * @param sortSize
 *          the sort size, as text
 */
public void setSortSize( String sortSize ) {
  this.sortSize = sortSize;
}
/**
 * @return {@code true} when the temporary files are to be compressed
 */
public boolean getCompressFiles() {
  return this.compressFiles;
}
/**
 * Switches compression of the temporary files created during sorting on or off.
 *
 * @param compressFiles
 *          {@code true} to compress the temporary files
 */
public void setCompressFiles( boolean compressFiles ) {
  this.compressFiles = compressFiles;
}
/**
 * @return {@code true} when only unique rows are passed to the output stream(s)
 */
public boolean isOnlyPassingUniqueRows() {
  return this.onlyPassingUniqueRows;
}
/**
 * Sets whether only unique rows are passed to the output stream(s).
 *
 * @param onlyPassingUniqueRows
 *          {@code true} to pass unique rows only
 */
public void setOnlyPassingUniqueRows( boolean onlyPassingUniqueRows ) {
  this.onlyPassingUniqueRows = onlyPassingUniqueRows;
}
/**
 * @return the variable used to set the compress-files option
 */
public String getCompressFilesVariable() {
  return this.compressFilesVariable;
}
/**
 * Sets the variable used to set the compress-files option.
 *
 * @param compressFilesVariable
 *          the variable to use
 */
public void setCompressFilesVariable( String compressFilesVariable ) {
  this.compressFilesVariable = compressFilesVariable;
}
/**
 * @return the per-field case flags ({@code true} = case sensitive, {@code false} = case insensitive)
 */
public boolean[] getCaseSensitive() {
  return this.caseSensitive;
}
/**
 * Replaces the per-field case flags.
 *
 * @param caseSensitive
 *          one flag per sort field: {@code true} = case sensitive, {@code false} = case insensitive
 */
public void setCaseSensitive( boolean[] caseSensitive ) {
  this.caseSensitive = caseSensitive;
}
/**
 * @return the per-field collator flags ({@code true} = collator enabled, {@code false} = collator disabled)
 */
public boolean[] getCollatorEnabled() {
  return this.collatorEnabled;
}
/**
 * Replaces the per-field collator flags.
 *
 * @param collatorEnabled
 *          one flag per sort field: {@code true} = collator enabled, {@code false} = collator disabled
 */
public void setCollatorEnabled( boolean[] collatorEnabled ) {
  this.collatorEnabled = collatorEnabled;
}
/**
 * @return the per-field collator strengths
 */
public int[] getCollatorStrength() {
  return this.collatorStrength;
}
/**
 * Replaces the per-field collator strengths.
 *
 * @param collatorStrength
 *          one strength value per sort field
 */
public void setCollatorStrength( int[] collatorStrength ) {
  this.collatorStrength = collatorStrength;
}
/**
 * @return the free memory limit (in percentages), as text
 */
public String getFreeMemoryLimit() {
  return this.freeMemoryLimit;
}
/**
 * Sets the free memory limit (in percentages).
 *
 * @param freeMemoryLimit
 *          the limit, as text
 */
public void setFreeMemoryLimit( String freeMemoryLimit ) {
  this.freeMemoryLimit = freeMemoryLimit;
}
/**
 * @return the per-field presorted flags ({@code true} = presorted field, {@code false} = not presorted)
 */
public boolean[] getPreSortedField() {
  return this.preSortedField;
}
/**
 * Replaces the per-field presorted flags.
 * <p>
 * The list returned by {@link #getGroupFields()} is built from these flags and then cached, so the cache is dropped
 * here; it is rebuilt on the next request instead of reflecting the previous flags.
 *
 * @param preSorted
 *          one flag per sort field: {@code true} = presorted field, {@code false} = not presorted
 */
public void setPreSortedField( boolean[] preSorted ) {
  preSortedField = preSorted;
  // Invalidate the lazily built group-field cache, just like allocate() does.
  groupFields = null;
}
/**
 * Returns the names of the sort fields that are flagged as presorted, in field order.
 * <p>
 * The list is built on first use and then cached. When no field is flagged as presorted (or the field arrays have
 * not been allocated yet) nothing is cached and {@code null} is returned, which is what
 * {@link #isGroupSortEnabled()} relies on.
 *
 * @return the presorted field names, or {@code null} when there are none
 */
public List<String> getGroupFields() {
  if ( groupFields == null && preSortedField != null && fieldName != null ) {
    // Never read past the end of either array, in case their lengths differ.
    int limit = Math.min( preSortedField.length, fieldName.length );
    for ( int i = 0; i < limit; i++ ) {
      if ( preSortedField[i] ) {
        if ( groupFields == null ) {
          groupFields = new ArrayList<String>();
        }
        groupFields.add( fieldName[i] );
      }
    }
  }
  return groupFields;
}
/**
 * @return {@code true} when {@link #getGroupFields()} yields a list, i.e. at least one field is flagged as
 *         presorted
 */
public boolean isGroupSortEnabled() {
  List<String> presortedNames = this.getGroupFields();
  return presortedNames != null;
}
}