/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.sort;

import static org.junit.Assert.fail;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Random;
import java.util.UUID;

import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.pentaho.di.TestUtilities;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaDate;
import org.pentaho.di.core.row.value.ValueMetaInteger;
import org.pentaho.di.core.row.value.ValueMetaString;
import org.pentaho.di.core.row.value.ValueMetaTimestamp;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.TransTestFactory;

/**
 * Test class for the Sort step.
 *
 * <p>
 * Every test builds a {@link SortRowsMeta}, runs generated rows through a test transformation created by
 * {@link TransTestFactory} and then verifies the order of the rows that come back.
 *
 * @author Sven Boden
 */
public class SortRowsTest {
  static String sortRowsStepname = "sort rows step";
  static Random RN = new Random( new Date().getTime() );
  public static int MAX_COUNT = 1000;

  // field names
  static String KEY1 = "KEY1";
  static String KEY2 = "KEY2";
  static String TMS = "TMS";
  static String INT = "INT";
  static String STR = "STR";
  static String INTG1 = "INTG1";
  static String STRG2 = "STRG2";
  static String CONST = "CONST";

  @BeforeClass
  public static void beforeClass() throws KettleException {
    KettleEnvironment.init();
  }

  /**
   * Builds the row layout used by the string tests: two string fields, {@code KEY1} and {@code KEY2}.
   *
   * @return a new row meta holding the two string value metas
   */
  RowMetaInterface createStringRowMetaInterface() {
    RowMetaInterface rm = new RowMeta();
    rm.addValueMeta( new ValueMetaString( KEY1 ) );
    rm.addValueMeta( new ValueMetaString( KEY2 ) );
    return rm;
  }

  /**
   * Generate rows for couple of possible combinations
   * <ul>
   * <li>String
   * <li>String (with dup)
   * <li>Timestamp
   * <li>Integer
   * <li>String (CaseSensitive)
   * <li>Integer (pre-sorted) 1 grp
   * <li>String (pre-sorted) 2 grp
   * <li>String constant ("stable")
   * </ul>
   *
   * @return {@link #MAX_COUNT} rows sharing one row meta
   */
  List<RowMetaAndData> createGlobalData() {
    // INTG1 stays constant for this many consecutive rows, STRG2 cycles 'A'..'N' inside each such run
    final int groupSize = 14;

    List<RowMetaAndData> list = new ArrayList<RowMetaAndData>( MAX_COUNT );
    RowMetaInterface rm = new RowMeta();
    rm.addValueMeta( new ValueMetaString( KEY1 ) );
    rm.addValueMeta( new ValueMetaString( KEY2 ) );
    rm.addValueMeta( new ValueMetaDate( TMS ) );
    rm.addValueMeta( new ValueMetaInteger( INT ) );
    rm.addValueMeta( new ValueMetaString( STR ) );
    rm.addValueMeta( new ValueMetaInteger( INTG1 ) );
    rm.addValueMeta( new ValueMetaString( STRG2 ) );
    rm.addValueMeta( new ValueMetaString( CONST ) );

    long time = new Date().getTime();
    String prevKey = null;
    for ( int idx = 0; idx < MAX_COUNT; idx++ ) {
      String key1 = UUID.randomUUID().toString();

      // every 10th row (and row 11) repeats the previous fresh KEY2 so the column contains duplicates
      String key2;
      if ( ( idx % 10 == 0 || idx == 11 ) && idx != 0 ) {
        key2 = prevKey;
      } else {
        key2 = UUID.randomUUID().toString();
        prevKey = key2;
      }

      int rand = RN.nextInt( 10000 );
      Timestamp tms = new Timestamp( time + rand );
      Long igr = Long.valueOf( rand );
      String caseSen = TestUtilities.generateString( RN, 10 );

      int group = idx / groupSize;
      String gr2 = String.valueOf( (char) ( 'A' + idx % groupSize ) );

      Object[] row = new Object[] { key1, key2, tms, igr, caseSen, Long.valueOf( group ), gr2, "stable" };
      list.add( new RowMetaAndData( rm, row ) );
    }
    return list;
  }

  /**
   * Generates {@link #MAX_COUNT} rows with two string keys of the form {@code <random number>.<row index>}.
   * Every 100th row reuses the KEY1 of the row before it, so KEY1 contains some duplicates.
   *
   * @return the generated rows
   */
  List<RowMetaAndData> createStringData() {
    List<RowMetaAndData> list = new ArrayList<RowMetaAndData>( MAX_COUNT );
    RowMetaInterface rm = createStringRowMetaInterface();
    Random rand = new Random();

    String oldKey1 = null;
    for ( int idx = 0; idx < MAX_COUNT; idx++ ) {
      String key1String = rand.nextInt( 1000000 ) + "." + idx;
      String key2String = rand.nextInt( 1000000 ) + "." + idx;
      if ( idx % 100 == 0 && oldKey1 != null ) {
        // have duplicate key1's sometimes
        key1String = oldKey1;
      }
      list.add( new RowMetaAndData( rm, new Object[] { key1String, key2String } ) );
      oldKey1 = key1String;
    }
    return list;
  }

  /**
   * Generates {@link #MAX_COUNT} rows with two timestamp keys, each at most ten seconds after "now".
   *
   * @return the generated rows
   */
  List<RowMetaAndData> createTimestampData() {
    long time = new Date().getTime();
    List<RowMetaAndData> list = new ArrayList<RowMetaAndData>( MAX_COUNT );

    RowMetaInterface rm = new RowMeta();
    rm.addValueMeta( new ValueMetaTimestamp( KEY1 ) );
    rm.addValueMeta( new ValueMetaTimestamp( KEY2 ) );

    Random rand = new Random();
    for ( int idx = 0; idx < MAX_COUNT; idx++ ) {
      int key1 = rand.nextInt( 10000 );
      int key2 = rand.nextInt( 10000 );
      Object[] r1 = new Object[] { new Timestamp( time + key1 ), new Timestamp( time + key2 ) };
      list.add( new RowMetaAndData( rm, r1 ) );
    }
    return list;
  }

  /**
   * Creates a sort step configuration. Prefix and directory of the temporary sort files are the same for all tests.
   *
   * @param sortSize
   *          value passed (as a string) to {@link SortRowsMeta#setSortSize(String)}
   * @param sortFields
   *          names of the fields to sort on
   * @param ascending
   *          per field: sort ascending?
   * @param caseSensitive
   *          per field: compare case sensitive?
   * @param presorted
   *          per field: is the input already sorted on this field?
   * @return the configured step meta
   */
  private static SortRowsMeta createSortRowsMeta( int sortSize, String[] sortFields, boolean[] ascending,
    boolean[] caseSensitive, boolean[] presorted ) {
    SortRowsMeta srm = new SortRowsMeta();
    srm.setSortSize( Integer.toString( sortSize ) );
    srm.setFieldName( sortFields );
    srm.setAscending( ascending );
    srm.setCaseSensitive( caseSensitive );
    srm.setPreSortedField( presorted );
    srm.setPrefix( "SortRowsTest" );
    srm.setDirectory( "." );
    return srm;
  }

  /**
   * Wraps the given sort step in a generated test transformation, feeds it the input rows and returns the rows
   * handed back by {@link TransTestFactory}.
   *
   * @param srm
   *          the sort step configuration under test
   * @param inputList
   *          rows to inject
   * @return the rows returned by the test transformation
   * @throws KettleException
   *           if the transformation cannot be executed
   */
  private static List<RowMetaAndData> executeSort( SortRowsMeta srm, List<RowMetaAndData> inputList )
    throws KettleException {
    TransMeta transMeta = TransTestFactory.generateTestTransformation( null, srm, sortRowsStepname );
    return TransTestFactory.executeTestTransformation( transMeta, TransTestFactory.INJECTOR_STEPNAME,
      sortRowsStepname, TransTestFactory.DUMMY_STEPNAME, inputList );
  }

  /**
   * Check the list, the list has to be sorted on KEY1 and then on KEY2. Both keys are compared through their string
   * representation. Also verifies that exactly {@link #MAX_COUNT} rows are present.
   *
   * @param rows
   *          the rows to verify
   * @param ascending
   *          {@code true} to expect ascending order, {@code false} for descending
   */
  void checkStringRows( List<RowMetaAndData> rows, boolean ascending ) throws Exception {
    String prevKey1 = null;
    String prevKey2 = null;
    int idx = 0;

    for ( RowMetaAndData rm : rows ) {
      Object[] r1 = rm.getData();
      RowMetaInterface rmi = rm.getRowMeta();
      String key1 = rmi.getString( r1, "KEY1", "" );
      String key2 = rmi.getString( r1, "KEY2", "" );

      if ( prevKey1 != null && prevKey2 != null ) {
        // KEY2 only decides the order when KEY1 is a tie
        int cmp = prevKey1.compareTo( key1 );
        if ( cmp == 0 ) {
          cmp = prevKey2.compareTo( key2 );
        }
        boolean outOfOrder = ascending ? cmp > 0 : cmp < 0;
        if ( outOfOrder ) {
          fail( "error in sort" );
        }
      }
      prevKey1 = key1;
      prevKey2 = key2;
      idx++;
    }
    Assert.assertEquals( "less rows returned than expected", MAX_COUNT, idx );
  }

  /**
   * Test case for sorting step .. ascending order on string keys with numeric content.
   */
  @Test
  public void testSortRows1() throws Exception {
    SortRowsMeta srm = createSortRowsMeta( MAX_COUNT / 10, new String[] { "KEY1", "KEY2" },
      new boolean[] { true, true }, new boolean[] { true, true }, new boolean[] { false, false } );

    List<RowMetaAndData> ret = executeSort( srm, createStringData() );

    checkStringRows( ret, true );
  }

  /**
   * Test case for sorting step .. descending order on string keys with numeric content.
   */
  @Test
  public void testSortRows2() throws Exception {
    SortRowsMeta srm = createSortRowsMeta( MAX_COUNT / 10, new String[] { "KEY1", "KEY2" },
      new boolean[] { false, false }, new boolean[] { true, true }, new boolean[] { false, false } );

    List<RowMetaAndData> ret = executeSort( srm, createStringData() );

    checkStringRows( ret, false );
  }

  /**
   * Test case for sorting step .. ascending order on "timestamp" data.
   */
  @Test
  public void testSortRows3() throws Exception {
    SortRowsMeta srm = createSortRowsMeta( MAX_COUNT / 10, new String[] { "KEY1", "KEY2" },
      new boolean[] { true, true }, new boolean[] { true, true }, new boolean[] { false, false } );

    List<RowMetaAndData> ret = executeSort( srm, createTimestampData() );

    checkStringRows( ret, true );
  }

  /**
   * Test for empty input step does not turn into infinity loop
   *
   * @throws Exception
   */
  @Test( timeout = 4000 )
  public void testSortRowsPresortedNullInput() throws Exception {
    SortRowsMeta srm = createSortRowsMeta( MAX_COUNT / 100, new String[] { "KEY1", "KEY2" },
      new boolean[] { true, true }, new boolean[] { true, true }, new boolean[] { true, false } );

    List<RowMetaAndData> inputList = Collections.emptyList();
    List<RowMetaAndData> ret = executeSort( srm, inputList );

    Assert.assertTrue( ret.isEmpty() );
  }

  /**
   * Uses 2 fields as a group, sort descending
   *
   * @throws KettleException
   */
  @Test
  public void test2GrouppingSort() throws KettleException {
    boolean asc = false;
    SortRowsMeta srm = createSortRowsMeta( MAX_COUNT / 100, new String[] { INTG1, CONST, INT },
      new boolean[] { true, true, asc }, new boolean[] { false, false, false },
      new boolean[] { true, true, false } );

    List<RowMetaAndData> ret = executeSort( srm, this.createGlobalData() );

    Assert.assertEquals( "All rows is processed", MAX_COUNT, ret.size() );
    this.checkGrouppingFieldSort( ret, asc );
  }

  /**
   * Test that rows can be sorted with one grouping field
   *
   * @throws KettleException
   */
  @Test
  public void test1GroupingSort() throws KettleException {
    SortRowsMeta srm = createSortRowsMeta( MAX_COUNT / 100, new String[] { INTG1, INT },
      new boolean[] { true, true }, new boolean[] { false, false }, new boolean[] { true, false } );

    List<RowMetaAndData> ret = executeSort( srm, this.createGlobalData() );

    Assert.assertEquals( "All rows is processed", MAX_COUNT, ret.size() );
    this.checkGrouppingFieldSort( ret, true );
  }

  /**
   * Test rows are sorted case sensitive
   *
   * @throws KettleException
   */
  @Test
  public void testSortCaseSensitive() throws KettleException {
    boolean caseSen = true;
    boolean asc = true;
    SortRowsMeta srm = createSortRowsMeta( MAX_COUNT / 100, new String[] { STR }, new boolean[] { asc },
      new boolean[] { caseSen }, new boolean[] { false } );

    List<RowMetaAndData> ret = executeSort( srm, this.createGlobalData() );

    Assert.assertEquals( "All rows is processed", MAX_COUNT, ret.size() );
    this.checkStringSortCorrect( ret, caseSen, asc );
  }

  /**
   * Check rows are sorted case insensitive descending.
   *
   * @throws KettleException
   */
  @Test
  public void testStringSortedCaseInsensitive() throws KettleException {
    boolean caseSen = false;
    boolean asc = false;
    SortRowsMeta srm = createSortRowsMeta( MAX_COUNT / 100, new String[] { STR }, new boolean[] { asc },
      new boolean[] { caseSen }, new boolean[] { false } );

    List<RowMetaAndData> ret = executeSort( srm, this.createGlobalData() );

    Assert.assertEquals( "All rows is processed", MAX_COUNT, ret.size() );
    this.checkStringSortCorrect( ret, caseSen, asc );
  }

  /**
   * Verifies that the STR column of the given rows is in sorted order. For a case insensitive check the values are
   * lower-cased before they are compared.
   *
   * @param list
   *          rows as returned by the sort step
   * @param caseSensitive
   *          whether the sort was case sensitive
   * @param asc
   *          whether the sort was ascending
   * @throws KettleValueException
   *           if the STR value cannot be read from a row
   */
  private void checkStringSortCorrect( List<RowMetaAndData> list, boolean caseSensitive, boolean asc )
    throws KettleValueException {
    String caseSen = caseSensitive ? "case sensitive" : "case unsensitive";

    List<String> actual = new ArrayList<String>( list.size() );
    for ( RowMetaAndData row : list ) {
      String value = row.getString( STR, null );
      actual.add( caseSensitive ? value : value.toLowerCase() );
    }

    // sort + reverse avoids the need for a custom comparator
    List<String> expected = new ArrayList<String>( actual );
    Collections.sort( expected );
    if ( !asc ) {
      Collections.reverse( expected );
    }
    Assert.assertEquals( "Data is sorted: " + caseSen, expected, actual );
  }

  /**
   * Verifies that the INT values are sorted inside every run of consecutive rows that share the same INTG1 value.
   * The last run is verified as well once all rows have been consumed.
   *
   * @param list
   *          rows as returned by the sort step
   * @param asc
   *          whether INT was sorted ascending
   * @throws KettleValueException
   *           if INTG1 or INT cannot be read from a row
   */
  private void checkGrouppingFieldSort( List<RowMetaAndData> list, boolean asc ) throws KettleValueException {
    Long prev = null;
    List<Long> actual = new ArrayList<Long>();

    for ( RowMetaAndData row : list ) {
      Long group = row.getInteger( INTG1 );
      if ( prev != null && !prev.equals( group ) ) {
        // group has changed: the values collected so far form one complete group
        assertGroupSorted( actual, asc );
        actual.clear();
      }
      prev = group;
      actual.add( row.getInteger( INT ) );
    }

    // no group change follows the last group, so it has to be checked explicitly
    assertGroupSorted( actual, asc );
  }

  /**
   * Asserts that the values of one group are in the requested order.
   */
  private static void assertGroupSorted( List<Long> actual, boolean asc ) {
    List<Long> expected = new ArrayList<Long>( actual );
    Collections.sort( expected );
    if ( asc ) {
      Assert.assertEquals( "Values under one group properly sorted asc", expected, actual );
    } else {
      Collections.reverse( expected );
      Assert.assertEquals( "Values under one group properly sorted desc", expected, actual );
    }
  }
}