/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

import java.util.ArrayList;
import java.util.List;

import org.junit.BeforeClass;
import org.junit.Test;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.RowSet;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaInteger;
import org.pentaho.di.core.row.value.ValueMetaString;
import org.pentaho.di.trans.TransMeta.TransformationType;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMetaDataCombi;

/**
 * Integration-style tests that drive the word-count sample transformations
 * ({@code testfiles/wordcount-mapper.ktr} and {@code testfiles/wordcount-reducer.ktr}),
 * injecting rows through an "Injector" step and verifying the rows produced.
 */
public class WordCountSampleTest {

  @BeforeClass
  public static void setUp() throws Exception {
    // Initializes the Kettle environment (plugins, value metas, ...) once for all tests.
    KettleEnvironment.init();
  }

  /**
   * Runs the word-count mapper transformation in single-threaded mode, driving the
   * step execution manually through a {@link SingleThreadedTransExecutor} and checking
   * the produced row count on every iteration while printing throughput metrics.
   */
  @Test
  public void testWordCountMapper() throws Exception {
    //
    // Create a new transformation...
    //
    TransMeta transMeta = new TransMeta( "testfiles/wordcount-mapper.ktr" );
    transMeta.setTransformationType( TransformationType.SingleThreaded );

    long transStart = System.currentTimeMillis();

    // Now execute the transformation...
    Trans trans = new Trans( transMeta );
    trans.setLogLevel( LogLevel.MINIMAL );
    trans.prepareExecution( null );

    // Collect every row written by the "Output" step so we can assert on it per iteration.
    StepInterface si = trans.getStepInterface( "Output", 0 );
    RowStepCollector rc = new RowStepCollector();
    si.addRowListener( rc );

    RowProducer rp = trans.addRowProducer( "Injector", 0 );
    trans.startThreads();

    // Step whose read-line counter is used for the throughput metrics below.
    String metricsStep = "Remove garbage";

    // The single threaded transformation type expects us to run the steps
    // ourselves.
    //
    SingleThreadedTransExecutor executor = new SingleThreadedTransExecutor( trans );

    // Initialize all steps
    //
    executor.init();

    int iterations = 1000000;
    long totalWait = 0;
    List<RowMetaAndData> inputList = createMapperData();

    for ( int i = 0; i < iterations; i++ ) {
      // add rows
      for ( RowMetaAndData rm : inputList ) {
        // Clone so each iteration injects an independent copy of the row data.
        Object[] copy = rm.getRowMeta().cloneRow( rm.getData() );
        rp.putRow( rm.getRowMeta(), copy );
      }

      long start = System.currentTimeMillis();
      boolean cont = executor.oneIteration();
      if ( !cont ) {
        fail( "We don't expect any step or the transformation to be done before the end of all iterations." );
      }
      long end = System.currentTimeMillis();
      long delay = end - start;
      totalWait += delay;

      // Every 100k iterations: report average iteration time, row throughput and
      // the number of rows currently buffered in the steps' input/output row sets.
      if ( i > 0 && ( i % 100000 ) == 0 ) {
        long rowsProcessed = trans.findRunThread( metricsStep ).getLinesRead();
        double speed = Const.round( ( rowsProcessed ) / ( (double) ( end - transStart ) / 1000 ), 1 );
        int totalRows = 0;
        for ( StepMetaDataCombi combi : trans.getSteps() ) {
          for ( RowSet rowSet : combi.step.getInputRowSets() ) {
            totalRows += rowSet.size();
          }
          for ( RowSet rowSet : combi.step.getOutputRowSets() ) {
            totalRows += rowSet.size();
          }
        }
        System.out.println( "#" + i + " : Finished processing one iteration in " + delay + "ms, average is: "
          + Const.round( ( (double) totalWait / ( i + 1 ) ), 1 ) + ", speed=" + speed
          + " row/s, total rows buffered: " + totalRows );
      }

      List<RowMetaAndData> resultRows = rc.getRowsWritten();

      // Result has one row less because we filter out one.
      // We also join with 3 identical rows in a data grid, giving 9 rows of which 3 are filtered out
      //
      assertEquals( "Error found in iteration " + i + " : not the expected amount of output rows.", 9,
        resultRows.size() );
      rc.clear();
    }

    rp.finished();

    // Dispose all steps.
    //
    executor.dispose();

    long rowsProcessed = trans.findRunThread( metricsStep ).getLinesRead();
    long transEnd = System.currentTimeMillis();
    long transTime = transEnd - transStart;
    System.out.println( "Average delay before idle : " + Const.round( ( (double) totalWait / iterations ), 1 ) );
    double transTimeSeconds = Const.round( ( (double) transTime / 1000 ), 1 );
    System.out.println( "Total transformation runtime for " + iterations + " iterations :"
      + transTimeSeconds + " seconds" );
    double transTimePerIteration = Const.round( ( (double) transTime / iterations ), 2 );
    System.out.println( "Runtime per iteration: " + transTimePerIteration + " miliseconds" );
    double rowsPerSecond = Const.round( ( rowsProcessed ) / ( (double) transTime / 1000 ), 1 );
    System.out.println( "Average speed: " + rowsPerSecond + " rows/second" );
  }

  /**
   * Runs the word-count reducer transformation end-to-end with a fixed set of
   * injected key/value rows and verifies it finishes without errors.
   */
  @Test
  public void testWordCountReducer() throws Exception {
    TransMeta transMeta = new TransMeta( "testfiles/wordcount-reducer.ktr" );
    Trans trans = new Trans( transMeta );
    trans.prepareExecution( null );

    RowProducer rp = trans.addRowProducer( "Injector", 0 );
    List<RowMetaAndData> inputList = createReducerData();
    for ( RowMetaAndData rm : inputList ) {
      Object[] copy = rm.getRowMeta().cloneRow( rm.getData() );
      rp.putRow( rm.getRowMeta(), copy );
    }
    rp.finished();

    trans.startThreads();
    trans.waitUntilFinished();

    assertEquals( "Reducer should execute without errors", 0, trans.getErrors() );
  }

  /**
   * @return the row metadata for the mapper input: a String "key" and a String "value".
   */
  public RowMetaInterface createMapperRowMetaInterface() {
    RowMetaInterface rm = new RowMeta();

    ValueMetaInterface[] valuesMeta = { new ValueMetaString( "key" ), new ValueMetaString( "value" ), };
    for ( ValueMetaInterface valueMeta : valuesMeta ) {
      rm.addValueMeta( valueMeta );
    }

    return rm;
  }

  /**
   * @return the single input row injected (repeatedly) into the mapper transformation.
   */
  public List<RowMetaAndData> createMapperData() {
    List<RowMetaAndData> list = new ArrayList<>();

    RowMetaInterface rm = createMapperRowMetaInterface();

    Object[] r1 = new Object[] { "12345", "The quick brown fox jumped over the lazy dog", };
    list.add( new RowMetaAndData( rm, r1 ) );

    return list;
  }

  /**
   * @return the row metadata for the reducer input: a String "key" and an Integer "value".
   */
  private RowMetaInterface createReducerRowMetaInterface() {
    RowMetaInterface rm = new RowMeta();

    ValueMetaInterface[] valuesMeta = { new ValueMetaString( "key" ), new ValueMetaInteger( "value" ), };
    for ( ValueMetaInterface aValuesMeta : valuesMeta ) {
      rm.addValueMeta( aValuesMeta );
    }

    return rm;
  }

  /**
   * @return the key/value rows injected into the reducer transformation, in insertion order.
   */
  private List<RowMetaAndData> createReducerData() {
    List<RowMetaAndData> list = new ArrayList<>();

    RowMetaInterface rm = createReducerRowMetaInterface();

    // Same rows and ordering as before, expressed as data to avoid six copies of the add boilerplate.
    Object[][] rows = {
      { "A", Long.valueOf( 100 ) },
      { "A", Long.valueOf( 200 ) },
      { "B", Long.valueOf( 300 ) },
      { "C", Long.valueOf( 400 ) },
      { "C", Long.valueOf( 500 ) },
      { "D", Long.valueOf( 600 ) },
    };
    for ( Object[] row : rows ) {
      list.add( new RowMetaAndData( rm, row ) );
    }

    return list;
  }
}