/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2015 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.NavigableSet; import java.util.TreeSet; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; import org.pentaho.di.core.RowSet; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettlePluginException; import org.pentaho.di.core.exception.KettleTransException; import org.pentaho.di.core.logging.LogChannelInterface; import org.pentaho.di.partition.PartitionSchema; import org.pentaho.di.trans.TransMeta.TransformationType; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaDataCombi; import org.pentaho.di.trans.step.StepPartitioningMeta; import org.pentaho.di.trans.steps.dummytrans.DummyTransMeta; /** * <p>This test verify transformation step initializations and row sets distributions based * on different steps execution. In this tests uses one step as a producer and one step as a consumer. * Examines different situations when step runs in multiple copies or partitioned. One of * the possible issues of incorrect rowsets initialization described in PDI-12140.</p> * So next combinations is examined: * <ol> * <li>1 - 2x - when one step copy is hoped to step running in 2 copies * <li>2x - 2x - when step running in 2 copies hops to step running in 2 copies * <li>2x - 1 - when step running in 2 copies hops to step running in 1 copy * <li>1 - cl1 - when step running in one copy hops to step running partitioned * <li>cl1-cl1 - when step running partitioned hops to step running partitioned (swim lanes case) * <li>cl1-cl2 - when step running partitioned by one partitioner hops to step partitioned by another partitioner * <li>x2-cl1 - when step running in 2 copies hops to partitioned step * */ public class TransPartitioningTest { /** * This is convenient names for testing steps in transformation. * * The trick is if we use numeric names for steps we can use NavigableSet to find next or previous when mocking * appropriate TransMeta methods (comparable strings). */ private final String ONE = "1"; private final String TWO = "2"; private final String S10 = "1.0"; private final String S11 = "1.1"; private final String S20 = "2.0"; private final String S21 = "2.1"; private final String PID1 = "a"; private final String PID2 = "b"; private final String SP10 = "1.a"; private final String SP11 = "1.b"; private final String SP20 = "2.a"; private final String SP21 = "2.b"; @Mock LogChannelInterface log; Trans trans; /** * Step meta is sorted according StepMeta name so using numbers of step names we can easy build step chain mock. */ private final NavigableSet<StepMeta> chain = new TreeSet<StepMeta>(); @Before public void setUp() throws Exception { MockitoAnnotations.initMocks( this ); trans = new Trans() { @Override public void calculateBatchIdAndDateRange() throws KettleTransException { // avoid NPE if called } @Override public void beginProcessing() throws KettleTransException { // avoid NPE if called } }; // prepare TransMeta complete mock: TransMeta meta = Mockito.mock( TransMeta.class ); Mockito.when( meta.getName() ).thenReturn( "junit meta" ); Mockito.when( meta.getTransformationType() ).thenReturn( TransformationType.Normal ); Mockito.when( meta.getSizeRowset() ).thenReturn( 13 ); Mockito.when( meta.getTransHopSteps( Mockito.anyBoolean() ) ).thenAnswer( new Answer<List<StepMeta>>() { @Override public List<StepMeta> answer( InvocationOnMock invocation ) throws Throwable { return ( new ArrayList<StepMeta>( chain ) ); } } ); Mockito.when( meta.findNextSteps( Mockito.any( StepMeta.class ) ) ).then( new Answer<List<StepMeta>>() { @Override public List<StepMeta> answer( InvocationOnMock invocation ) throws Throwable { Object obj = invocation.getArguments()[0]; StepMeta findFor = StepMeta.class.cast( obj ); List<StepMeta> ret = new ArrayList<StepMeta>(); StepMeta nextStep = chain.higher( findFor ); if ( nextStep != null ) { ret.add( nextStep ); } return ret; } } ); Mockito.when( meta.findPreviousSteps( Mockito.any( StepMeta.class ), Mockito.anyBoolean() ) ).thenAnswer( new Answer<List<StepMeta>>() { @Override public List<StepMeta> answer( InvocationOnMock invocation ) throws Throwable { Object obj = invocation.getArguments()[0]; StepMeta findFor = StepMeta.class.cast( obj ); List<StepMeta> ret = new ArrayList<StepMeta>(); StepMeta prevStep = chain.lower( findFor ); if ( prevStep != null ) { ret.add( prevStep ); } return ret; } } ); Mockito.when( meta.findStep( Mockito.anyString() ) ).thenAnswer( new Answer<StepMeta>() { @Override public StepMeta answer( InvocationOnMock invocation ) throws Throwable { Object obj = invocation.getArguments()[0]; String findFor = String.class.cast( obj ); for ( StepMeta item : chain ) { if ( item.getName().equals( findFor ) ) { return item; } } return null; } } ); trans.setLog( log ); trans.setTransMeta( meta ); } /** * This checks transformation initialization when using one to many copies * * @throws KettleException */ @Test public void testOneToManyCopies() throws KettleException { prepareStepMetas_1_x2(); trans.prepareExecution( new String[] {} ); List<RowSet> rowsets = trans.getRowsets(); assertTrue( !rowsets.isEmpty() ); assertEquals( "We have 2 rowsets finally", 2, rowsets.size() ); assertEquals( "We have 3 steps: one producer and 2 copies of consumer", 3, trans.getSteps().size() ); // Ok, examine initialized steps now. StepInterface stepOne = getStepByName( S10 ); assertTrue( "1 step have no input row sets", stepOne.getInputRowSets().isEmpty() ); assertEquals( "1 step have 2 output rowsets", 2, stepOne.getOutputRowSets().size() ); StepInterface stepTwo0 = getStepByName( S20 ); Assert.assertEquals( "2.0 step have 12 input row sets", 1, stepTwo0.getInputRowSets().size() ); Assert.assertTrue( "2.0 step have no output row sets", stepTwo0.getOutputRowSets().isEmpty() ); StepInterface stepTwo1 = getStepByName( S21 ); Assert.assertEquals( "2.1 step have 1 input row sets", 1, stepTwo1.getInputRowSets().size() ); Assert.assertTrue( "2.1 step have no output row sets", stepTwo1.getOutputRowSets().isEmpty() ); } /** * This checks transformation initialization when using many to many copies. * * @throws KettleException */ @Test public void testManyToManyCopies() throws KettleException { prepareStepMetas_x2_x2(); trans.prepareExecution( new String[] {} ); List<RowSet> rowsets = trans.getRowsets(); assertTrue( !rowsets.isEmpty() ); assertEquals( "We have 2 rowsets finally", 2, rowsets.size() ); assertEquals( "We have 4 steps: 2 copies of producer and 2 copies of consumer", 4, trans.getSteps().size() ); // Ok, examine initialized steps now. StepInterface stepOne0 = getStepByName( S10 ); assertTrue( "1 step have no input row sets", stepOne0.getInputRowSets().isEmpty() ); assertEquals( "1 step have 1 output rowsets", 1, stepOne0.getOutputRowSets().size() ); StepInterface stepOne1 = getStepByName( S11 ); assertTrue( "1 step have no input row sets", stepOne1.getInputRowSets().isEmpty() ); assertEquals( "1 step have 1 output rowsets", 1, stepOne1.getOutputRowSets().size() ); StepInterface stepTwo0 = getStepByName( S20 ); Assert.assertEquals( "2.0 step have 1 input row sets", 1, stepTwo0.getInputRowSets().size() ); Assert.assertTrue( "2.0 step have no output row sets", stepTwo0.getOutputRowSets().isEmpty() ); StepInterface stepTwo1 = getStepByName( S21 ); Assert.assertEquals( "2.1 step have 1 input row sets", 1, stepTwo1.getInputRowSets().size() ); Assert.assertTrue( "2.1 step have no output row sets", stepTwo1.getOutputRowSets().isEmpty() ); } /** * This checks transformation initialization when using many copies to one next step * * @throws KettleException */ @Test public void testManyToOneCopies() throws KettleException { prepareStepMetas_x2_1(); trans.prepareExecution( new String[] {} ); List<RowSet> rowsets = trans.getRowsets(); assertTrue( !rowsets.isEmpty() ); assertEquals( "We have 2 rowsets finally", 2, rowsets.size() ); assertEquals( "We have 4 steps: 2 copies of producer and 2 copies of consumer", 3, trans.getSteps().size() ); // Ok, examine initialized steps now. StepInterface stepOne0 = getStepByName( S10 ); assertTrue( "1 step have no input row sets", stepOne0.getInputRowSets().isEmpty() ); assertEquals( "1 step have 1 output rowsets", 1, stepOne0.getOutputRowSets().size() ); StepInterface stepOne1 = getStepByName( S11 ); assertTrue( "1 step have no input row sets", stepOne1.getInputRowSets().isEmpty() ); assertEquals( "1 step have 1 output rowsets", 1, stepOne1.getOutputRowSets().size() ); StepInterface stepTwo0 = getStepByName( S20 ); Assert.assertEquals( "2.0 step have 2 input row sets", 2, stepTwo0.getInputRowSets().size() ); Assert.assertTrue( "2.0 step have no output row sets", stepTwo0.getOutputRowSets().isEmpty() ); } /** * Test one to one partitioning step transformation organization. * * @throws KettleException */ @Test public void testOneToPartitioningSchema() throws KettleException { prepareStepMetas_1_cl1(); trans.prepareExecution( new String[] {} ); List<RowSet> rowsets = trans.getRowsets(); assertTrue( !rowsets.isEmpty() ); assertEquals( "We have 2 rowsets finally", 2, rowsets.size() ); assertEquals( "We have 3 steps: 1 producer and 2 copies of consumer since it is partitioned", 3, trans.getSteps() .size() ); // Ok, examine initialized steps now. StepInterface stepOne0 = getStepByName( S10 ); assertTrue( "1 step have no input row sets", stepOne0.getInputRowSets().isEmpty() ); assertEquals( "1 step have 2 output rowsets", 2, stepOne0.getOutputRowSets().size() ); StepInterface stepTwo0 = getStepByName( SP20 ); assertEquals( "2.0 step have one input row sets", 1, stepTwo0.getInputRowSets().size() ); assertTrue( "2.0 step have no output rowsets", stepTwo0.getOutputRowSets().isEmpty() ); StepInterface stepTwo1 = getStepByName( SP21 ); Assert.assertEquals( "2.1 step have 1 input row sets", 1, stepTwo1.getInputRowSets().size() ); Assert.assertTrue( "2.1 step have no output row sets", stepTwo1.getOutputRowSets().isEmpty() ); } /** * Test 'Swim lines partitioning' * * @throws KettleException */ @Test public void testSwimLanesPartitioning() throws KettleException { prepareStepMetas_cl1_cl1(); trans.prepareExecution( new String[] {} ); List<RowSet> rowsets = trans.getRowsets(); assertTrue( !rowsets.isEmpty() ); assertEquals( "We have 2 rowsets finally", 2, rowsets.size() ); assertEquals( "We have 3 steps: 1 producer and 2 copies of consumer since it is partitioned", 4, trans.getSteps() .size() ); // Ok, examine initialized steps now. StepInterface stepOne0 = getStepByName( SP10 ); assertTrue( "1.0 step have no input row sets", stepOne0.getInputRowSets().isEmpty() ); assertEquals( "1.0 step have 1 output rowsets", 1, stepOne0.getOutputRowSets().size() ); StepInterface stepOne1 = getStepByName( SP11 ); assertTrue( "1.1 step have no input row sets", stepOne1.getInputRowSets().isEmpty() ); assertEquals( "1.1 step have 1 output rowsets", 1, stepOne1.getOutputRowSets().size() ); StepInterface stepTwo0 = getStepByName( SP20 ); assertEquals( "2.0 step have 2 input row sets", 1, stepTwo0.getInputRowSets().size() ); assertTrue( "2.0 step have no output rowsets", stepTwo0.getOutputRowSets().isEmpty() ); StepInterface stepTwo2 = getStepByName( SP21 ); assertTrue( "2.2 step have no output row sets", stepTwo2.getOutputRowSets().isEmpty() ); assertEquals( "2.2 step have 2 output rowsets", 1, stepTwo2.getInputRowSets().size() ); } /** * This is PDI-12140 case. 2 steps with same partitions ID's count but different partitioner. This is not a swim lines * cases and we need repartitioning here. * * @throws KettleException */ @Test public void testDifferentPartitioningFlow() throws KettleException { prepareStepMetas_cl1_cl2(); trans.prepareExecution( new String[] {} ); List<RowSet> rowsets = trans.getRowsets(); assertTrue( !rowsets.isEmpty() ); assertEquals( "We have 4 rowsets finally since repartitioning happens", 4, rowsets.size() ); assertEquals( "We have 4 steps: 2 producer copies and 2 copies of consumer since they both partitioned", 4, trans .getSteps().size() ); // Ok, examine initialized steps now. StepInterface stepOne0 = getStepByName( SP10 ); assertTrue( "1.0 step have no input row sets", stepOne0.getInputRowSets().isEmpty() ); assertEquals( "1.0 step have 2 output rowsets", 2, stepOne0.getOutputRowSets().size() ); StepInterface stepOne1 = getStepByName( SP11 ); assertTrue( "1.1 step have no input row sets", stepOne1.getInputRowSets().isEmpty() ); assertEquals( "1.1 step have 2 output rowsets", 2, stepOne1.getOutputRowSets().size() ); StepInterface stepTwo0 = getStepByName( SP20 ); assertTrue( "2.0 step have no output row sets", stepTwo0.getOutputRowSets().isEmpty() ); assertEquals( "2.0 step have 1 input rowsets", 2, stepTwo0.getInputRowSets().size() ); StepInterface stepTwo2 = getStepByName( SP21 ); assertTrue( "2.1 step have no output row sets", stepTwo2.getOutputRowSets().isEmpty() ); assertEquals( "2.2 step have 2 input rowsets", 2, stepTwo2.getInputRowSets().size() ); } /** * This is a case when step running in many copies meets partitioning one. * * @throws KettleException */ @Test public void testManyCopiesToPartitioningFlow() throws KettleException { prepareStepMetas_x2_cl1(); trans.prepareExecution( new String[] {} ); List<RowSet> rowsets = trans.getRowsets(); assertTrue( !rowsets.isEmpty() ); assertEquals( "We have 4 rowsets finally since repartitioning happens", 4, rowsets.size() ); assertEquals( "We have 4 steps: 2 producer copies and 2 copies of consumer since consumer is partitioned", 4, trans .getSteps().size() ); // Ok, examine initialized steps now. StepInterface stepOne0 = getStepByName( S10 ); assertTrue( "1.0 step have no input row sets", stepOne0.getInputRowSets().isEmpty() ); assertEquals( "1.0 step have 2 output rowsets", 2, stepOne0.getOutputRowSets().size() ); StepInterface stepOne1 = getStepByName( S11 ); assertTrue( "1.1 step have no input row sets", stepOne1.getInputRowSets().isEmpty() ); assertEquals( "1.1 step have 2 output rowsets", 2, stepOne1.getOutputRowSets().size() ); StepInterface stepTwo0 = getStepByName( SP20 ); assertTrue( "2.0 step have no output row sets", stepTwo0.getOutputRowSets().isEmpty() ); assertEquals( "2.0 step have 2 input rowsets", 2, stepTwo0.getInputRowSets().size() ); StepInterface stepTwo2 = getStepByName( SP21 ); assertTrue( "2.1 step have no output row sets", stepTwo2.getOutputRowSets().isEmpty() ); assertEquals( "2.2 step have 2 input rowsets", 2, stepTwo2.getInputRowSets().size() ); } private StepInterface getStepByName( String name ) { List<StepMetaDataCombi> combiList = trans.getSteps(); for ( StepMetaDataCombi item : combiList ) { if ( item.step.toString().equals( name ) ) { return item.step; } } fail( "Test error, can't find step with name: " + name ); // and this will never happens. return null; } /** * one 'regular step' to 'step running in 2 copies' */ private void prepareStepMetas_1_x2() { StepMeta dummy1 = new StepMeta( ONE, null ); StepMeta dummy2 = new StepMeta( TWO, null ); dummy2.setCopies( 2 ); chain.add( dummy1 ); chain.add( dummy2 ); for ( StepMeta item : chain ) { item.setStepMetaInterface( new DummyTransMeta() ); } } /** * one 'step running in 2 copies' to 'step running in 2 copies' */ private void prepareStepMetas_x2_x2() { StepMeta dummy1 = new StepMeta( ONE, null ); StepMeta dummy2 = new StepMeta( TWO, null ); dummy1.setCopies( 2 ); dummy2.setCopies( 2 ); chain.add( dummy1 ); chain.add( dummy2 ); for ( StepMeta item : chain ) { item.setStepMetaInterface( new DummyTransMeta() ); } } /** * many steps copies to one */ private void prepareStepMetas_x2_1() { StepMeta dummy1 = new StepMeta( ONE, null ); StepMeta dummy2 = new StepMeta( TWO, null ); dummy1.setCopies( 2 ); chain.add( dummy1 ); chain.add( dummy2 ); for ( StepMeta item : chain ) { item.setStepMetaInterface( new DummyTransMeta() ); } } /** * This is a case when we have 1 step to 1 clustered step distribution. * * @throws KettlePluginException */ private void prepareStepMetas_1_cl1() throws KettlePluginException { StepMeta dummy1 = new StepMeta( ONE, null ); StepMeta dummy2 = new StepMeta( TWO, null ); PartitionSchema schema = new PartitionSchema( "p1", Arrays.asList( new String[] { PID1, PID2 } ) ); StepPartitioningMeta partMeta = new StepPartitioningMeta( "Mirror to all partitions", schema ); dummy2.setStepPartitioningMeta( partMeta ); chain.add( dummy1 ); chain.add( dummy2 ); for ( StepMeta item : chain ) { item.setStepMetaInterface( new DummyTransMeta() ); } } /** * This case simulates when we do have 2 step partitioned with one same partitioner We want to get a 'swim-lanes' * transformation * * @throws KettlePluginException */ private void prepareStepMetas_cl1_cl1() throws KettlePluginException { StepMeta dummy1 = new StepMeta( ONE, null ); StepMeta dummy2 = new StepMeta( TWO, null ); PartitionSchema schema = new PartitionSchema( "p1", Arrays.asList( new String[] { PID1, PID2 } ) ); // for delayed binding StepPartitioning meta does not achieve // schema name when using in constructor so we have to set it // explicitly. See equals implementation for StepPartitioningMeta. StepPartitioningMeta partMeta = new StepPartitioningMeta( "Mirror to all partitions", schema ); // that is what I am talking about: partMeta.setPartitionSchemaName( schema.getName() ); dummy1.setStepPartitioningMeta( partMeta ); dummy2.setStepPartitioningMeta( partMeta ); chain.add( dummy1 ); chain.add( dummy2 ); for ( StepMeta item : chain ) { item.setStepMetaInterface( new DummyTransMeta() ); } } /** * This is a case when we have 2 steps, but partitioned differently * * @throws KettlePluginException */ private void prepareStepMetas_cl1_cl2() throws KettlePluginException { StepMeta dummy1 = new StepMeta( ONE, null ); StepMeta dummy2 = new StepMeta( TWO, null ); PartitionSchema schema1 = new PartitionSchema( "p1", Arrays.asList( new String[] { PID1, PID2 } ) ); PartitionSchema schema2 = new PartitionSchema( "p2", Arrays.asList( new String[] { PID1, PID2 } ) ); StepPartitioningMeta partMeta1 = new StepPartitioningMeta( "Mirror to all partitions", schema1 ); StepPartitioningMeta partMeta2 = new StepPartitioningMeta( "Mirror to all partitions", schema2 ); partMeta1.setPartitionSchemaName( schema1.getName() ); partMeta2.setPartitionSchemaName( schema2.getName() ); dummy1.setStepPartitioningMeta( partMeta1 ); dummy2.setStepPartitioningMeta( partMeta2 ); chain.add( dummy1 ); chain.add( dummy2 ); for ( StepMeta item : chain ) { item.setStepMetaInterface( new DummyTransMeta() ); } } /** * This is a case when first step running 2 copies and next is partitioned one. * * @throws KettlePluginException */ private void prepareStepMetas_x2_cl1() throws KettlePluginException { StepMeta dummy1 = new StepMeta( ONE, null ); StepMeta dummy2 = new StepMeta( TWO, null ); PartitionSchema schema1 = new PartitionSchema( "p1", Arrays.asList( new String[] { PID1, PID2 } ) ); StepPartitioningMeta partMeta1 = new StepPartitioningMeta( "Mirror to all partitions", schema1 ); dummy2.setStepPartitioningMeta( partMeta1 ); dummy1.setCopies( 2 ); chain.add( dummy1 ); chain.add( dummy2 ); for ( StepMeta item : chain ) { item.setStepMetaInterface( new DummyTransMeta() ); } } }