/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.cluster;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.UUID;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogChannel;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransExecutionConfiguration;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.cluster.TransSplitter;
public class MasterSlaveTest extends BaseCluster {

  ClusterGenerator clusterGenerator;

  @Override
  protected void setUp() throws Exception {
    init();
    clusterGenerator = new ClusterGenerator();
    clusterGenerator.launchSlaveServers();
  }

  @Override
  protected void tearDown() throws Exception {
    // Guard against a partially failed setUp(): if init() threw before the
    // assignment, clusterGenerator is still null and JUnit 3 calls tearDown() anyway.
    if ( clusterGenerator != null ) {
      clusterGenerator.stopSlaveServers();
    }
  }

  /**
   * Single entry point: runs all the clustering scenarios against the slave servers launched in
   * setUp(). The scenarios after the port-allocation check are repeated to catch state leaking
   * between clustered runs (stale sockets, leftover allocations, ...).
   */
  public void testAll() throws Exception {
    runAllocatePorts();
    final int ITERATIONS = 2;
    runSubtransformationClustered();
    for ( int i = 0; i < ITERATIONS; i++ ) {
      runParallelFileReadOnMaster();
      runParallelFileReadOnMasterWithCopies();
      runParallelFileReadOnSlaves();
      runParallelFileReadOnSlavesWithPartitioning();
      runParallelFileReadOnSlavesWithPartitioning2();
      runMultipleCopiesOnMultipleSlaves();
      runMultipleCopiesOnMultipleSlaves2();
      runOneStepClustered();
    }
  }

  /**
   * Verifies server socket port allocation on the master:<br>
   * - the same (run, transformation, source, target) tuple is idempotent and re-yields its port,<br>
   * - distinct target slaves get consecutive ports,<br>
   * - de-allocating a transformation's sockets frees its ports for reuse.
   */
  public void runAllocatePorts() throws Exception {
    ClusterSchema clusterSchema = clusterGenerator.getClusterSchema();
    SlaveServer master = clusterSchema.findMaster();
    List<SlaveServer> slaves = clusterSchema.getSlaveServersFromMasterOrLocal();
    String clusteredRunId = UUID.randomUUID().toString();
    SlaveServer slave1 = slaves.get( 0 );
    SlaveServer slave2 = slaves.get( 1 );
    SlaveServer slave3 = slaves.get( 2 );

    // First allocation starts at the requested base port.
    int port1 =
      master.allocateServerSocket(
        clusteredRunId, 40000, "localhost", "trans1", master.getName(), "A", "0", slave1.getName(), "B", "0" );
    assertEquals( 40000, port1 );

    // Re-allocating the identical tuple must return the same port, not a new one.
    int port1b =
      master.allocateServerSocket(
        clusteredRunId, 40000, "localhost", "trans1", master.getName(), "A", "0", slave1.getName(), "B", "0" );
    assertEquals( port1, port1b );

    // Different target slaves get the next free ports.
    int port2 =
      master.allocateServerSocket(
        clusteredRunId, 40000, "localhost", "trans1", master.getName(), "A", "0", slave2.getName(), "B", "0" );
    assertEquals( 40001, port2 );
    int port3 =
      master.allocateServerSocket(
        clusteredRunId, 40000, "localhost", "trans1", master.getName(), "A", "0", slave3.getName(), "B", "0" );
    assertEquals( 40002, port3 );

    master.deAllocateServerSockets( "trans1", clusteredRunId );

    // After de-allocation the base port is available again for a new transformation.
    port1 =
      master.allocateServerSocket(
        clusteredRunId, 40000, "localhost", "trans2", master.getName(), "A", "0", slave1.getName(), "B", "0" );
    assertEquals( 40000, port1 );
    master.deAllocateServerSockets( "trans2", clusteredRunId );
  }

  /**
   * This test reads a CSV file in parallel on the master in 1 copy.<br>
   * It then passes the data over to a dummy step on the slaves.<br>
   * We want to make sure that only 1 copy is considered.<br>
   */
  public void runParallelFileReadOnMaster() throws Exception {
    TransMeta transMeta =
      loadTransMetaReplaceSlavesInCluster(
        clusterGenerator, "test/org/pentaho/di/cluster/test-parallel-file-read-on-master.ktr" );
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    executeAndCheck( transMeta, config, "runParallelFileReadOnMaster",
      "${java.io.tmpdir}/test-parallel-file-read-on-master-result.txt", "100" );
  }

  /**
   * Creates a BASIC-level log channel for a single clustered test run.
   *
   * @param string the subject/name for the log channel
   * @return the configured log channel
   */
  private static LogChannel createLogChannel( String string ) {
    LogChannel logChannel = new LogChannel( string );
    logChannel.setLogLevel( LogLevel.BASIC );
    return logChannel;
  }

  /**
   * This test reads a CSV file in parallel on the master in 3 copies.<br>
   * It then passes the data over to a dummy step on the slaves.<br>
   */
  public void runParallelFileReadOnMasterWithCopies() throws Exception {
    TransMeta transMeta =
      loadTransMetaReplaceSlavesInCluster(
        clusterGenerator, "test/org/pentaho/di/cluster/test-parallel-file-read-on-master-with-copies.ktr" );
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    executeAndCheck( transMeta, config, "runParallelFileReadOnMasterWithCopies",
      "${java.io.tmpdir}/test-parallel-file-read-on-master-result-with-copies.txt", "100" );
  }

  /**
   * This test reads a CSV file in parallel on all 3 slaves, each with 1 copy.<br>
   * It then passes the data over to a dummy step on the slaves.<br>
   */
  public void runParallelFileReadOnSlaves() throws Exception {
    TransMeta transMeta =
      loadTransMetaReplaceSlavesInCluster(
        clusterGenerator, "test/org/pentaho/di/cluster/test-parallel-file-read-on-slaves.ktr" );
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    executeAndCheck( transMeta, config, "runParallelFileReadOnSlaves",
      "${java.io.tmpdir}/test-parallel-file-read-on-slaves.txt", "100" );
  }

  /**
   * This test reads a CSV file in parallel on all 3 slaves, each with 4 partitions.<br>
   * It then passes the data over to a dummy step on the slaves.<br>
   */
  public void runParallelFileReadOnSlavesWithPartitioning() throws Exception {
    TransMeta transMeta =
      loadTransMetaReplaceSlavesInCluster(
        clusterGenerator,
        "test/org/pentaho/di/cluster/test-parallel-file-read-on-slaves-with-partitioning.ktr" );
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    executeAndCheck( transMeta, config, "runParallelFileReadOnSlavesWithPartitioning",
      "${java.io.tmpdir}/test-parallel-file-read-on-slaves-with-partitioning.txt", "100" );
  }

  /**
   * This test reads a CSV file in parallel on all 3 slaves, each with 4 partitions.<br>
   * This is a variation on the test right above, with 2 steps in sequence in clustering & partitioning.<br>
   * It then passes the data over to a dummy step on the slaves.<br>
   */
  public void runParallelFileReadOnSlavesWithPartitioning2() throws Exception {
    TransMeta transMeta =
      loadTransMetaReplaceSlavesInCluster(
        clusterGenerator,
        "test/org/pentaho/di/cluster/test-parallel-file-read-on-slaves-with-partitioning2.ktr" );
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    executeAndCheck( transMeta, config, "runParallelFileReadOnSlavesWithPartitioning2",
      "${java.io.tmpdir}/test-parallel-file-read-on-slaves-with-partitioning2.txt", "100" );
  }

  /**
   * This test reads a CSV file and sends the data to 3 copies on 3 slave servers.<br>
   */
  public void runMultipleCopiesOnMultipleSlaves2() throws Exception {
    TransMeta transMeta =
      loadTransMetaReplaceSlavesInCluster(
        clusterGenerator, "test/org/pentaho/di/cluster/test-hops-between-multiple-copies-steps-on-cluster.ktr" );
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    executeAndCheck( transMeta, config, "runMultipleCopiesOnMultipleSlaves2",
      "${java.io.tmpdir}/test-multiple-copies-on-multiple-slaves2.txt", "90000" );
  }

  /**
   * This test reads a CSV file and sends the data to 3 copies on 3 slave servers.<br>
   */
  public void runMultipleCopiesOnMultipleSlaves() throws Exception {
    TransMeta transMeta =
      loadTransMetaReplaceSlavesInCluster(
        clusterGenerator, "test/org/pentaho/di/cluster/test-multiple-copies-on-multiple-slaves.ktr" );
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    executeAndCheck( transMeta, config, "runMultipleCopiesOnMultipleSlaves",
      "${java.io.tmpdir}/test-multiple-copies-on-multiple-slaves.txt", "100" );
  }

  /**
   * This test generates rows on the master, generates random values clustered and brings them back the master.<br>
   * See also: PDI-6324 : Generate Rows to a clustered step ceases to work
   */
  public void runOneStepClustered() throws Exception {
    TransMeta transMeta =
      loadTransMetaReplaceSlavesInCluster(
        clusterGenerator, "test/org/pentaho/di/cluster/one-step-clustered.ktr" );
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    executeAndCheck( transMeta, config, "runOneStepClustered",
      "${java.io.tmpdir}/one-step-clustered.txt", "10000" );
  }

  /**
   * This test check passing rows to sub-transformation executed on cluster
   * See PDI-10704 for details
   * @throws Exception
   */
  public void runSubtransformationClustered() throws Exception {
    TransMeta transMeta =
      loadTransMetaReplaceSlavesInCluster(
        clusterGenerator, "test/org/pentaho/di/cluster/test-subtrans-clustered.ktr" );
    TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
    // Feed sample rows into the clustered run as a "previous result" so the
    // sub-transformation has input to consume (the PDI-10704 scenario).
    Result prevResult = new Result();
    prevResult.setRows( getSampleRows() );
    config.setPreviousResult( prevResult );
    executeAndCheck( transMeta, config, "runSubtransformationClustered",
      "${java.io.tmpdir}/test-subtrans-clustered.txt", "10" );
  }

  /**
   * Executes the given transformation clustered, waits for it to finish, then asserts that no
   * errors occurred and that the result file holds the expected value. This is the shared
   * execute/monitor/verify sequence used by all clustered scenarios above.
   *
   * @param transMeta the transformation to execute (slave servers already injected)
   * @param config the clustered execution configuration (may carry a previous result)
   * @param testName name of the calling scenario, used to label the log channel
   * @param resultFileName variable-containing path of the file the transformation writes
   * @param expectedResult expected file content (compared ignoring whitespace and case)
   */
  private void executeAndCheck( TransMeta transMeta, TransExecutionConfiguration config, String testName,
    String resultFileName, String expectedResult ) throws Exception {
    TransSplitter transSplitter = Trans.executeClustered( transMeta, config );
    LogChannel logChannel = createLogChannel( "cluster unit test <" + testName + ">" );
    long nrErrors = Trans.monitorClusteredTransformation( logChannel, transSplitter, null, 1 );
    assertEquals( 0L, nrErrors );
    String result = loadFileContent( transMeta, resultFileName );
    assertEqualsIgnoreWhitespacesAndCase( expectedResult, result );
  }

  /**
   * Builds 10 one-column rows ("test" = 1L) used as input for the sub-transformation test.
   *
   * @return the sample rows
   */
  private static List<RowMetaAndData> getSampleRows() {
    List<RowMetaAndData> result = new ArrayList<RowMetaAndData>();
    for ( int i = 0; i < 10; i++ ) {
      RowMetaAndData row = new RowMetaAndData();
      row.addValue( "test", ValueMetaInterface.TYPE_INTEGER, 1L );
      result.add( row );
    }
    return result;
  }

  /**
   * Loads a transformation from the given file and rewires it to the locally launched test
   * slave servers: the slaves are registered on the transformation and substituted into the
   * test cluster schema in place of whatever the file referenced.
   *
   * @param clusterGenerator the generator that launched the local slaves
   * @param testFilename path of the .ktr file to load
   * @return the transformation, pointing at the local test cluster
   * @throws KettleException if the transformation can't be loaded
   */
  private static TransMeta loadTransMetaReplaceSlavesInCluster( ClusterGenerator clusterGenerator,
    String testFilename ) throws KettleException {
    TransMeta transMeta = new TransMeta( testFilename );

    // Add the slave servers
    //
    for ( SlaveServer slaveServer : ClusterGenerator.LOCAL_TEST_SLAVES ) {
      transMeta.getSlaveServers().add( slaveServer );
    }

    // Replace the slave servers in the specified cluster schema...
    //
    ClusterSchema clusterSchema = transMeta.findClusterSchema( ClusterGenerator.TEST_CLUSTER_NAME );
    assertNotNull( "Cluster schema '" + ClusterGenerator.TEST_CLUSTER_NAME + "' couldn't be found", clusterSchema );
    clusterSchema.getSlaveServers().clear();
    clusterSchema.getSlaveServers().addAll( Arrays.asList( ClusterGenerator.LOCAL_TEST_SLAVES ) );
    return transMeta;
  }
}