/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.cluster;
import org.apache.commons.vfs2.FileObject;
import org.pentaho.di.core.logging.LogChannel;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransExecutionConfiguration;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.cluster.TransSplitter;
public class PartitioningTest extends BaseCluster {
/**
* This test reads a CSV file in parallel on the cluster, one copy per slave.<br>
* It then partitions the data on id in 12 partitions (4 per slave) and keeps the data partitioned until written to
* file.<br>
* As such we expect 12 files on disk.<br>
* File: "partitioning-swimming-lanes-on-cluster.ktr"<br>
*/
public void testPartitioningSwimmingLanesOnCluster() throws Exception {
init();
ClusterGenerator clusterGenerator = new ClusterGenerator();
try {
clusterGenerator.launchSlaveServers();
TransMeta transMeta =
loadAndModifyTestTransformation( clusterGenerator,
"test/org/pentaho/di/cluster/partitioning-swimming-lanes-on-cluster.ktr" );
TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
TransSplitter transSplitter = Trans.executeClustered( transMeta, config );
long nrErrors =
Trans.monitorClusteredTransformation( new LogChannel( "cluster unit test <testParallelFileReadOnMaster>" ),
transSplitter, null, 1 );
assertEquals( 0L, nrErrors );
String[] results = new String[] { "8", "9", "9", "9", "9", "8", "8", "8", "8", "8", "8", "8", };
String[] files =
new String[] { "000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010", "011", };
for ( int i = 0; i < results.length; i++ ) {
String filename = "${java.io.tmpdir}/partitioning-swimming-lanes-on-cluster-" + files[i] + ".txt";
String result = loadFileContent( transMeta, filename );
assertEqualsIgnoreWhitespacesAndCase( results[i], result );
// Remove the output file : we don't want to leave too much clutter around
//
FileObject file = KettleVFS.getFileObject( transMeta.environmentSubstitute( filename ) );
file.delete();
}
} catch ( Exception e ) {
e.printStackTrace();
fail( e.toString() );
} finally {
try {
clusterGenerator.stopSlaveServers();
} catch ( Exception e ) {
e.printStackTrace();
fail( e.toString() );
}
}
}
/**
* This test reads a CSV file in parallel on the cluster, one copy per slave.<br>
* It then partitions the data on id in 12 partitions (4 per slave).<br>
* After that it re-partitions the data in 9 partitions (3 per slave).<br>
* As such we expect 9 result files on disk.<br>
* File: "partitioning-repartitioning-on-cluster.ktr"<br>
*/
public void testPartitioningRepartitioningOnCluster() throws Exception {
init();
ClusterGenerator clusterGenerator = new ClusterGenerator();
try {
clusterGenerator.launchSlaveServers();
TransMeta transMeta =
loadAndModifyTestTransformation( clusterGenerator,
"test/org/pentaho/di/cluster/partitioning-repartitioning-on-cluster.ktr" );
TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
TransSplitter transSplitter = Trans.executeClustered( transMeta, config );
long nrErrors =
Trans.monitorClusteredTransformation( new LogChannel( "cluster unit test <testParallelFileReadOnMaster>" ),
transSplitter, null, 1 );
assertEquals( 0L, nrErrors );
String[] results = new String[] { "8", "9", "9", "9", "9", "8", "8", "8", "8", "8", "8", "8", };
String[] files =
new String[] { "000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010", "011", };
for ( int i = 0; i < results.length; i++ ) {
String filename = "${java.io.tmpdir}/partitioning-repartitioning-on-cluster-" + files[i] + ".txt";
String result = loadFileContent( transMeta, filename );
assertEqualsIgnoreWhitespacesAndCase( results[i], result );
// Remove the output file : we don't want to leave too much clutter around
//
FileObject file = KettleVFS.getFileObject( transMeta.environmentSubstitute( filename ) );
file.delete();
}
} catch ( Exception e ) {
e.printStackTrace();
fail( e.toString() );
} finally {
try {
clusterGenerator.stopSlaveServers();
} catch ( Exception e ) {
e.printStackTrace();
fail( e.toString() );
}
}
}
/**
* Same as testPartitioningRepartitioningOnCluster() but passing the data to a non-partitioned step on the master.
*
* File: "partitioning-repartitioning-on-cluster3.ktr"<br>
*/
public void testPartitioningRepartitioningOnCluster3() throws Exception {
init();
ClusterGenerator clusterGenerator = new ClusterGenerator();
try {
clusterGenerator.launchSlaveServers();
TransMeta transMeta =
loadAndModifyTestTransformation( clusterGenerator,
"test/org/pentaho/di/cluster/partitioning-repartitioning-on-cluster3.ktr" );
TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
TransSplitter transSplitter = Trans.executeClustered( transMeta, config );
long nrErrors =
Trans.monitorClusteredTransformation( new LogChannel( "cluster unit test <testParallelFileReadOnMaster>" ),
transSplitter, null, 1 );
assertEquals( 0L, nrErrors );
String goldenData = "0;16\n1;17\n2;17\n3;17\n4;17\n5;16";
String filename = "${java.io.tmpdir}/partitioning-repartitioning-on-cluster3.txt";
String result = loadFileContent( transMeta, filename );
assertEqualsIgnoreWhitespacesAndCase( goldenData, result );
// Remove the output file : we don't want to leave too much clutter around
//
// FileObject file = KettleVFS.getFileObject(transMeta.environmentSubstitute(filename));
// file.delete();
} catch ( Exception e ) {
e.printStackTrace();
fail( e.toString() );
} finally {
try {
clusterGenerator.stopSlaveServers();
} catch ( Exception e ) {
e.printStackTrace();
fail( e.toString() );
}
}
}
/**
* See PDI-12766
*
* @throws Exception
*/
public void testClusteringWithPartitioningOnMaster() throws Exception {
init();
ClusterGenerator clusterGenerator = new ClusterGenerator();
LogChannel log = new LogChannel( "cluster unit test <test-partitioning-on-master-and-clustering>" );
try {
clusterGenerator.launchSlaveServers();
TransMeta transMeta =
loadAndModifyTestTransformation( clusterGenerator,
"test/org/pentaho/di/cluster/test-partitioning-on-master-and-clustering.ktr" );
TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
TransSplitter transSplitter = Trans.executeClustered( transMeta, config );
long nrErrors = Trans.monitorClusteredTransformation( log, transSplitter, null );
assertEquals( 0L, nrErrors );
} catch ( Exception e ) {
e.printStackTrace();
fail( e.toString() );
} finally {
try {
clusterGenerator.stopSlaveServers();
} catch ( Exception e ) {
e.printStackTrace();
fail( e.toString() );
}
}
}
}