/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.hadoop.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.mockito.Mockito.mock;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.logging.LoggingRegistry;
import org.pentaho.di.trans.TransConfiguration;
import org.pentaho.di.trans.TransMeta;
/**
 * Unit tests for {@code GenericTransCombiner}: verifies that job configuration is
 * propagated (output key/value classes, input/output step names), that {@code null}
 * values are not passed through to the output collector, that non-null values are
 * collected correctly, and that log channels do not leak across repeated runs.
 *
 * @author Tatsiana_Kasiankova
 */
@SuppressWarnings( { "unchecked", "rawtypes" } )
public class GenericTransCombinerTest {

  private static final String COMBINER_OUTPUT_STEPNAME = "combiner-output-stepname";
  private static final String COMBINER_INPUT_STEPNAME = "combiner-input-stepname";
  private static final String COMBINER_TRANS_META_NAME = "Combiner transformation";

  /** Shared execution configuration built once for all tests in {@link #before()}. */
  private static TransConfiguration combinerTransExecConfig;

  /**
   * We expect 4 log channels per run. The total should never grow past logChannelsBefore + 4.
   */
  private static final int EXPECTED_CHANNELS_PER_RUN = 4;

  /**
   * Run the combiner this many times in the log-channel leak test.
   */
  private static final int RUNS = 10;

  private Reporter reporterMock = mock( Reporter.class );

  private GenericTransCombiner genericTransCombiner;
  private JobConf mrJobConfig;
  private TransMeta transMeta;
  private MockOutputCollector outputCollectorMock = new MockOutputCollector();

  @BeforeClass
  public static void before() throws KettleException {
    KettleEnvironment.init();
    combinerTransExecConfig = MRTestUtil.getTransExecConfig( MRTestUtil.getTransMeta( COMBINER_TRANS_META_NAME ) );
  }

  @Before
  public void setUp() throws KettleException, IOException {
    genericTransCombiner = new GenericTransCombiner();
    mrJobConfig = new JobConf();
    // Turn off all debug messages from PentahoMapRunnable to reduce unit test logs
    mrJobConfig.set( "logLevel", LogLevel.ERROR.name() );
  }

  /**
   * The combiner must adopt the map output key/value classes from the job configuration.
   */
  @Test
  public void testCombinerOutputClasses() throws IOException, KettleException {
    mrJobConfig.set( MRTestUtil.TRANSFORMATION_COMBINER_XML, combinerTransExecConfig.getXML() );
    mrJobConfig.setMapOutputKeyClass( Text.class );
    mrJobConfig.setMapOutputValueClass( IntWritable.class );

    genericTransCombiner.configure( mrJobConfig );

    assertEquals( mrJobConfig.getMapOutputKeyClass(), genericTransCombiner.getOutClassK() );
    assertEquals( mrJobConfig.getMapOutputValueClass(), genericTransCombiner.getOutClassV() );
  }

  /**
   * Input/output step names must be unset before {@code configure} and picked up from the
   * job configuration afterwards.
   */
  @Test
  public void testCombinerInputOutputSteps() throws IOException, KettleException {
    mrJobConfig.set( MRTestUtil.TRANSFORMATION_COMBINER_XML, combinerTransExecConfig.getXML() );
    mrJobConfig.set( MRTestUtil.TRANSFORMATION_COMBINER_INPUT_STEPNAME, COMBINER_INPUT_STEPNAME );
    mrJobConfig.set( MRTestUtil.TRANSFORMATION_COMBINER_OUTPUT_STEPNAME, COMBINER_OUTPUT_STEPNAME );

    assertNull( genericTransCombiner.getInputStepName() );
    assertNull( genericTransCombiner.getOutputStepName() );

    genericTransCombiner.configure( mrJobConfig );

    assertEquals( COMBINER_INPUT_STEPNAME, genericTransCombiner.getInputStepName() );
    assertEquals( COMBINER_OUTPUT_STEPNAME, genericTransCombiner.getOutputStepName() );
  }

  /**
   * A transformation that maps the input to {@code null} must produce no collected output:
   * nulls are not passed through.
   */
  @Test
  public void testCombiner_null_output_value() throws Exception {
    transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_NULL_TEST_TRANSFORMATION ).toURI().getPath() );
    MRTestUtil.configJobCombinerBaseCase( transMeta, mrJobConfig, genericTransCombiner );

    genericTransCombiner.reduce( MRTestUtil.KEY_TO_NULL, Arrays.asList( MRTestUtil.VALUE_TO_NULL ).iterator(), outputCollectorMock, reporterMock );
    genericTransCombiner.close();
    outputCollectorMock.close();

    assertNull( "Exception thrown", genericTransCombiner.getException() );
    assertEquals( "Received output when we didn't expect any. <null>s aren't passed through.", 0, outputCollectorMock.getCollection().size() );
  }

  /**
   * A pass-through transformation must collect exactly the values handed to the combiner,
   * keyed under the original key.
   */
  @Test
  public void testCombiner_not_null_outputValues() throws Exception {
    transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_NOT_NULL_TEST_TRANSFORMATION ).toURI().getPath() );
    MRTestUtil.configJobCombinerBaseCase( transMeta, mrJobConfig, genericTransCombiner );

    Text expectedKey = new Text( "test" );
    List<IntWritable> expectedValue = Arrays.asList( new IntWritable( 8 ), new IntWritable( 9 ) );

    genericTransCombiner.reduce( expectedKey, expectedValue.iterator(), outputCollectorMock, reporterMock );
    genericTransCombiner.close();
    outputCollectorMock.close();

    assertNull( "Exception thrown", genericTransCombiner.getException() );
    assertEquals( 1, outputCollectorMock.getCollection().size() );
    assertEquals( expectedValue, outputCollectorMock.getCollection().get( expectedKey ) );
  }

  /**
   * Repeated combiner runs must not leak log channels: the registry may grow by at most
   * {@link #EXPECTED_CHANNELS_PER_RUN} over its size before the first run, regardless of
   * how many times the combiner executes.
   */
  @Test
  public void testLogChannelLeaking() throws Exception {
    transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_WORDCOUNT_REDUCER_TEST_TRANSFORMATION ).toURI().getPath() );
    MRTestUtil.configJobCombinerBaseCase( transMeta, mrJobConfig, genericTransCombiner );

    int logChannels = LoggingRegistry.getInstance().getMap().size();
    int expectedOutputCollectorMockSize = 0;
    assertEquals( "Incorrect output", expectedOutputCollectorMockSize, outputCollectorMock.getCollection().size() );

    for ( int i = 0; i < RUNS; i++ ) {
      // set up test key and value for the combiner as a pair of elements: word1-->[1], word2-->[1,2] ...,
      // wordN-->[1,...,N-1,N]
      Text wordToCount = new Text( "word" + ( i + 1 ) );
      List<IntWritable> wordCounts = IntStream.rangeClosed( 1, i + 1 ).mapToObj( IntWritable::new ).collect( Collectors.toList() );
      // word-count transformation sums the per-word counts, so expect 1+2+...+(i+1)
      IntWritable expectedWordCount = new IntWritable( wordCounts.stream().mapToInt( IntWritable::get ).sum() );

      genericTransCombiner.reduce( wordToCount, wordCounts.iterator(), outputCollectorMock, reporterMock );
      genericTransCombiner.close();
      expectedOutputCollectorMockSize++;

      assertNull( "Exception thrown", genericTransCombiner.getException() );
      assertEquals( "Incorrect output", expectedOutputCollectorMockSize, outputCollectorMock.getCollection().size() );
      assertEquals( expectedWordCount, outputCollectorMock.getCollection().get( wordToCount ).get( 0 ) );
      assertEquals( "LogChannels are not being cleaned up. On Run #" + ( i + 1 ) + " we have too many.", logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size() );
    }
    outputCollectorMock.close();
    assertEquals( logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size() );
  }
}