/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.hadoop.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.logging.LoggingRegistry;
import org.pentaho.di.trans.TransConfiguration;
import org.pentaho.di.trans.TransMeta;
/**
* User: Dzmitry Stsiapanau Date: 10/29/14 Time: 12:44 PM
*/
@SuppressWarnings( { "unchecked", "rawtypes" } )
public class GenericTransReduceTest {
private static final String REDUCE_OUTPUT_STEPNAME = "reduce-output-stepname";
private static final String REDUCE_INPUT_STEPNAME = "reduce-input-stepname";
private static final String REDUCER_TRANS_META_NAME = "Reducer transformation";
/**
* We expect 4 log channels per run. The total should never grow past logChannelsBefore + 4.
*/
final int EXPECTED_CHANNELS_PER_RUN = 4;
/**
* Run the reducer this many times
*/
final int RUNS = 10;
private static TransConfiguration reducerTransExecConfig;
private Reporter reporterMock = mock( Reporter.class );
private GenericTransReduce genericTransReduce;
private JobConf mrJobConfig;
private TransMeta transMeta;
private MockOutputCollector outputCollectorMock = new MockOutputCollector();
@BeforeClass
public static void before() throws KettleException {
KettleEnvironment.init();
reducerTransExecConfig = MRTestUtil.getTransExecConfig( MRTestUtil.getTransMeta( REDUCER_TRANS_META_NAME ) );
}
@Before
public void setUp() throws KettleException, IOException {
genericTransReduce = new GenericTransReduce();
mrJobConfig = new JobConf();
// Turn off all debug messages from PentahoMapRunnable to reduce unit test logs
mrJobConfig.set( "logLevel", LogLevel.ERROR.name() );
}
@Test
public void testClose() throws KettleException, IOException {
GenericTransReduce gtr = new GenericTransReduce();
try {
gtr.close();
} catch ( NullPointerException ex ) {
ex.printStackTrace();
fail( " Null pointer on close look PDI-13080 " + ex.getMessage() );
}
}
@Test
public void testReducerOutputClasses() throws IOException, KettleException {
mrJobConfig.set( MRTestUtil.TRANSFORMATION_REDUCE_XML, reducerTransExecConfig.getXML() );
mrJobConfig.setOutputKeyClass( Text.class );
mrJobConfig.setOutputValueClass( LongWritable.class );
genericTransReduce.configure( mrJobConfig );
assertEquals( mrJobConfig.getOutputKeyClass(), genericTransReduce.getOutClassK() );
assertEquals( mrJobConfig.getOutputValueClass(), genericTransReduce.getOutClassV() );
}
@Test
public void testReducerInputOutputSteps() throws IOException, KettleException {
mrJobConfig.set( MRTestUtil.TRANSFORMATION_REDUCE_XML, reducerTransExecConfig.getXML() );
mrJobConfig.set( MRTestUtil.TRANSFORMATION_REDUCE_INPUT_STEPNAME, REDUCE_INPUT_STEPNAME );
mrJobConfig.set( MRTestUtil.TRANSFORMATION_REDUCE_OUTPUT_STEPNAME, REDUCE_OUTPUT_STEPNAME );
assertNull( genericTransReduce.getInputStepName() );
assertNull( genericTransReduce.getOutputStepName() );
genericTransReduce.configure( mrJobConfig );
assertEquals( REDUCE_INPUT_STEPNAME, genericTransReduce.getInputStepName() );
assertEquals( REDUCE_OUTPUT_STEPNAME, genericTransReduce.getOutputStepName() );
}
@Test
public void testReducer_null_output_value() throws Exception {
transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_NULL_TEST_TRANSFORMATION ).toURI().getPath() );
MRTestUtil.configJobReducerBaseCase( transMeta, mrJobConfig, genericTransReduce );
genericTransReduce.reduce( MRTestUtil.KEY_TO_NULL, Arrays.asList( MRTestUtil.VALUE_TO_NULL ).iterator(), outputCollectorMock, reporterMock );
genericTransReduce.close();
outputCollectorMock.close();
Exception ex = genericTransReduce.getException();
assertNull( "Exception thrown", ex );
assertEquals( "Received output when we didn't expect any. <null>s aren't passed through.", 0, outputCollectorMock.getCollection().size() );
}
@Test
public void testReducer_not_null_outputValues() throws Exception {
transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_NOT_NULL_TEST_TRANSFORMATION ).toURI().getPath() );
MRTestUtil.configJobReducerBaseCase( transMeta, mrJobConfig, genericTransReduce );
Text expectedKey = new Text( "test" );
List<IntWritable> expectedValue = Arrays.asList( new IntWritable( 8 ), new IntWritable( 9 ) );
genericTransReduce.reduce( expectedKey, expectedValue.iterator(), outputCollectorMock, reporterMock );
genericTransReduce.close();
outputCollectorMock.close();
Exception ex = genericTransReduce.getException();
assertNull( "Exception thrown", ex );
assertEquals( 1, outputCollectorMock.getCollection().size() );
assertEquals( expectedValue, outputCollectorMock.getCollection().get( expectedKey ) );
}
@Test
public void testReducer_WordCount() throws Exception {
transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_WORDCOUNT_REDUCER_TEST_TRANSFORMATION ).toURI().getPath() );
MRTestUtil.configJobReducerBaseCase( transMeta, mrJobConfig, genericTransReduce );
Text wordToCount = new Text( "test" );
IntWritable[] wordCountArray = new IntWritable[] { new IntWritable( 8 ), new IntWritable( 9 ), new IntWritable( 1 ) };
IntWritable expectedWordCount = new IntWritable( Arrays.stream( wordCountArray ).mapToInt( IntWritable::get ).sum() );
genericTransReduce.reduce( wordToCount, Arrays.asList( wordCountArray ).iterator(), outputCollectorMock, reporterMock );
genericTransReduce.close();
outputCollectorMock.close();
Exception ex = genericTransReduce.getException();
assertNull( "Exception thrown", ex );
assertEquals( 1, outputCollectorMock.getCollection().size() );
assertEquals( expectedWordCount, outputCollectorMock.getCollection().get( wordToCount ).get( 0 ) );
}
@Test
public void testLogChannelLeaking() throws Exception {
transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_WORDCOUNT_REDUCER_TEST_TRANSFORMATION ).toURI().getPath() );
MRTestUtil.configJobReducerBaseCase( transMeta, mrJobConfig, genericTransReduce );
int logChannels = LoggingRegistry.getInstance().getMap().size();
Text wordToCount = null;
int expectedOutputCollectorMockSize = 0;
assertEquals( "Incorrect output", expectedOutputCollectorMockSize, outputCollectorMock.getCollection().size() );
for ( int i = 0; i < RUNS; i++ ) {
// set up test key and value for reducer as a pair of elements: word1-->[1], word2-->[1,2] ...,
// wordN-->[1,...,N-1,N]
wordToCount = new Text( "word" + ( i + 1 ) );
List<IntWritable> wordCounts = IntStream.rangeClosed( 1, i + 1 ).mapToObj( value -> new IntWritable( value ) ).collect( Collectors.toList() );
IntWritable expectedWordCount = new IntWritable( wordCounts.stream().mapToInt( IntWritable::get ).sum() );
genericTransReduce.reduce( wordToCount, wordCounts.iterator(), outputCollectorMock, reporterMock );
genericTransReduce.close();
expectedOutputCollectorMockSize++;
assertNull( "Exception thrown", genericTransReduce.getException() );
assertEquals( "Incorrect output", expectedOutputCollectorMockSize, outputCollectorMock.getCollection().size() );
assertEquals( expectedWordCount, outputCollectorMock.getCollection().get( wordToCount ).get( 0 ) );
assertEquals( "LogChannels are not being cleaned up. On Run #" + ( i + 1 ) + " we have too many.", logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size() );
}
outputCollectorMock.close();
assertEquals( logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size() );
}
@Test
public void testReducerNoOutputStep() throws KettleException, URISyntaxException {
//Turn off displaying stack trace of expected exception to reduce unit test logs
mrJobConfig.set( "debug", "false" );
try {
transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_NO_OUTPUT_STEP_TEST_TRANSFORMATION ).toURI().getPath() );
MRTestUtil.configJobReducerBaseCase( transMeta, mrJobConfig, genericTransReduce );
genericTransReduce.reduce( new Text( "key" ), Arrays.asList( new IntWritable( 8 ) ).iterator(), outputCollectorMock, reporterMock );
genericTransReduce.close();
fail( "Should have thrown an exception " );
} catch ( IOException e ) {
assertTrue( "Test for KettleException", e.getMessage().contains( "Output step not defined in transformation" ) );
}
}
@Test
public void testReducerBadInjectorFields() throws KettleException, URISyntaxException {
//Turn off displaying stack trace of expected exception to reduce unit test logs
mrJobConfig.set( "debug", "false" );
try {
transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_BAD_INJECTOR_STEP_TEST_TRANSFORMATION ).toURI().getPath() );
MRTestUtil.configJobReducerBaseCase( transMeta, mrJobConfig, genericTransReduce );
genericTransReduce.reduce( new Text( "key" ), Arrays.asList( new IntWritable( 8 ) ).iterator(), outputCollectorMock, reporterMock );
fail( "Should have thrown an exception" );
} catch ( IOException e ) {
assertTrue( "Test for KettleException", e.getMessage().contains( "key or value is not defined in transformation injector step" ) );
}
}
@Test
public void testReducerNoInjectorStep() throws IOException, KettleException, URISyntaxException {
//Turn off displaying stack trace of expected exception to reduce unit test logs
mrJobConfig.set( "debug", "false" );
try {
transMeta = new TransMeta( getClass().getResource( MRTestUtil.PATH_TO_NO_INJECTOR_STEP_TEST_TRANSFORMATION ).toURI().getPath() );
MRTestUtil.configJobReducerBaseCase( transMeta, mrJobConfig, genericTransReduce );
genericTransReduce.reduce( new Text( "key" ), Arrays.asList( new IntWritable( 8 ) ).iterator(), outputCollectorMock, reporterMock );
fail( "Should have thrown an exception" );
} catch ( IOException e ) {
assertTrue( "Test for KettleException", e.getMessage().contains( "Unable to find thread with name Injector and copy number 0" ) );
}
}
}