/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/
package org.pentaho.hadoop.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.logging.LoggingRegistry;
import org.pentaho.di.trans.TransConfiguration;
import org.pentaho.di.trans.TransMeta;

/**
 * Unit tests for {@link PentahoMapRunnable}: task-id parsing, mapper error
 * handling for misconfigured transformations, and log-channel cleanup.
 *
 * @author Tatsiana_Kasiankova
 */
@SuppressWarnings( { "unchecked", "rawtypes" } )
public class PentahoMapRunnableTest {

  /** Prefix used to generate distinct input words in the word-count test. */
  private static final String WORD_TO_COUNT_TEMPLATE = "word";

  /** Kettle variable that receives the Hadoop node number extracted from the task id. */
  private static final String INTERNAL_HADOOP_NODE_NUMBER = "Internal.Hadoop.NodeNumber";

  /** Hadoop job configuration key holding the task attempt id. */
  private static final String MAPRED_TASK_ID = "mapred.task.id";

  private static final String MAP_TRANS_META_NAME = "Map transformation";

  /**
   * Expected number of log channels registered per run. The total must never
   * grow past the pre-run count plus this value, or channels are leaking.
   */
  private static final int EXPECTED_CHANNELS_PER_RUN = 5;

  /** Number of times the mapper is run in the leak test. */
  private static final int RUNS = 10;

  /**
   * Mock trans configuration: empty trans meta with name and empty trans
   * execution configuration. Built once for the whole suite.
   */
  private static TransConfiguration combinerTransExecutionConfig;

  private Reporter reporterMock = mock( Reporter.class );

  private PentahoMapRunnable mapRunnable;
  private JobConf mrJobConfig;
  private TransMeta transMeta;
  private MockOutputCollector outputCollectorMock = new MockOutputCollector();
  private MockRecordReader reader;

  @BeforeClass
  public static void before() throws KettleException {
    KettleEnvironment.init();
    combinerTransExecutionConfig = MRTestUtil.getTransExecConfig( MRTestUtil.getTransMeta( MAP_TRANS_META_NAME ) );
  }

  @Before
  public void setUp() throws KettleException, IOException {
    mapRunnable = new PentahoMapRunnable();
    mrJobConfig = new JobConf();
    //Turn off all debug messages from PentahoMapRunnable to reduce unit test logs
    mrJobConfig.set( "logLevel", LogLevel.ERROR.name() );
  }

  /**
   * The node number is the trailing numeric part of the task id with leading
   * zeros stripped: "..._0133" becomes "133".
   */
  @Test
  public void testTaskIdExtraction() throws Exception {
    mrJobConfig.set( MRTestUtil.TRANSFORMATION_MAP_XML, combinerTransExecutionConfig.getXML() );
    mrJobConfig.set( MAPRED_TASK_ID, "job_201208090841_0133" );
    mapRunnable.configure( mrJobConfig );

    String actualVariable = mapRunnable.variableSpace.getVariable( INTERNAL_HADOOP_NODE_NUMBER );
    assertEquals( "133", actualVariable );
  }

  /**
   * Extraction must also work for node numbers of five or more digits.
   */
  @Test
  public void testTaskIdExtraction_over_10000() throws Exception {
    mrJobConfig.set( MRTestUtil.TRANSFORMATION_MAP_XML, combinerTransExecutionConfig.getXML() );
    mrJobConfig.set( MAPRED_TASK_ID, "job_201208090841_013302" );
    mapRunnable.configure( mrJobConfig );

    String actualVariable = mapRunnable.variableSpace.getVariable( INTERNAL_HADOOP_NODE_NUMBER );
    assertEquals( "13302", actualVariable );
  }

  /**
   * A transformation producing only null output values must emit nothing to
   * the collector and raise no exception.
   */
  @Test
  public void testMapper_null_output_value() throws Exception {
    transMeta = new TransMeta(
      getClass().getResource( MRTestUtil.PATH_TO_NULL_TEST_TRANSFORMATION ).toURI().getPath() );
    MRTestUtil.configJobMapBaseCase( transMeta, mrJobConfig, mapRunnable );

    reader = new MockRecordReader( Arrays.asList( "test" ) );
    mapRunnable.run( reader, outputCollectorMock, reporterMock );
    // NOTE(review): fixed wait for the asynchronous trans to finish flushing output;
    // consider replacing with a deterministic completion wait — TODO confirm a hook exists.
    Thread.sleep( 300 );
    outputCollectorMock.close();

    assertNull( "Exception thrown", mapRunnable.getException() );
    assertEquals( "Received output when we didn't expect any. <null>s aren't passed through.", 0,
      outputCollectorMock.getCollection().size() );
  }

  /**
   * Running a transformation without an output step must fail with an
   * IOException naming the missing output step.
   */
  @Test
  public void testMapperNoOutputStep() throws KettleException, URISyntaxException {
    //Turn off displaying stack trace of expected exception to reduce unit test logs
    mrJobConfig.set( "debug", "false" );
    try {
      transMeta = new TransMeta(
        getClass().getResource( MRTestUtil.PATH_TO_NO_OUTPUT_STEP_TEST_TRANSFORMATION ).toURI().getPath() );
      MRTestUtil.configJobMapBaseCase( transMeta, mrJobConfig, mapRunnable );

      reader = new MockRecordReader( Arrays.asList( "test" ) );
      mapRunnable.run( reader, outputCollectorMock, reporterMock );
      fail( "Should have thrown an exception " );
    } catch ( IOException e ) {
      assertTrue( "Test for KettleException",
        e.getMessage().contains( "Output step not defined in transformation" ) );
    }
  }

  /**
   * An injector step without key/value fields defined must fail with an
   * IOException describing the bad injector configuration.
   */
  @Test
  public void testMapperBadInjectorFields() throws KettleException, URISyntaxException {
    //Turn off displaying stack trace of expected exception to reduce unit test logs
    mrJobConfig.set( "debug", "false" );
    try {
      transMeta = new TransMeta(
        getClass().getResource( MRTestUtil.PATH_TO_BAD_INJECTOR_STEP_TEST_TRANSFORMATION ).toURI().getPath() );
      MRTestUtil.configJobMapBaseCase( transMeta, mrJobConfig, mapRunnable );

      reader = new MockRecordReader( Arrays.asList( "test" ) );
      mapRunnable.run( reader, outputCollectorMock, reporterMock );
      fail( "Should have thrown an exception" );
    } catch ( IOException e ) {
      assertTrue( "Test for KettleException",
        e.getMessage().contains( "key or value is not defined in transformation injector step" ) );
    }
  }

  /**
   * A transformation with no injector step at all must fail with an
   * IOException reporting the missing Injector thread.
   */
  @Test
  public void testMapperNoInjectorStep() throws KettleException, URISyntaxException {
    //Turn off displaying stack trace of expected exception to reduce unit test logs
    mrJobConfig.set( "debug", "false" );
    try {
      transMeta = new TransMeta(
        getClass().getResource( MRTestUtil.PATH_TO_NO_INJECTOR_STEP_TEST_TRANSFORMATION ).toURI().getPath() );
      MRTestUtil.configJobMapBaseCase( transMeta, mrJobConfig, mapRunnable );

      reader = new MockRecordReader( Arrays.asList( "test" ) );
      mapRunnable.run( reader, outputCollectorMock, reporterMock );
      fail( "Should have thrown an exception" );
    } catch ( IOException e ) {
      assertTrue( "Test for KettleException",
        e.getMessage().contains( "Unable to find thread with name Injector and copy number 0" ) );
    }
  }

  /**
   * Runs the word-count mapper {@link #RUNS} times and verifies that the
   * LoggingRegistry never grows beyond the pre-test channel count plus
   * {@link #EXPECTED_CHANNELS_PER_RUN}, i.e. log channels are cleaned up
   * between runs. Also verifies the collected word counts are correct.
   */
  @Test
  public void testLogChannelLeaking() throws Exception {
    transMeta = new TransMeta(
      getClass().getResource( MRTestUtil.PATH_TO_WORDCOUNT_MAPPER_TEST_TRANSFORMATION ).toURI().getPath() );
    MRTestUtil.configJobMapBaseCase( transMeta, mrJobConfig, mapRunnable );

    // Baseline channel count before any mapper run.
    int logChannels = LoggingRegistry.getInstance().getMap().size();
    int expectedOutputCollectorMockSize = 0;
    assertEquals( "Incorrect output ", expectedOutputCollectorMockSize, outputCollectorMock.getCollection().size() );

    for ( int i = 0; i < RUNS; i++ ) {
      // Run #i feeds words "word1".."word(i+1)", so each later run re-counts
      // the earlier words plus one new one.
      List<String> wordsToCount = IntStream.rangeClosed( 1, i + 1 )
        .mapToObj( value -> WORD_TO_COUNT_TEMPLATE + value )
        .collect( Collectors.toList() );
      reader = new MockRecordReader( wordsToCount );

      mapRunnable.run( reader, outputCollectorMock, reporterMock );
      expectedOutputCollectorMockSize++;

      assertNull( "Exception thrown", mapRunnable.getException() );
      assertEquals( "Incorrect output", expectedOutputCollectorMockSize, outputCollectorMock.getCollection().size() );
      assertEquals( "LogChannels are not being cleaned up. On Run #" + ( i + 1 ) + " we have too many.",
        logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size() );
    }
    outputCollectorMock.close();

    // Verify the count array for each word: "word1" was fed in every run,
    // "word10" only in the last, so "wordI" has (RUNS - I + 1) occurrences.
    for ( int i = RUNS; i > 0; i-- ) {
      List<IntWritable> expectedWordCountArrays = IntStream.rangeClosed( 1, RUNS - i + 1 )
        .mapToObj( value -> new IntWritable( 1 ) )
        .collect( Collectors.toList() );
      assertEquals( "Incorrect count array for the word: " + WORD_TO_COUNT_TEMPLATE + i,
        expectedWordCountArrays, outputCollectorMock.getCollection().get( new Text( WORD_TO_COUNT_TEMPLATE + i ) ) );
    }

    assertEquals( logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size() );
  }
}