package eu.dnetlib.iis.common.spark.pipe;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.mockito.Mockito.when;

import java.io.File;

import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.spark.SparkEnv;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.Matchers;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;

import eu.dnetlib.iis.common.avro.personwithdocuments.Document;
import scala.Tuple2;

/**
 * Unit tests for {@link SparkPipeExecutor}, verifying that the map and reduce
 * phases chain the expected RDD transformations and that the conversion
 * functions passed to those transformations behave correctly.
 *
 * @author madryk
 */
@RunWith(MockitoJUnitRunner.class)
public class SparkPipeExecutorTest {

    private SparkPipeExecutor pipeExecutor = new SparkPipeExecutor();

    // mocks for the map phase
    @Mock
    private JavaPairRDD<AvroKey<GenericRecord>, NullWritable> avroKeyValueRDD;
    @Mock
    private JavaRDD<AvroKey<GenericRecord>> avroRDD;
    @Mock
    private JavaRDD<String> mappedStringRDD;
    @Mock
    private JavaPairRDD<String, String> mappedKeyValueRDD;

    // mocks for the reduce phase
    @Mock
    private JavaPairRDD<String, String> sortedKeyValueRDD;
    @Mock
    private JavaRDD<String> stringRDD;
    @Mock
    private JavaRDD<String> reducedStringRDD;
    @Mock
    private JavaRDD<Document> reducedAvroDocumentRDD;
    @Mock
    private JavaPairRDD<AvroKey<GenericRecord>, NullWritable> reducedAvroKeyValueRDD;

    @Captor
    private ArgumentCaptor<PairFunction<String, String, String>> stringToKeyValueFunctionArg;
    @Captor
    private ArgumentCaptor<Function<Tuple2<String, String>, String>> keyValueToStringFunctionArg;
    @Captor
    private ArgumentCaptor<Function<String, Document>> jsonToAvroDocumentFunctionArg;
    @Captor
    private ArgumentCaptor<PairFunction<Document, AvroKey<GenericRecord>, NullWritable>> avroDocumentToAvroKeyValueFunctionArg;

    @Before
    public void setUp() {
        SparkEnv sparkEnv = mock(SparkEnv.class);
        when(sparkEnv.sparkFilesDir()).thenReturn("/some/spark/dir");
        SparkEnv.set(sparkEnv);
    }

    //------------------------ TESTS --------------------------

    @Test
    public void mapTest() throws Exception {
        // given
        when(avroKeyValueRDD.keys()).thenReturn(avroRDD);
        when(avroRDD.pipe(anyString())).thenReturn(mappedStringRDD);
        when(mappedStringRDD.mapToPair(Matchers.<PairFunction<String, String, String>>any()))
                .thenReturn(mappedKeyValueRDD);

        // execute
        JavaPairRDD<String, String> retMappedRDD =
                pipeExecutor.doMap(avroKeyValueRDD, "mapScriptName", "-arg=value");

        // assert
        assertSame(mappedKeyValueRDD, retMappedRDD);
        verify(avroKeyValueRDD).keys();
        verify(avroRDD).pipe("python " + new File("/some/spark/dir/mapScriptName").getAbsolutePath() + " -arg=value");
        verify(mappedStringRDD).mapToPair(stringToKeyValueFunctionArg.capture());
        assertStringToKeyValueFunction(stringToKeyValueFunctionArg.getValue());
        verifyNoMoreInteractions(avroKeyValueRDD, avroRDD, mappedStringRDD, mappedKeyValueRDD);
    }

    @Test
    public void reduceTest() throws Exception {
        // given
        when(mappedKeyValueRDD.sortByKey()).thenReturn(sortedKeyValueRDD);
        when(sortedKeyValueRDD.map(Matchers.<Function<Tuple2<String, String>, String>>any())).thenReturn(stringRDD);
        when(stringRDD.pipe(anyString())).thenReturn(reducedStringRDD);
        when(reducedStringRDD.map(Matchers.<Function<String, Document>>any())).thenReturn(reducedAvroDocumentRDD);
        when(reducedAvroDocumentRDD.mapToPair(Matchers.<PairFunction<Document, AvroKey<GenericRecord>, NullWritable>>any()))
                .thenReturn(reducedAvroKeyValueRDD);

        // execute
        JavaPairRDD<AvroKey<GenericRecord>, NullWritable> retReducedRDD =
                pipeExecutor.doReduce(mappedKeyValueRDD, "reducerScriptName", "-arg=value", Document.class);

        // assert
        assertSame(reducedAvroKeyValueRDD, retReducedRDD);
        verify(mappedKeyValueRDD).sortByKey();
        verify(sortedKeyValueRDD).map(keyValueToStringFunctionArg.capture());
        assertKeyValueToStringFunction(keyValueToStringFunctionArg.getValue());
        verify(stringRDD).pipe("python " + new File("/some/spark/dir/reducerScriptName").getAbsolutePath() + " -arg=value");
        verify(reducedStringRDD).map(jsonToAvroDocumentFunctionArg.capture());
        assertJsonToAvroDocumentFunction(jsonToAvroDocumentFunctionArg.getValue());
        verify(reducedAvroDocumentRDD).mapToPair(avroDocumentToAvroKeyValueFunctionArg.capture());
        assertAvroDocumentToAvroKeyValueFunction(avroDocumentToAvroKeyValueFunctionArg.getValue());
        verifyNoMoreInteractions(mappedKeyValueRDD, sortedKeyValueRDD, stringRDD,
                reducedStringRDD, reducedAvroDocumentRDD, reducedAvroKeyValueRDD);
    }

    //------------------------ PRIVATE --------------------------

    private void assertStringToKeyValueFunction(PairFunction<String, String, String> function) throws Exception {
        // a tab separates key from value; a line without a tab is a key with a null value
        assertEquals("key", function.call("key\tvalue")._1);
        assertEquals("value", function.call("key\tvalue")._2);

        assertEquals("key_only", function.call("key_only")._1);
        assertNull(function.call("key_only")._2);
    }

    private void assertKeyValueToStringFunction(Function<Tuple2<String, String>, String> function) throws Exception {
        assertEquals("key\tvalue", function.call(new Tuple2<String, String>("key", "value")));
        assertEquals("key_only", function.call(new Tuple2<String, String>("key_only", null)));
    }

    private void assertJsonToAvroDocumentFunction(Function<String, Document> function) throws Exception {
        assertEquals(new Document(5, "doc_title"), function.call("{\"id\": 5, \"title\": \"doc_title\"}"));
    }

    private void assertAvroDocumentToAvroKeyValueFunction(
            PairFunction<Document, AvroKey<GenericRecord>, NullWritable> function) throws Exception {
        Document document = new Document(5, "doc_title");

        Tuple2<AvroKey<GenericRecord>, NullWritable> retTuple = function.call(document);

        assertEquals(document, retTuple._1.datum());
    }
}
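/*
 * Usage sketch (not part of the tests): judging only from the interactions
 * verified above, a driver would chain the two phases roughly as below. The
 * input RDD, the script names "mapper.py" and "reducer.py", and the argument
 * string are hypothetical placeholders, not values taken from the production
 * code; the real scripts are resolved against SparkEnv's sparkFilesDir and
 * invoked via "python <script> <args>".
 *
 *   SparkPipeExecutor executor = new SparkPipeExecutor();
 *   JavaPairRDD<AvroKey<GenericRecord>, NullWritable> input = ...; // avro input, e.g. read via AvroKeyInputFormat
 *   JavaPairRDD<String, String> mapped =
 *           executor.doMap(input, "mapper.py", "-arg=value");
 *   JavaPairRDD<AvroKey<GenericRecord>, NullWritable> reduced =
 *           executor.doReduce(mapped, "reducer.py", "-arg=value", Document.class);
 */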