package eu.dnetlib.iis.wf.affmatching.bucket.projectorg.read; import static com.google.common.collect.ImmutableList.of; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; import java.util.List; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.junit.After; import org.junit.Before; import org.junit.Test; import eu.dnetlib.iis.wf.affmatching.bucket.projectorg.model.AffMatchDocumentProject; /** * @author madryk */ public class DocumentProjectMergerTest { private DocumentProjectMerger documentProjectMerger = new DocumentProjectMerger(); private JavaSparkContext sparkContext; @Before public void setup() { SparkConf conf = new SparkConf(); conf.setMaster("local"); conf.setAppName(getClass().getName()); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.kryo.registrator", "pl.edu.icm.sparkutils.avro.AvroCompatibleKryoRegistrator"); conf.set("spark.driver.host", "localhost"); sparkContext = new JavaSparkContext(conf); } @After public void cleanup() { if (sparkContext != null) { sparkContext.close(); } } //------------------------ TESTS -------------------------- @Test public void merge() { // given JavaRDD<AffMatchDocumentProject> firstDocumentProjects = sparkContext.parallelize(of( new AffMatchDocumentProject("DOC1", "PROJ1", 1f), new AffMatchDocumentProject("DOC1", "PROJ2", 0.6f), new AffMatchDocumentProject("DOC1", "PROJ3", 0.4f), new AffMatchDocumentProject("DOC2", "PROJ4", 0.8f))); JavaRDD<AffMatchDocumentProject> secondDocumentProjects = sparkContext.parallelize(of( new AffMatchDocumentProject("DOC1", "PROJ1", 0.3f), new AffMatchDocumentProject("DOC1", "PROJ2", 1f), new AffMatchDocumentProject("DOC1", "PROJ4", 0.7f))); // execute JavaRDD<AffMatchDocumentProject> retDocumentProjects = documentProjectMerger .merge(firstDocumentProjects, secondDocumentProjects); // assert List<AffMatchDocumentProject> expectedDocumentProjects = of( new AffMatchDocumentProject("DOC1", "PROJ1", 1f), // from first rdd (higher confidence level) new AffMatchDocumentProject("DOC1", "PROJ2", 1f), // from second rdd (higher confidence level) new AffMatchDocumentProject("DOC1", "PROJ3", 0.4f), new AffMatchDocumentProject("DOC1", "PROJ4", 0.7f), new AffMatchDocumentProject("DOC2", "PROJ4", 0.8f)); assertThat(retDocumentProjects.collect(), containsInAnyOrder(expectedDocumentProjects.toArray())); } }