package eu.dnetlib.iis.wf.citationmatching.input;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsInAnyOrder;
import java.io.IOException;
import java.util.Map;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import eu.dnetlib.iis.citationmatching.schemas.DocumentMetadata;
import eu.dnetlib.iis.common.utils.JsonAvroTestUtils;
import eu.dnetlib.iis.importer.schemas.Person;
/**
 * Test of {@link AuthorNameMappingExtractor} driven by JSON fixtures read from
 * {@link #DATA_DIRECTORY_PATH}. A fresh local Spark context is created before
 * each test and closed afterwards.
 *
 * @author madryk
 */
public class AuthorNameMappingExtractorTest {

    private static final String DATA_DIRECTORY_PATH = "src/test/resources/eu/dnetlib/iis/wf/citationmatching/data/input_transformer";

    private AuthorNameMappingExtractor authorNameMappingExtractor = new AuthorNameMappingExtractor();

    private JavaSparkContext sparkContext;


    /**
     * Creates the Spark context used by the test: master "local", Kryo
     * serializer with the registrator named in the configuration, and the
     * driver host pinned to localhost.
     */
    @Before
    public void before() {
        // SparkConf setters mutate and return the same instance, so configuring it
        // statement by statement is equivalent to a chained call.
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local");
        sparkConf.setAppName("AuthorNameAttacherTest");
        sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        sparkConf.set("spark.kryo.registrator", "pl.edu.icm.sparkutils.avro.AvroCompatibleKryoRegistrator");
        sparkConf.set("spark.driver.host", "localhost");

        sparkContext = new JavaSparkContext(sparkConf);
    }

    /**
     * Closes the Spark context, if one was created, and clears the reference.
     */
    @After
    public void after() {
        if (sparkContext == null) {
            // nothing to release - context creation did not complete
            return;
        }
        sparkContext.close();
        sparkContext = null;
    }


    //------------------------ TESTS --------------------------

    /**
     * Feeds the extractor with documents keyed by their id and with persons, both
     * loaded from JSON fixtures, and expects the collected result to contain exactly
     * the entries supplied by {@link AuthorNameMappingDataProvider#fetchDocumentAuthors()},
     * in any order.
     */
    @Test
    public void attachAuthorNames() throws IOException {

        // given: fixture locations
        String documentsJsonPath = DATA_DIRECTORY_PATH + "/document_without_author_name.json";
        String personsJsonPath = DATA_DIRECTORY_PATH + "/person.json";

        // given: documents keyed by their id
        JavaRDD<DocumentMetadata> documents = sparkContext.parallelize(
                JsonAvroTestUtils.readJsonDataStore(documentsJsonPath, DocumentMetadata.class));
        JavaPairRDD<String, DocumentMetadata> documentsById = documents.keyBy(document -> document.getId().toString());

        // given: persons
        JavaRDD<Person> persons = sparkContext.parallelize(
                JsonAvroTestUtils.readJsonDataStore(personsJsonPath, Person.class));


        // execute
        JavaPairRDD<String, Map<String, String>> actualMapping =
                authorNameMappingExtractor.extractAuthorNameMapping(documentsById, persons);


        // assert
        Object[] expectedEntries = AuthorNameMappingDataProvider.fetchDocumentAuthors().toArray();
        assertThat(actualMapping.collect(), containsInAnyOrder(expectedEntries));
    }
}