package eu.dnetlib.iis.wf.citationmatching.input;
import java.io.IOException;
import java.util.Map;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import eu.dnetlib.iis.citationmatching.schemas.DocumentMetadata;
import eu.dnetlib.iis.common.utils.AvroAssertTestUtil;
import eu.dnetlib.iis.common.utils.JsonAvroTestUtils;
/**
 * Test of {@link AuthorNameAttacher} that runs against a Spark context
 * started in local mode and JSON fixture files from the test resources.
 *
 * @author madryk
 */
public class AuthorNameAttacherTest {

    /** Directory holding the JSON fixtures read by this test. */
    private static final String DATA_DIRECTORY_PATH = "src/test/resources/eu/dnetlib/iis/wf/citationmatching/data/input_transformer";

    /** Object under test. */
    private AuthorNameAttacher authorNameAttacher = new AuthorNameAttacher();

    /** Spark context created in {@link #before()} and released in {@link #after()}. */
    private JavaSparkContext sparkContext;


    /**
     * Starts a new Spark context for the test about to run.
     */
    @Before
    public void before() {
        sparkContext = new JavaSparkContext(buildSparkConf());
    }

    /**
     * Closes the Spark context, if one exists, and drops the reference to it.
     */
    @After
    public void after() {
        if (sparkContext == null) {
            return;
        }
        sparkContext.close();
        sparkContext = null;
    }


    //------------------------ TESTS --------------------------

    /**
     * Feeds documents read from {@code document_without_author_name.json} and the
     * author mapping supplied by {@link AuthorNameMappingDataProvider} into
     * {@link AuthorNameAttacher#attachAuthorNames}, then compares the returned
     * documents with the content of {@code document.json}.
     *
     * @throws IOException declared because the fixture reading / comparison helpers may throw it
     */
    @Test
    public void attachAuthorNames() throws IOException {

        // given

        JavaPairRDD<String, DocumentMetadata> inputDocuments = sparkContext
                .parallelize(JsonAvroTestUtils.readJsonDataStore(
                        fixturePath("/document_without_author_name.json"), DocumentMetadata.class))
                .keyBy(document -> document.getId().toString());

        JavaPairRDD<String, Map<String, String>> authorsByDocument =
                sparkContext.parallelizePairs(AuthorNameMappingDataProvider.fetchDocumentAuthors());


        // execute

        JavaPairRDD<String, DocumentMetadata> actualDocuments =
                authorNameAttacher.attachAuthorNames(inputDocuments, authorsByDocument);


        // assert

        AvroAssertTestUtil.assertEqualsWithJsonIgnoreOrder(
                actualDocuments.values().collect(),
                fixturePath("/document.json"),
                DocumentMetadata.class);
    }


    //------------------------ PRIVATE --------------------------

    /**
     * Builds the Spark configuration used by this test: local master, Kryo
     * serialization with the Avro-compatible registrator, and the driver host
     * set to {@code localhost}.
     */
    private static SparkConf buildSparkConf() {
        return new SparkConf()
                .setMaster("local")
                .setAppName("AuthorNameAttacherTest")
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .set("spark.kryo.registrator", "pl.edu.icm.sparkutils.avro.AvroCompatibleKryoRegistrator")
                .set("spark.driver.host", "localhost");
    }

    /**
     * Returns the path of a fixture file located under {@link #DATA_DIRECTORY_PATH}.
     *
     * @param relativePath path of the fixture relative to the data directory, starting with {@code /}
     */
    private static String fixturePath(String relativePath) {
        return DATA_DIRECTORY_PATH + relativePath;
    }
}