package eu.dnetlib.iis.wf.citationmatching.input;
import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import com.google.common.io.Files;
import eu.dnetlib.iis.citationmatching.schemas.DocumentMetadata;
import eu.dnetlib.iis.common.utils.AvroAssertTestUtil;
import eu.dnetlib.iis.common.utils.AvroTestUtils;
import eu.dnetlib.iis.common.utils.JsonAvroTestUtils;
import eu.dnetlib.iis.importer.schemas.Person;
import eu.dnetlib.iis.transformers.metadatamerger.schemas.ExtractedDocumentMetadataMergedWithOriginal;
import pl.edu.icm.sparkutils.test.SparkJob;
import pl.edu.icm.sparkutils.test.SparkJobBuilder;
import pl.edu.icm.sparkutils.test.SparkJobExecutor;
public class CitationMatchingInputTransformerJobTest {
private static final String DATA_DIRECTORY_PATH = "src/test/resources/eu/dnetlib/iis/wf/citationmatching/data/input_transformer";
private SparkJobExecutor executor = new SparkJobExecutor();
private File workingDir;
private String inputMetadataPath;
private String inputPersonPath;
private String outputDirPath;
@Before
public void before() {
workingDir = Files.createTempDir();
inputMetadataPath = workingDir + "/spark_citation_matching_input_transformer/inputMetadata";
inputPersonPath = workingDir + "/spark_citation_matching_input_transformer/inputPerson";
outputDirPath = workingDir + "/spark_citation_matching_input_transformer/output";
}
@After
public void after() throws IOException {
FileUtils.deleteDirectory(workingDir);
}
//------------------------ TESTS --------------------------
@Test
public void citationMatchingInputTransformer() throws IOException {
// given
String jsonInputMetadataFile = DATA_DIRECTORY_PATH + "/full_document.json";
String jsonInputPersonFile = DATA_DIRECTORY_PATH + "/person.json";
String jsonOutputFile = DATA_DIRECTORY_PATH + "/document.json";
AvroTestUtils.createLocalAvroDataStore(
JsonAvroTestUtils.readJsonDataStore(jsonInputMetadataFile, ExtractedDocumentMetadataMergedWithOriginal.class),
inputMetadataPath);
AvroTestUtils.createLocalAvroDataStore(
JsonAvroTestUtils.readJsonDataStore(jsonInputPersonFile, Person.class),
inputPersonPath);
// execute
executor.execute(buildCitationMatchingInputTransformerJob(inputMetadataPath, inputPersonPath, outputDirPath));
// assert
AvroAssertTestUtil.assertEqualsWithJsonIgnoreOrder(outputDirPath, jsonOutputFile, DocumentMetadata.class);
}
//------------------------ PRIVATE --------------------------
private SparkJob buildCitationMatchingInputTransformerJob(String inputMetadataDirPath, String inputPersonDirPath, String outputDirPath) {
SparkJob sparkJob = SparkJobBuilder
.create()
.setAppName("Spark Citation Matching - Input Transformer")
.setMainClass(CitationMatchingInputTransformerJob.class)
.addArg("-inputMetadata", inputMetadataDirPath)
.addArg("-inputPerson", inputPersonDirPath)
.addArg("-output", outputDirPath)
.addJobProperty("spark.driver.host", "localhost")
.build();
return sparkJob;
}
}