package eu.dnetlib.iis.wf.affmatching.bucket.projectorg.read;
import java.io.Serializable;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import eu.dnetlib.iis.wf.affmatching.bucket.projectorg.model.AffMatchDocumentProject;
/**
* Fetcher of document-project relations ({@link AffMatchDocumentProject}) using two input sources.<br>
* This fetcher internally uses two {@link DocumentProjectReader}s to read
* document-project relations located under different paths and then
* merges them using {@link DocumentProjectMerger}.
*
* @author madryk
*/
public class DocumentProjectFetcher implements Serializable {

    private static final long serialVersionUID = 1L;


    /** Reader used for the relations stored under {@link #firstDocProjPath}. */
    private DocumentProjectReader firstDocumentProjectReader;

    /** Reader used for the relations stored under {@link #secondDocProjPath}. */
    private DocumentProjectReader secondDocumentProjectReader;

    /** Combines the results of both readers into a single RDD. */
    private DocumentProjectMerger documentProjectMerger;

    /** Spark context handed to both readers; declared transient, so it is not serialized with this object. */
    private transient JavaSparkContext sparkContext;

    /** Path passed to the first reader. */
    private String firstDocProjPath;

    /** Path passed to the second reader. */
    private String secondDocProjPath;


    //------------------------ LOGIC --------------------------

    /**
     * Reads document-project relations from both configured paths (first path
     * with the first reader, second path with the second reader) and returns
     * the outcome of merging them with the configured {@link DocumentProjectMerger}.
     *
     * @return merged document-project relations that were read from the two paths
     */
    public JavaRDD<AffMatchDocumentProject> fetchDocumentProjects() {

        JavaRDD<AffMatchDocumentProject> relationsFromFirstPath =
                firstDocumentProjectReader.readDocumentProjects(sparkContext, firstDocProjPath);

        JavaRDD<AffMatchDocumentProject> relationsFromSecondPath =
                secondDocumentProjectReader.readDocumentProjects(sparkContext, secondDocProjPath);

        return documentProjectMerger.merge(relationsFromFirstPath, relationsFromSecondPath);
    }


    //------------------------ SETTERS --------------------------

    /** Sets the reader used for the first path. */
    public void setFirstDocumentProjectReader(DocumentProjectReader firstDocumentProjectReader) {
        this.firstDocumentProjectReader = firstDocumentProjectReader;
    }

    /** Sets the reader used for the second path. */
    public void setSecondDocumentProjectReader(DocumentProjectReader secondDocumentProjectReader) {
        this.secondDocumentProjectReader = secondDocumentProjectReader;
    }

    /** Sets the merger applied to the results of both readers. */
    public void setDocumentProjectMerger(DocumentProjectMerger documentProjectMerger) {
        this.documentProjectMerger = documentProjectMerger;
    }

    /** Sets the Spark context passed to both readers. */
    public void setSparkContext(JavaSparkContext sparkContext) {
        this.sparkContext = sparkContext;
    }

    /** Sets the path passed to the first reader. */
    public void setFirstDocProjPath(String firstDocProjPath) {
        this.firstDocProjPath = firstDocProjPath;
    }

    /** Sets the path passed to the second reader. */
    public void setSecondDocProjPath(String secondDocProjPath) {
        this.secondDocProjPath = secondDocProjPath;
    }

}