package eu.dnetlib.iis.wf.affmatching.bucket.projectorg.read; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject; import eu.dnetlib.iis.wf.affmatching.bucket.projectorg.model.AffMatchDocumentProject; import pl.edu.icm.sparkutils.avro.SparkAvroLoader; /** * Implementation of {@link DocumentProjectReader} that reads IIS relations, * objects of {@link DocumentToProject} written in avro files. * * @author mhorst */ public class IisInferredDocumentProjectReader implements DocumentProjectReader { private static final long serialVersionUID = 1L; private SparkAvroLoader avroLoader = new SparkAvroLoader(); private InferredDocumentProjectConverter converter = new InferredDocumentProjectConverter(); // ------------------------ LOGIC -------------------------- /** * Reads {@link AffMatchDocumentProject}s rdd written as avro files under <code>inputPath</code> * with {@link DocumentToProject} schema. */ @Override public JavaRDD<AffMatchDocumentProject> readDocumentProjects(JavaSparkContext sc, String inputPath) { return avroLoader.loadJavaRDD(sc, inputPath, DocumentToProject.class) .map(srcDocProj -> converter.convert(srcDocProj)); } // ------------------------ SETTERS -------------------------- public void setAvroLoader(SparkAvroLoader avroLoader) { this.avroLoader = avroLoader; } public void setDocumentProjectConverter(InferredDocumentProjectConverter converter) { this.converter = converter; } }