package eu.dnetlib.iis.wf.affmatching.bucket.projectorg.read; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import eu.dnetlib.iis.importer.schemas.Organization; import eu.dnetlib.iis.importer.schemas.ProjectToOrganization; import eu.dnetlib.iis.wf.affmatching.bucket.projectorg.model.AffMatchProjectOrganization; import pl.edu.icm.sparkutils.avro.SparkAvroLoader; /** * Implementation of {@link ProjectOrganizationReader} that reads IIS relations, * objects of {@link ProjectToOrganization} written in avro files. * * @author mhorst */ public class IisProjectOrganizationReader implements ProjectOrganizationReader { private static final long serialVersionUID = 1L; private SparkAvroLoader avroLoader = new SparkAvroLoader(); private ProjectOrganizationConverter converter = new ProjectOrganizationConverter(); // ------------------------ LOGIC -------------------------- /** * Reads {@link Organization}s written as avro files under <code>inputPath</code> */ @Override public JavaRDD<AffMatchProjectOrganization> readProjectOrganizations(JavaSparkContext sc, String inputPath) { return avroLoader.loadJavaRDD(sc, inputPath, ProjectToOrganization.class) .map(srcProjOrg -> converter.convert(srcProjOrg)); } // ------------------------ SETTERS -------------------------- public void setAvroLoader(SparkAvroLoader avroLoader) { this.avroLoader = avroLoader; } public void setProjectOrganizationConverter(ProjectOrganizationConverter converter) { this.converter = converter; } }