package eu.dnetlib.iis.wf.citationmatching; import java.io.Serializable; import org.apache.hadoop.io.NullWritable; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import eu.dnetlib.iis.citationmatching.schemas.Citation; import pl.edu.icm.coansys.citations.OutputWriter; import pl.edu.icm.sparkutils.avro.SparkAvroSaver; /** * Writer of output {@link Citation}s * * @author madryk * */ public class CitationOutputWriter implements OutputWriter<Citation, NullWritable>, Serializable { private static final long serialVersionUID = 1L; private SparkAvroSaver avroSaver = new SparkAvroSaver(); private CitationMatchingCounterReporter citationMatchingReporter; //------------------------ LOGIC -------------------------- /** * Writes rdd with {@link Citation}s to path specified as argument */ @Override public void writeMatchedCitations(JavaPairRDD<Citation, NullWritable> matchedCitations, String path) { JavaRDD<Citation> matchedCitationsKeys = matchedCitations.keys(); matchedCitationsKeys.cache(); avroSaver.saveJavaRDD(matchedCitationsKeys, Citation.SCHEMA$, path); citationMatchingReporter.report(matchedCitationsKeys); } //------------------------ SETTERS -------------------------- public void setCitationMatchingReporter(CitationMatchingCounterReporter citationMatchingReporter) { this.citationMatchingReporter = citationMatchingReporter; } }