package eu.dnetlib.iis.wf.citationmatching; import java.io.Serializable; import org.apache.hadoop.io.NullWritable; import org.apache.spark.api.java.JavaPairRDD; import eu.dnetlib.iis.citationmatching.schemas.Citation; import eu.dnetlib.iis.wf.citationmatching.converter.MatchedCitationToCitationConverter; import pl.edu.icm.coansys.citations.OutputConverter; import pl.edu.icm.coansys.citations.data.IdWithSimilarity; import pl.edu.icm.coansys.citations.data.MatchableEntity; import scala.Tuple2; /** * Converter of {@link MatchableEntity} and {@link IdWithSimilarity} pair rdd * to {@link Citation} rdd * * @author madryk */ public class CitationOutputConverter implements OutputConverter<Citation, NullWritable>, Serializable { private static final long serialVersionUID = 1L; private MatchedCitationToCitationConverter converter = new MatchedCitationToCitationConverter(); //------------------------ LOGIC -------------------------- /** * Converts rdd with matched citations to rdd with {@link Citation}s. */ @Override public JavaPairRDD<Citation, NullWritable> convertMatchedCitations( JavaPairRDD<MatchableEntity, IdWithSimilarity> matchedCitations) { JavaPairRDD<Citation, NullWritable> convertedMatchedCitations = matchedCitations .mapToPair(x -> new Tuple2<>(converter.convertToCitation(x._1, x._2), NullWritable.get())); return convertedMatchedCitations; } //------------------------ SETTERS -------------------------- public void setConverter(MatchedCitationToCitationConverter converter) { this.converter = converter; } }