package eu.dnetlib.iis.wf.citationmatching.input; import java.util.List; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import eu.dnetlib.iis.citationmatching.schemas.BasicMetadata; import eu.dnetlib.iis.citationmatching.schemas.DocumentMetadata; import eu.dnetlib.iis.citationmatching.schemas.ReferenceMetadata; import eu.dnetlib.iis.transformers.metadatamerger.schemas.ExtractedDocumentMetadataMergedWithOriginal; /** * Converter of {@link ExtractedDocumentMetadataMergedWithOriginal} object to * {@link DocumentMetadata} * * @author madryk */ public class DocumentToCitationDocumentConverter { //------------------------ LOGIC -------------------------- /** * Converts {@link ExtractedDocumentMetadataMergedWithOriginal} to {@link DocumentMetadata}.<br/> * Notice that returned document metadata will contain author ids list in {@link BasicMetadata#getAuthors()} * instead of author names. */ public DocumentMetadata convert(ExtractedDocumentMetadataMergedWithOriginal sourceDocument) { Preconditions.checkNotNull(sourceDocument); DocumentMetadata destDocument = DocumentMetadata.newBuilder() .setId(sourceDocument.getId()) .setBasicMetadata(convertBasicMetadata(sourceDocument)) .setReferences(convertReferences(sourceDocument.getReferences())) .build(); return destDocument; } //------------------------ PRIVATE -------------------------- private BasicMetadata convertBasicMetadata(ExtractedDocumentMetadataMergedWithOriginal sourceDocument) { return BasicMetadata.newBuilder() .setAuthors((sourceDocument.getAuthorIds() == null) ? Lists.newArrayList() : sourceDocument.getAuthorIds()) .setJournal(sourceDocument.getJournal()) .setPages(convertRange(sourceDocument.getPages())) .setTitle(sourceDocument.getTitle()) .setYear((sourceDocument.getYear() == null) ? null : sourceDocument.getYear().toString()) .build(); } private List<ReferenceMetadata> convertReferences(List<eu.dnetlib.iis.metadataextraction.schemas.ReferenceMetadata> sourceReferences) { List<ReferenceMetadata> destReferences = Lists.newArrayList(); if (sourceReferences == null) { return destReferences; } sourceReferences.forEach(sourceReference -> destReferences.add(convertReference(sourceReference))); return destReferences; } private ReferenceMetadata convertReference(eu.dnetlib.iis.metadataextraction.schemas.ReferenceMetadata sourceRefMeta) { ReferenceMetadata refMeta = ReferenceMetadata.newBuilder() .setBasicMetadata(convertReferenceBasicMetadata(sourceRefMeta.getBasicMetadata())) .setPosition(sourceRefMeta.getPosition()) .setRawText(sourceRefMeta.getText()) .build(); return refMeta; } private BasicMetadata convertReferenceBasicMetadata(eu.dnetlib.iis.metadataextraction.schemas.ReferenceBasicMetadata sourceRefBasicMeta) { return BasicMetadata.newBuilder() .setAuthors((sourceRefBasicMeta.getAuthors() == null) ? Lists.newArrayList() : sourceRefBasicMeta.getAuthors()) .setJournal(sourceRefBasicMeta.getSource()) .setPages(convertRange(sourceRefBasicMeta.getPages())) .setTitle(sourceRefBasicMeta.getTitle()) .setYear(sourceRefBasicMeta.getYear()) .build(); } private String convertRange(eu.dnetlib.iis.metadataextraction.schemas.Range range) { return (range == null) ? null : range.getStart() + "-" + range.getEnd(); } }