package eu.dnetlib.iis.wf.export.actionmanager.module; import java.util.Collections; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import eu.dnetlib.actionmanager.actions.AtomicAction; import eu.dnetlib.actionmanager.common.Agent; import eu.dnetlib.data.proto.FieldTypeProtos.ExtraInfo; import eu.dnetlib.data.proto.OafProtos.Oaf; import eu.dnetlib.data.proto.OafProtos.OafEntity; import eu.dnetlib.data.proto.TypeProtos.Type; import eu.dnetlib.iis.common.InfoSpaceConstants; import eu.dnetlib.iis.common.citations.schemas.CitationEntry; import eu.dnetlib.iis.common.model.extrainfo.ExtraInfoConstants; import eu.dnetlib.iis.common.model.extrainfo.citations.BlobCitationEntry; import eu.dnetlib.iis.common.model.extrainfo.converter.CitationsExtraInfoConverter; import eu.dnetlib.iis.export.schemas.Citations; import eu.dnetlib.iis.wf.export.actionmanager.cfg.StaticConfigurationProvider; /** * {@link Citations} based action builder module. * * @author mhorst * */ public class CitationsActionBuilderModuleFactory extends AbstractActionBuilderFactory<Citations> { private static final String EXTRA_INFO_NAME = ExtraInfoConstants.NAME_CITATIONS; private static final String EXTRA_INFO_TYPOLOGY = ExtraInfoConstants.TYPOLOGY_CITATIONS; // ------------------------ CONSTRUCTORS -------------------------- public CitationsActionBuilderModuleFactory() { super(AlgorithmName.document_referencedDocuments); } // ------------------------ LOGIC --------------------------------- @Override public ActionBuilderModule<Citations> instantiate(Configuration config, Agent agent, String actionSetId) { return new CitationActionBuilderModule(provideTrustLevelThreshold(config), agent, actionSetId); } // ------------------------ INNER CLASS -------------------------- class CitationActionBuilderModule extends AbstractBuilderModule<Citations> { private CitationsExtraInfoConverter converter = new CitationsExtraInfoConverter(); // ------------------------ CONSTRUCTORS -------------------------- /** * @param trustLevelThreshold trust level threshold or null when all records should be exported * @param agent action manager agent details * @param actionSetId action set identifier */ public CitationActionBuilderModule(Float trustLevelThreshold, Agent agent, String actionSetId) { super(trustLevelThreshold, buildInferenceProvenance(), agent, actionSetId); } // ------------------------ LOGIC -------------------------- @Override public List<AtomicAction> build(Citations object) { Oaf oaf = buildOAFCitations(object); if (oaf != null) { return getActionFactory().createUpdateActions(getActionSetId(), getAgent(), object.getDocumentId().toString(), Type.result, oaf.toByteArray()); } else { return Collections.emptyList(); } } // ------------------------ PRIVATE -------------------------- /** * Builds {@link Oaf} object containing document statistics. */ private Oaf buildOAFCitations(Citations source) { if (CollectionUtils.isNotEmpty(source.getCitations())) { OafEntity.Builder entityBuilder = OafEntity.newBuilder(); if (source.getDocumentId() != null) { entityBuilder.setId(source.getDocumentId().toString()); } ExtraInfo.Builder extraInfoBuilder = ExtraInfo.newBuilder(); extraInfoBuilder.setValue(converter.serialize(normalize(source.getCitations()))); extraInfoBuilder.setName(EXTRA_INFO_NAME); extraInfoBuilder.setTypology(EXTRA_INFO_TYPOLOGY); extraInfoBuilder.setProvenance(this.getInferenceProvenance()); extraInfoBuilder.setTrust(StaticConfigurationProvider.ACTION_TRUST_0_9); entityBuilder.addExtraInfo(extraInfoBuilder.build()); entityBuilder.setType(Type.result); return buildOaf(entityBuilder.build()); } // fallback return null; } /** * Performs confidence level normalization. Removes empty lists. * Removes 50| prefix from publication identifier. * * @param source list of citations to be normalized * @return {@link BlobCitationEntry} objects having confidence level value normalized */ private SortedSet<BlobCitationEntry> normalize(List<CitationEntry> source) { if (source != null) { SortedSet<BlobCitationEntry> results = new TreeSet<BlobCitationEntry>(); for (CitationEntry currentEntry : source) { if (currentEntry.getExternalDestinationDocumentIds().isEmpty()) { currentEntry.setExternalDestinationDocumentIds(null); } if (currentEntry.getDestinationDocumentId() != null) { currentEntry.setDestinationDocumentId( StringUtils.split(currentEntry.getDestinationDocumentId().toString(), InfoSpaceConstants.ROW_PREFIX_SEPARATOR)[1]); } results.add(CitationsActionBuilderModuleUtils.build(currentEntry)); } return results; } else { return null; } } } }