package eu.dnetlib.iis.wf.export.actionmanager.module; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import eu.dnetlib.actionmanager.actions.AtomicAction; import eu.dnetlib.actionmanager.common.Agent; import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; import eu.dnetlib.data.proto.OafProtos.Oaf; import eu.dnetlib.data.proto.OafProtos.OafEntity; import eu.dnetlib.data.proto.ResultProtos.Result; import eu.dnetlib.data.proto.ResultProtos.Result.Metadata; import eu.dnetlib.data.proto.TypeProtos.Type; import eu.dnetlib.iis.common.InfoSpaceConstants; import eu.dnetlib.iis.documentsclassification.schemas.DocumentClass; import eu.dnetlib.iis.documentsclassification.schemas.DocumentClasses; import eu.dnetlib.iis.documentsclassification.schemas.DocumentToDocumentClasses; /** * {@link DocumentToDocumentClasses} based action builder module. * * @author mhorst * */ public class DocumentToDocumentClassesActionBuilderModuleFactory extends AbstractActionBuilderFactory<DocumentToDocumentClasses> { // ------------------------ CONSTRUCTORS -------------------------- public DocumentToDocumentClassesActionBuilderModuleFactory() { super(AlgorithmName.document_classes); } // ------------------------ LOGIC --------------------------------- @Override public ActionBuilderModule<DocumentToDocumentClasses> instantiate(Configuration config, Agent agent, String actionSetId) { return new DocumentToDocumentClassesActionBuilderModule(provideTrustLevelThreshold(config), agent, actionSetId); } // ------------------------ INNER CLASS --------------------------- class DocumentToDocumentClassesActionBuilderModule extends AbstractBuilderModule<DocumentToDocumentClasses> { // ------------------------ CONSTRUCTORS -------------------------- /** * @param trustLevelThreshold trust level threshold or null when all records should be exported * @param agent action manager agent details * @param actionSetId action set identifier */ public DocumentToDocumentClassesActionBuilderModule(Float trustLevelThreshold, Agent agent, String actionSetId) { super(trustLevelThreshold, buildInferenceProvenance(), agent, actionSetId); } // ------------------------ LOGIC -------------------------- @Override public List<AtomicAction> build(DocumentToDocumentClasses object) { Oaf oaf = buildOAFClasses(object); if (oaf != null) { return getActionFactory().createUpdateActions(getActionSetId(), getAgent(), object.getDocumentId().toString(), Type.result, oaf.toByteArray()); } else { return Collections.emptyList(); } } // ------------------------ PRIVATE -------------------------- /** * Builds OAF object containing document classes. */ private Oaf buildOAFClasses(DocumentToDocumentClasses source) { if (source.getClasses() != null) { List<? extends StructuredProperty> classificationSubjects = convertAvroToProtoBuff( source.getClasses()); if (CollectionUtils.isNotEmpty(classificationSubjects)) { OafEntity.Builder entityBuilder = OafEntity.newBuilder(); Result.Builder resultBuilder = Result.newBuilder(); Metadata.Builder metaBuilder = Metadata.newBuilder(); metaBuilder.addAllSubject(classificationSubjects); resultBuilder.setMetadata(metaBuilder.build()); entityBuilder.setResult(resultBuilder.build()); entityBuilder.setId(source.getDocumentId().toString()); entityBuilder.setType(Type.result); return buildOaf(entityBuilder.build()); } } // fallback return null; } private List<? extends StructuredProperty> convertAvroToProtoBuff(DocumentClasses source) { List<StructuredProperty> list = new ArrayList<StructuredProperty>(); if (CollectionUtils.isNotEmpty(source.getArXivClasses())) { list.addAll(convertAvroToProtoBuff(source.getArXivClasses(), InfoSpaceConstants.SEMANTIC_CLASS_TAXONOMIES_ARXIV)); } if (CollectionUtils.isNotEmpty(source.getDDCClasses())) { list.addAll(convertAvroToProtoBuff(source.getDDCClasses(), InfoSpaceConstants.SEMANTIC_CLASS_TAXONOMIES_DDC)); } if (CollectionUtils.isNotEmpty(source.getWoSClasses())) { list.addAll(convertAvroToProtoBuff(source.getWoSClasses(), InfoSpaceConstants.SEMANTIC_CLASS_TAXONOMIES_WOS)); } if (CollectionUtils.isNotEmpty(source.getMeshEuroPMCClasses())) { list.addAll(convertAvroToProtoBuff(source.getMeshEuroPMCClasses(), InfoSpaceConstants.SEMANTIC_CLASS_TAXONOMIES_MESHEUROPMC)); } if (CollectionUtils.isNotEmpty(source.getACMClasses())) { list.addAll(convertAvroToProtoBuff(source.getACMClasses(), InfoSpaceConstants.SEMANTIC_CLASS_TAXONOMIES_ACM)); } return list; } private List<StructuredProperty> convertAvroToProtoBuff(List<DocumentClass> source, String taxonomyName) { List<StructuredProperty> results = new ArrayList<StructuredProperty>(); for (DocumentClass current : source) { try { StructuredProperty result = convertAvroToProtoBuff(current, taxonomyName); if (result!=null) { results.add(result); } } catch (TrustLevelThresholdExceededException e) { // no need to log, we just do not attach result } } return results; } private StructuredProperty convertAvroToProtoBuff(DocumentClass source, String taxonomyName) throws TrustLevelThresholdExceededException { if (source != null && CollectionUtils.isNotEmpty(source.getClassLabels())) { StructuredProperty.Builder builder = StructuredProperty.newBuilder(); Qualifier.Builder qualifierBuilder = Qualifier.newBuilder(); qualifierBuilder.setSchemeid(InfoSpaceConstants.SEMANTIC_SCHEME_DNET_CLASSIFICATION_TAXONOMIES); qualifierBuilder.setSchemename(InfoSpaceConstants.SEMANTIC_SCHEME_DNET_CLASSIFICATION_TAXONOMIES); qualifierBuilder.setClassid(taxonomyName); qualifierBuilder.setClassname(taxonomyName); builder.setQualifier(qualifierBuilder.build()); builder.setValue( StringUtils.join(source.getClassLabels(), InfoSpaceConstants.CLASSIFICATION_HIERARCHY_SEPARATOR)); float confidenceLevel = source.getConfidenceLevel(); builder.setDataInfo(buildInference(confidenceLevel < 1 ? confidenceLevel : 1)); return builder.build(); } else { return null; } } } }