package org.nextprot.api.etl.service.impl; import java.text.ParseException; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.log4j.Logger; import org.nextprot.api.commons.constants.AnnotationCategory; import org.nextprot.api.commons.exception.NPreconditions; import org.nextprot.api.commons.exception.NextProtException; import org.nextprot.api.core.domain.Isoform; import org.nextprot.api.core.service.IsoformService; import org.nextprot.api.core.utils.IsoformUtils; import org.nextprot.api.etl.service.StatementTransformerService; import org.nextprot.api.etl.service.impl.StatementETLServiceImpl.ReportBuilder; import org.nextprot.api.isoform.mapper.domain.impl.SequenceVariant; import org.nextprot.api.isoform.mapper.service.IsoformMappingService; import org.nextprot.api.isoform.mapper.utils.SequenceVariantUtils; import org.nextprot.commons.statements.Statement; import org.nextprot.commons.statements.StatementBuilder; import org.nextprot.commons.statements.StatementField; import org.nextprot.commons.statements.TargetIsoformSet; import org.nextprot.commons.statements.TargetIsoformStatementPosition; import org.nextprot.commons.statements.constants.AnnotationType; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @Service public class StatementTranformerServiceImpl implements StatementTransformerService { private static final Logger LOGGER = Logger.getLogger(StatementTranformerServiceImpl.class); @Autowired private IsoformService isoformService; @Autowired private IsoformMappingService isoformMappingService; @Override public Set<Statement> transformStatements(Set<Statement> rawStatements, ReportBuilder report) { Map<String, Statement> sourceStatementsById = rawStatements.stream().collect(Collectors.toMap(Statement::getStatementId, Function.identity())); Set<Statement> mappedStatementsToLoad = new HashSet<>(); for (Statement originalStatement : rawStatements) { if ((originalStatement.getSubjectStatementIds() != null) && (!originalStatement.getSubjectStatementIds().isEmpty())) { String[] subjectStatemendIds = originalStatement.getSubjectStatementIdsArray(); Set<Statement> subjectStatements = getSubjects(subjectStatemendIds, sourceStatementsById); subjectStatements.forEach(s -> s.processed()); originalStatement.processed(); String entryAccession = subjectStatements.iterator().next().getValue(StatementField.ENTRY_ACCESSION); boolean isIsoSpecific = false; String isoformName = validateSubject(subjectStatements); String isoformSpecificAccession = null; if (isSubjectIsoSpecific(subjectStatements)) { if(isoformName != null){ isIsoSpecific = true; String featureName = subjectStatements.iterator().next().getValue(StatementField.ANNOTATION_NAME); isoformSpecificAccession = getIsoAccession(featureName, entryAccession); }else throw new NextProtException("Something wrong occured when checking for iso specificity"); } mappedStatementsToLoad.addAll(transformStatements(originalStatement, sourceStatementsById, subjectStatements, entryAccession, isIsoSpecific, isoformSpecificAccession, report)); } } //Currently only includes cases where we have the reciprocal binary interactions Set<Statement> remainingRawStatements = getRemainingRawStatements (rawStatements); Set<String> distinctCategories = remainingRawStatements.stream().map(s -> s.getValue(StatementField.ANNOTATION_CATEGORY)).distinct().collect(Collectors.toSet()); if(distinctCategories.contains(AnnotationCategory.PHENOTYPIC_VARIATION.getDbAnnotationTypeName())){ throw new NextProtException("Not expecting phenotypic variation at this stage."); } LOGGER.info("Remaining categories are " + distinctCategories); Set<Statement> remainingMappedStatements = transformRemainingRawStatementsToMappedStatements (remainingRawStatements); mappedStatementsToLoad.addAll(remainingMappedStatements); return mappedStatementsToLoad; } private Set<Statement> transformRemainingRawStatementsToMappedStatements (Set<Statement> remainingRawStatements){ return remainingRawStatements.stream().map(statement -> { TargetIsoformSet targetIsoformForNormalAnnotation = StatementTransformationUtil.computeTargetIsoformsForNormalAnnotation(statement.getValue(StatementField.ENTRY_ACCESSION), isoformService); return StatementBuilder.createNew().addMap(statement) .addField(StatementField.TARGET_ISOFORMS, targetIsoformForNormalAnnotation.serializeToJsonString()) .removeField(StatementField.STATEMENT_ID) .buildWithAnnotationHash(AnnotationType.ENTRY); }).collect(Collectors.toSet()); } private Set<Statement> getRemainingRawStatements (Set<Statement> rawStatements){ return rawStatements.stream().filter(s -> !s.isProcessed()).collect(Collectors.toSet()); } private String getIsoAccession (String featureName, String entryAccession){ SequenceVariant sv; try { sv = new SequenceVariant(featureName); } catch (ParseException e) { throw new NextProtException(e); } List<Isoform> isoforms = isoformService.findIsoformsByEntryName(entryAccession); Isoform isoSpecific = IsoformUtils.getIsoformByName(isoforms, sv.getIsoformName()); return isoSpecific.getIsoformAccession(); } private Map<String, List<Statement>> getSubjectsTransformed(Map<String, Statement> sourceStatementsById, Set<Statement> subjectStatements, String nextprotAcession, boolean isIsoSpecific) { //In case of entry variants have the target isoform property filled Map<String, List<Statement>> variantsOnIsoform = new HashMap<>(); List<Statement> result = StatementTransformationUtil.getPropagatedStatementsForEntry(isoformMappingService, subjectStatements, nextprotAcession); variantsOnIsoform.put(nextprotAcession, result); return variantsOnIsoform; } Set<Statement> transformStatements(Statement originalStatement, Map<String, Statement> sourceStatementsById, Set<Statement> subjectStatements, String nextprotAcession, boolean isIsoSpecific, String isoSpecificAccession, ReportBuilder report){ Set<Statement> statementsToLoad = new HashSet<>(); //In case of entry variants have the target isoform property filled Map<String, List<Statement>> subjectsTransformedByEntryOrIsoform = getSubjectsTransformed(sourceStatementsById, subjectStatements, nextprotAcession, isIsoSpecific); for(Map.Entry<String, List<Statement>> entry : subjectsTransformedByEntryOrIsoform.entrySet()) { List<Statement> subjects = entry.getValue(); if(subjects.isEmpty()){ report.addWarning("Empty subjects are not allowed for " + entry.getKey() + " skipping... case for 1 variant"); continue; } String targetIsoformsForObject; String targetIsoformsForPhenotype; String entryAccession = subjects.get(0).getValue(StatementField.ENTRY_ACCESSION); List<Isoform> isoforms = isoformService.findIsoformsByEntryName(entryAccession); NPreconditions.checkNotEmpty(isoforms, "Isoforms should not be null for " + entryAccession); List<String> isoformNames = isoforms.stream().map(Isoform::getIsoformAccession).collect(Collectors.toList()); TargetIsoformSet targetIsoformsForPhenotypeSet = StatementTransformationUtil.computeTargetIsoformsForProteoformAnnotation(originalStatement, isoformMappingService, subjects, isIsoSpecific, isoSpecificAccession, isoformNames); targetIsoformsForPhenotype = targetIsoformsForPhenotypeSet.serializeToJsonString(); Set<TargetIsoformStatementPosition> targetIsoformsForObjectSet = new TreeSet<>(); //Load objects Statement phenotypeIsoStatement; Statement objectIsoStatement = null; Statement objectStatement = sourceStatementsById.get(originalStatement.getObjectStatementId()); if(isIsoSpecific){//If it is iso specific for(TargetIsoformStatementPosition tisp : targetIsoformsForPhenotypeSet){ targetIsoformsForObjectSet.add(new TargetIsoformStatementPosition(tisp.getIsoformAccession(), tisp.getSpecificity(), null)); } targetIsoformsForObject = new TargetIsoformSet(targetIsoformsForObjectSet).serializeToJsonString(); }else { targetIsoformsForObject = StatementTransformationUtil.computeTargetIsoformsForNormalAnnotation(objectStatement.getValue(StatementField.ENTRY_ACCESSION), isoformService).serializeToJsonString(); } if(objectStatement != null){ objectStatement.processed(); objectIsoStatement = StatementBuilder.createNew().addMap(objectStatement) .addField(StatementField.TARGET_ISOFORMS, targetIsoformsForObject) .buildWithAnnotationHash(AnnotationType.ENTRY); phenotypeIsoStatement = StatementBuilder.createNew().addMap(originalStatement) .addField(StatementField.TARGET_ISOFORMS, targetIsoformsForPhenotype) .addSubjects(subjects).addObject(objectIsoStatement) .removeField(StatementField.STATEMENT_ID) .removeField(StatementField.SUBJECT_STATEMENT_IDS) .removeField(StatementField.OBJECT_STATEMENT_IDS) .buildWithAnnotationHash(AnnotationType.ENTRY); } else { phenotypeIsoStatement = StatementBuilder.createNew().addMap(originalStatement) .addField(StatementField.TARGET_ISOFORMS, targetIsoformsForPhenotype) // in case of entry .addSubjects(subjects) .removeField(StatementField.STATEMENT_ID) .removeField(StatementField.SUBJECT_STATEMENT_IDS) .removeField(StatementField.OBJECT_STATEMENT_IDS) .buildWithAnnotationHash(AnnotationType.ENTRY); } //Load subjects statementsToLoad.addAll(subjects); //Load VPs statementsToLoad.add(phenotypeIsoStatement); //Load objects if(objectIsoStatement != null){ statementsToLoad.add(objectIsoStatement); } } return statementsToLoad; } /** * Returns an exception if there are mixes between subjects * * @param subjects * @return */ private static String validateSubject(Set<Statement> subjects) { Set<String> isoforms = subjects.stream().map(s -> { return s.getValue(StatementField.NEXTPROT_ACCESSION) + "-" + SequenceVariantUtils.getIsoformName(s.getValue(StatementField.ANNOTATION_NAME)); }).collect(Collectors.toSet()); if (isoforms.size() != 1) { throw new NextProtException("Mixing iso numbers for subjects is not allowed"); } String isoform = isoforms.iterator().next(); if (isoform == null) { throw new NextProtException("Not iso specific subjects are not allowed on isOnSameIsoform"); } return isoform; } /** * Returns an exception if there are mixes between subjects * * @param subjects * @return */ private static boolean isSubjectIsoSpecific(Set<Statement> subjects) { int isoSpecificSize = subjects.stream().filter(s -> SequenceVariantUtils.isIsoSpecific(s.getValue(StatementField.ANNOTATION_NAME))).collect(Collectors.toList()).size(); if (isoSpecificSize == 0) { return false; } else if (isoSpecificSize == subjects.size()) { return true; } else { throw new NextProtException("Mixing iso specific subjects with non-iso specific variants is not allowed"); } } private static Set<Statement> getSubjects(String[] subjectIds, Map<String, Statement> sourceStatementsById) { Set<Statement> variants = new HashSet<>(); for (String subjectId : subjectIds) { Statement subjectStatement = sourceStatementsById.get(subjectId); if (subjectStatement == null) { throw new NextProtException("Subject " + subjectId + " not present in the given list"); } variants.add(subjectStatement); } return variants; } public Set<Statement> findSourceStatementsWhereOriginalStatementIsUsedAsSubject(Statement originalStatement, Set<Statement> sourceStatementsWithAModifiedSubject) { return sourceStatementsWithAModifiedSubject.stream().filter(sm -> sm.getSubjectStatementIds().contains(originalStatement.getStatementId())).collect(Collectors.toSet()); } public IsoformMappingService getIsoformMappingService() { return isoformMappingService; } public void setIsoformMappingService(IsoformMappingService isoformMappingService) { this.isoformMappingService = isoformMappingService; } public IsoformService getIsoformService() { return isoformService; } public void setIsoformService(IsoformService isoformService) { this.isoformService = isoformService; } }