// StatementTranformerServiceImpl.java example
//
// Explorer
// nextprot-api-master
package org.nextprot.api.etl.service.impl;

import java.text.ParseException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.apache.log4j.Logger;
import org.nextprot.api.commons.constants.AnnotationCategory;
import org.nextprot.api.commons.exception.NPreconditions;
import org.nextprot.api.commons.exception.NextProtException;
import org.nextprot.api.core.domain.Isoform;
import org.nextprot.api.core.service.IsoformService;
import org.nextprot.api.core.utils.IsoformUtils;
import org.nextprot.api.etl.service.StatementTransformerService;
import org.nextprot.api.etl.service.impl.StatementETLServiceImpl.ReportBuilder;
import org.nextprot.api.isoform.mapper.domain.impl.SequenceVariant;
import org.nextprot.api.isoform.mapper.service.IsoformMappingService;
import org.nextprot.api.isoform.mapper.utils.SequenceVariantUtils;
import org.nextprot.commons.statements.Statement;
import org.nextprot.commons.statements.StatementBuilder;
import org.nextprot.commons.statements.StatementField;
import org.nextprot.commons.statements.TargetIsoformSet;
import org.nextprot.commons.statements.TargetIsoformStatementPosition;
import org.nextprot.commons.statements.constants.AnnotationType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

@Service
public class StatementTranformerServiceImpl implements StatementTransformerService {
	
	private static final Logger LOGGER = Logger.getLogger(StatementTranformerServiceImpl.class);

	@Autowired private IsoformService isoformService;
	@Autowired	private IsoformMappingService isoformMappingService;
	

	/**
	 * Transforms raw statements into the mapped statements that should be loaded.
	 * <p>
	 * Every raw statement that references subject statements is expanded, together with its
	 * subjects (and its object, if any), by the package-private {@code transformStatements}
	 * overload. The raw statements that were not marked as processed along the way are then
	 * mapped one by one as normal annotations.
	 *
	 * @param rawStatements the raw statements to transform
	 * @param report the report handed down to the per-statement transformation
	 * @return the mapped statements to load
	 * @throws NextProtException if a referenced subject is missing, if the subjects of a statement
	 *         mix isoforms or iso specificity, or if a phenotypic variation is left unprocessed
	 */
	@Override
	public Set<Statement> transformStatements(Set<Statement> rawStatements, ReportBuilder report) {

		Map<String, Statement> sourceStatementsById = rawStatements.stream().collect(Collectors.toMap(Statement::getStatementId, Function.identity()));

		Set<Statement> mappedStatementsToLoad = new HashSet<>();

		for (Statement originalStatement : rawStatements) {

			// Only statements that reference subjects are handled in this first pass
			if ((originalStatement.getSubjectStatementIds() == null) || originalStatement.getSubjectStatementIds().isEmpty()) {
				continue;
			}

			String[] subjectStatementIds = originalStatement.getSubjectStatementIdsArray();
			Set<Statement> subjectStatements = getSubjects(subjectStatementIds, sourceStatementsById);

			subjectStatements.forEach(s -> s.processed());
			originalStatement.processed();

			// validateSubject throws unless the subjects yield exactly one accession/isoform key.
			// Running it before picking the first subject turns an empty subject set into a
			// NextProtException instead of a NoSuchElementException from iterator().next().
			validateSubject(subjectStatements);
			Statement firstSubject = subjectStatements.iterator().next();

			String entryAccession = firstSubject.getValue(StatementField.ENTRY_ACCESSION);

			// isSubjectIsoSpecific throws when iso specific and non-iso specific subjects are mixed
			boolean isIsoSpecific = isSubjectIsoSpecific(subjectStatements);
			String isoformSpecificAccession = null;
			if (isIsoSpecific) {
				String featureName = firstSubject.getValue(StatementField.ANNOTATION_NAME);
				isoformSpecificAccession = getIsoAccession(featureName, entryAccession);
			}

			mappedStatementsToLoad.addAll(transformStatements(originalStatement, sourceStatementsById, subjectStatements, entryAccession, isIsoSpecific, isoformSpecificAccession, report));
		}

		//Currently only includes cases where we have the reciprocal binary interactions 
		Set<Statement> remainingRawStatements = getRemainingRawStatements(rawStatements);

		// Collecting into a Set already removes duplicates
		Set<String> distinctCategories = remainingRawStatements.stream().map(s -> s.getValue(StatementField.ANNOTATION_CATEGORY)).collect(Collectors.toSet());

		if (distinctCategories.contains(AnnotationCategory.PHENOTYPIC_VARIATION.getDbAnnotationTypeName())) {
			throw new NextProtException("Not expecting phenotypic variation at this stage.");
		}
		LOGGER.info("Remaining categories are " + distinctCategories);

		Set<Statement> remainingMappedStatements = transformRemainingRawStatementsToMappedStatements(remainingRawStatements);
		mappedStatementsToLoad.addAll(remainingMappedStatements);

		return mappedStatementsToLoad;

	}

	/**
	 * Maps each remaining raw statement to an entry-level statement.
	 * <p>
	 * For every statement the target isoforms are computed from its entry accession, stored in the
	 * {@code TARGET_ISOFORMS} field, and the {@code STATEMENT_ID} field is dropped before the
	 * statement is rebuilt with an annotation hash.
	 *
	 * @param remainingRawStatements the raw statements to map
	 * @return the mapped statements
	 */
	private Set<Statement> transformRemainingRawStatementsToMappedStatements (Set<Statement> remainingRawStatements){

		Set<Statement> mapped = new HashSet<>();

		for (Statement raw : remainingRawStatements) {

			String entryAccession = raw.getValue(StatementField.ENTRY_ACCESSION);
			TargetIsoformSet targets = StatementTransformationUtil.computeTargetIsoformsForNormalAnnotation(entryAccession, isoformService);
			String serializedTargets = targets.serializeToJsonString();

			Statement mappedStatement = StatementBuilder.createNew()
					.addMap(raw)
					.addField(StatementField.TARGET_ISOFORMS, serializedTargets)
					.removeField(StatementField.STATEMENT_ID)
					.buildWithAnnotationHash(AnnotationType.ENTRY);

			mapped.add(mappedStatement);
		}

		return mapped;
	}


	/**
	 * Selects the raw statements that have not been marked as processed.
	 *
	 * @param rawStatements all raw statements
	 * @return the statements for which {@code isProcessed()} is false
	 */
	private Set<Statement> getRemainingRawStatements (Set<Statement> rawStatements){
		Set<Statement> unprocessed = new HashSet<>();
		for (Statement candidate : rawStatements) {
			if (!candidate.isProcessed()) {
				unprocessed.add(candidate);
			}
		}
		return unprocessed;
	}
	
	/**
	 * Resolves the accession of the isoform named in a variant feature name.
	 *
	 * @param featureName the feature name, parsed as a {@link SequenceVariant}
	 * @param entryAccession the entry whose isoforms are searched
	 * @return the accession of the isoform whose name matches the one in the feature name
	 * @throws NextProtException if the feature name cannot be parsed or no isoform of the entry matches
	 */
	private String getIsoAccession (String featureName, String entryAccession){

		SequenceVariant sv;
		try {
			sv = new SequenceVariant(featureName);
		} catch (ParseException e) {
			throw new NextProtException(e);
		}

		List<Isoform> isoforms = isoformService.findIsoformsByEntryName(entryAccession);
		Isoform isoSpecific = IsoformUtils.getIsoformByName(isoforms, sv.getIsoformName());

		// NOTE(review): assumes getIsoformByName signals "no match" with null - confirm.
		// Fail with context rather than with a bare NullPointerException.
		if (isoSpecific == null) {
			throw new NextProtException("Isoform " + sv.getIsoformName() + " not found in entry " + entryAccession + " for feature " + featureName);
		}

		return isoSpecific.getIsoformAccession();
	}
	
	/**
	 * Propagates the subject statements for an entry and returns them keyed by that entry.
	 * <p>
	 * The returned map always holds exactly one mapping, from {@code nextprotAcession} to the list
	 * produced by {@code StatementTransformationUtil.getPropagatedStatementsForEntry}.
	 * The original comment notes that, for entry variants, the target isoform property is filled.
	 *
	 * @param sourceStatementsById not used by this method
	 * @param subjectStatements the subject statements to propagate
	 * @param nextprotAcession the entry accession, also used as the map key
	 * @param isIsoSpecific not used by this method
	 * @return a mutable single-entry map of propagated subjects
	 */
	private Map<String, List<Statement>> getSubjectsTransformed(Map<String, Statement> sourceStatementsById, Set<Statement> subjectStatements, String nextprotAcession, boolean isIsoSpecific) {

		List<Statement> propagated = StatementTransformationUtil.getPropagatedStatementsForEntry(isoformMappingService, subjectStatements, nextprotAcession);

		Map<String, List<Statement>> propagatedByEntry = new HashMap<>();
		propagatedByEntry.put(nextprotAcession, propagated);
		return propagatedByEntry;
	}
	
	
	
	
	/**
	 * Builds the statements to load for one statement that references subjects.
	 * <p>
	 * For each non-empty group of propagated subjects this produces: the subjects themselves, the
	 * original statement rebuilt with its target isoforms, subjects and (if present) object, and
	 * the object statement rebuilt with its own target isoforms.
	 *
	 * @param originalStatement the statement that references the subjects (and optionally an object)
	 * @param sourceStatementsById all raw statements by statement id, used to look up the object statement
	 * @param subjectStatements the subject statements of {@code originalStatement}
	 * @param nextprotAcession the entry accession the subjects are propagated for
	 * @param isIsoSpecific whether the subjects are iso specific
	 * @param isoSpecificAccession the isoform accession when iso specific, {@code null} otherwise
	 * @param report receives a warning when a group of propagated subjects is empty
	 * @return the statements to load; empty if every subject group was skipped
	 */
	Set<Statement> transformStatements(Statement originalStatement, Map<String, Statement> sourceStatementsById, Set<Statement> subjectStatements, String nextprotAcession, boolean isIsoSpecific, String isoSpecificAccession, ReportBuilder report){

		Set<Statement> statementsToLoad = new HashSet<>();

		//In case of entry variants have the target isoform property filled
		Map<String, List<Statement>> subjectsTransformedByEntryOrIsoform = getSubjectsTransformed(sourceStatementsById, subjectStatements, nextprotAcession, isIsoSpecific);

		for (Map.Entry<String, List<Statement>> entry : subjectsTransformedByEntryOrIsoform.entrySet()) {

			List<Statement> subjects = entry.getValue();

			if (subjects.isEmpty()) {
				report.addWarning("Empty subjects are not allowed for " + entry.getKey() + " skipping... case for 1 variant");
				continue;
			}

			String entryAccession = subjects.get(0).getValue(StatementField.ENTRY_ACCESSION);

			List<Isoform> isoforms = isoformService.findIsoformsByEntryName(entryAccession);
			NPreconditions.checkNotEmpty(isoforms, "Isoforms should not be null for " + entryAccession);

			List<String> isoformAccessions = isoforms.stream().map(Isoform::getIsoformAccession).collect(Collectors.toList());

			TargetIsoformSet targetIsoformsForPhenotypeSet = StatementTransformationUtil.computeTargetIsoformsForProteoformAnnotation(originalStatement, isoformMappingService, subjects, isIsoSpecific, isoSpecificAccession, isoformAccessions);
			String targetIsoformsForPhenotype = targetIsoformsForPhenotypeSet.serializeToJsonString();

			//Load objects
			Statement phenotypeIsoStatement;
			Statement objectIsoStatement = null;
			// null when the original statement has no object among the source statements
			Statement objectStatement = sourceStatementsById.get(originalStatement.getObjectStatementId());

			if (objectStatement != null) {

				// The object's target isoforms are only needed (and, for the non-iso specific
				// case, only computable) when there is an object; computing them before this
				// null check dereferenced a null objectStatement.
				String targetIsoformsForObject;
				if (isIsoSpecific) {
					// Same isoforms and specificity as the phenotype, but without a name
					Set<TargetIsoformStatementPosition> targetIsoformsForObjectSet = new TreeSet<>();
					for (TargetIsoformStatementPosition tisp : targetIsoformsForPhenotypeSet) {
						targetIsoformsForObjectSet.add(new TargetIsoformStatementPosition(tisp.getIsoformAccession(), tisp.getSpecificity(), null));
					}
					targetIsoformsForObject = new TargetIsoformSet(targetIsoformsForObjectSet).serializeToJsonString();
				} else {
					targetIsoformsForObject = StatementTransformationUtil.computeTargetIsoformsForNormalAnnotation(objectStatement.getValue(StatementField.ENTRY_ACCESSION), isoformService).serializeToJsonString();
				}

				objectStatement.processed();
				objectIsoStatement = StatementBuilder.createNew().addMap(objectStatement)
						.addField(StatementField.TARGET_ISOFORMS, targetIsoformsForObject)
						.buildWithAnnotationHash(AnnotationType.ENTRY);

				phenotypeIsoStatement = StatementBuilder.createNew().addMap(originalStatement)
						.addField(StatementField.TARGET_ISOFORMS, targetIsoformsForPhenotype)
						.addSubjects(subjects).addObject(objectIsoStatement)
						.removeField(StatementField.STATEMENT_ID)
						.removeField(StatementField.SUBJECT_STATEMENT_IDS)
						.removeField(StatementField.OBJECT_STATEMENT_IDS)
						.buildWithAnnotationHash(AnnotationType.ENTRY);

			} else {

				phenotypeIsoStatement = StatementBuilder.createNew().addMap(originalStatement)
						.addField(StatementField.TARGET_ISOFORMS, targetIsoformsForPhenotype) // in case of entry
						.addSubjects(subjects)
						.removeField(StatementField.STATEMENT_ID)
						.removeField(StatementField.SUBJECT_STATEMENT_IDS)
						.removeField(StatementField.OBJECT_STATEMENT_IDS)
						.buildWithAnnotationHash(AnnotationType.ENTRY);

			}

			//Load subjects
			statementsToLoad.addAll(subjects);

			//Load VPs
			statementsToLoad.add(phenotypeIsoStatement);

			//Load objects
			if (objectIsoStatement != null) {
				statementsToLoad.add(objectIsoStatement);
			}

		}

		return statementsToLoad;

	}

	
	
	/**
	 * Checks that all subjects refer to one and the same accession / isoform name combination.
	 * <p>
	 * Each subject is reduced to the key {@code <NEXTPROT_ACCESSION>-<isoform name>}, where the
	 * isoform name is whatever {@code SequenceVariantUtils.getIsoformName} returns for the
	 * subject's annotation name.
	 *
	 * @param subjects the subject statements to check
	 * @return the single key shared by all subjects; never {@code null}
	 * @throws NextProtException if the subjects yield no key or more than one distinct key
	 */
	private static String validateSubject(Set<Statement> subjects) {

		Set<String> isoforms = subjects.stream()
				.map(s -> s.getValue(StatementField.NEXTPROT_ACCESSION) + "-" + SequenceVariantUtils.getIsoformName(s.getValue(StatementField.ANNOTATION_NAME)))
				.collect(Collectors.toSet());

		if (isoforms.size() != 1) {
			throw new NextProtException("Mixing iso numbers for subjects is not allowed");
		}

		// The key is built by string concatenation, so it can never be null and needs no null check.
		// NOTE(review): a missing isoform name would show up as the text "null" inside the key -
		// confirm whether non-iso specific subjects are expected to pass this validation.
		return isoforms.iterator().next();
	}

	/**
	 * Tells whether the subjects are iso specific, based on their annotation names.
	 *
	 * @param subjects the subject statements to inspect
	 * @return {@code true} if every subject is iso specific, {@code false} if none is
	 *         (including when there are no subjects)
	 * @throws NextProtException if only some of the subjects are iso specific
	 */
	private static boolean isSubjectIsoSpecific(Set<Statement> subjects) {
		int isoSpecificCount = 0;
		for (Statement subject : subjects) {
			if (SequenceVariantUtils.isIsoSpecific(subject.getValue(StatementField.ANNOTATION_NAME))) {
				isoSpecificCount++;
			}
		}

		if (isoSpecificCount == 0) {
			return false;
		}
		if (isoSpecificCount != subjects.size()) {
			throw new NextProtException("Mixing iso specific subjects with non-iso specific variants is not allowed");
		}
		return true;
	}

	/**
	 * Looks up the statements referenced by the given subject ids.
	 *
	 * @param subjectIds the ids of the subject statements
	 * @param sourceStatementsById the statements to search, keyed by statement id
	 * @return the statements found for the ids
	 * @throws NextProtException as soon as an id has no statement in the map
	 */
	private static Set<Statement> getSubjects(String[] subjectIds, Map<String, Statement> sourceStatementsById) {
		Set<Statement> resolved = new HashSet<>();
		for (String id : subjectIds) {
			Statement match = sourceStatementsById.get(id);
			if (match != null) {
				resolved.add(match);
			} else {
				throw new NextProtException("Subject " + id + " not present in the given list");
			}
		}
		return resolved;
	}

	/**
	 * Finds the statements whose subject statement ids contain the id of the given statement.
	 *
	 * @param originalStatement the statement whose id is searched for
	 * @param sourceStatementsWithAModifiedSubject the statements to search
	 * @return the statements that reference {@code originalStatement} as a subject; statements
	 *         without subject statement ids are never part of the result
	 */
	public Set<Statement> findSourceStatementsWhereOriginalStatementIsUsedAsSubject(Statement originalStatement, Set<Statement> sourceStatementsWithAModifiedSubject) {
		return sourceStatementsWithAModifiedSubject.stream()
				// Subject statement ids may be null for statements without subjects: skip those
				.filter(sm -> sm.getSubjectStatementIds() != null)
				.filter(sm -> sm.getSubjectStatementIds().contains(originalStatement.getStatementId()))
				.collect(Collectors.toSet());
	}

	/**
	 * @return the isoform mapping service currently used by this transformer
	 */
	public IsoformMappingService getIsoformMappingService() {
		return this.isoformMappingService;
	}

	/**
	 * Replaces the isoform mapping service used by this transformer.
	 *
	 * @param isoformMappingService the service to use from now on
	 */
	public void setIsoformMappingService(final IsoformMappingService isoformMappingService) {
		this.isoformMappingService = isoformMappingService;
	}


	/**
	 * @return the isoform service currently used by this transformer
	 */
	public IsoformService getIsoformService() {
		return this.isoformService;
	}

	/**
	 * Replaces the isoform service used by this transformer.
	 *
	 * @param isoformService the service to use from now on
	 */
	public void setIsoformService(final IsoformService isoformService) {
		this.isoformService = isoformService;
	}


}