AnnotationBuilder.java example

Explorer
nextprot-api-master
package com.nextprot.api.annotation.builder;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;

import org.apache.log4j.Logger;
import org.nextprot.api.commons.constants.AnnotationCategory;
import org.nextprot.api.commons.constants.IdentifierOffset;
import org.nextprot.api.commons.exception.NextProtException;
import org.nextprot.api.commons.utils.StringUtils;
import org.nextprot.api.core.domain.BioObject;
import org.nextprot.api.core.domain.BioObject.BioType;
import org.nextprot.api.core.domain.CvTerm;
import org.nextprot.api.core.domain.Publication;
import org.nextprot.api.core.domain.annotation.Annotation;
import org.nextprot.api.core.domain.annotation.AnnotationEvidence;
import org.nextprot.api.core.domain.annotation.AnnotationEvidenceProperty;
import org.nextprot.api.core.domain.annotation.AnnotationVariant;
import org.nextprot.api.core.service.MainNamesService;
import org.nextprot.api.core.service.PublicationService;
import org.nextprot.api.core.service.TerminologyService;
import org.nextprot.api.core.utils.annot.AnnotationUtils;
import org.nextprot.commons.statements.Statement;
import org.nextprot.commons.statements.StatementField;

import com.google.common.base.Supplier;

abstract class AnnotationBuilder<T extends Annotation> implements Supplier<T> {

	protected static final Logger LOGGER = Logger.getLogger(AnnotationBuilder.class);

	protected TerminologyService terminologyService = null;
	protected PublicationService publicationService = null;
	protected MainNamesService mainNamesService = null;

	/**
	 * Flag that indicates that the build should throw an Exception at the first error or just log silently
	 */
	static boolean STRICT = false;

	
	private final Set<AnnotationCategory> ANNOT_CATEGORIES_WITHOUT_EVIDENCES = new HashSet<>(Arrays.asList(AnnotationCategory.MAMMALIAN_PHENOTYPE, AnnotationCategory.PROTEIN_PROPERTY));
	
	protected AnnotationBuilder(TerminologyService terminologyService, PublicationService publicationService, MainNamesService mainNamesService){
		this.terminologyService = terminologyService;
		this.publicationService = publicationService;
		this.mainNamesService = mainNamesService;
	}
	
	
	private static AnnotationEvidenceProperty addPropertyIfPresent(String propertyValue, String propertyName) {
		if (propertyValue != null) {
			AnnotationEvidenceProperty prop = new AnnotationEvidenceProperty();
			prop.setPropertyName(propertyName);
			prop.setPropertyValue(propertyValue);
			return prop;
		}
		return null;
	}

	public List<T> buildProteoformIsoformAnnotations (String accession, List<Statement> subjects, List<Statement> proteoformStatements){
		
		List<T> annotations = new ArrayList<>();

		Map<String, List<Statement>> subjectsByAnnotationId = subjects.stream().collect(Collectors.groupingBy(rs -> rs.getValue(StatementField.ANNOTATION_ID)));

		Map<String, List<Statement>> impactStatementsBySubject = proteoformStatements.stream().collect(Collectors.groupingBy(r -> r.getValue(StatementField.SUBJECT_ANNOTATION_IDS)));

		impactStatementsBySubject.keySet().forEach(subjectComponentsIdentifiers -> {
			
			String[] subjectComponentsIdentifiersArray = subjectComponentsIdentifiers.split(",");
			Set<T> subjectVariants = new TreeSet<T>(new Comparator<T>(){
				@Override
				public int compare(T o1, T o2) {
					return o1.getAnnotationName().compareTo(o2.getAnnotationName());
				}
			});

			for(String subjectComponentIdentifier : subjectComponentsIdentifiersArray){

				List<Statement> subjectVariant = subjectsByAnnotationId.get(subjectComponentIdentifier);
				
				if((subjectVariant == null) || (subjectVariant.isEmpty())){
					throw new NextProtException("Not found any subject  identifier:" + subjectComponentIdentifier);
				}
				T variant = buildAnnotation(accession, subjectVariant);
				subjectVariants.add(variant);
			}

			// Impact annotations
			List<Statement> impactStatements = impactStatementsBySubject.get(subjectComponentsIdentifiers);
			List<T> impactAnnotations = buildAnnotationList(accession, impactStatements);
			impactAnnotations.stream().forEach(ia -> {
				
				String name = subjectVariants.stream().map(v -> v.getAnnotationName()).collect(Collectors.joining(" + ")).toString();
				ia.setSubjectComponents(Arrays.asList(subjectComponentsIdentifiersArray));
			});

			annotations.addAll(impactAnnotations);

		});
		
		return annotations;
		
	}

	protected List<AnnotationEvidence> buildAnnotationEvidences(List<Statement> Statements) {

		//Ensures there is no repeated evidence!
		Set<AnnotationEvidence> evidencesSet = Statements.stream().map(s -> {
			AnnotationEvidence evidence = new AnnotationEvidence();
			
			evidence.setResourceType("database");//TODO to be checked with Amos and Lydie
			
			evidence.setResourceAssociationType("evidence");
			evidence.setQualityQualifier(s.getValue(StatementField.EVIDENCE_QUALITY));
			
			
			setEvidenceResourceId(evidence, s);

			AnnotationEvidenceProperty evidenceProperty = addPropertyIfPresent(s.getValue(StatementField.EVIDENCE_INTENSITY), "intensity");
			AnnotationEvidenceProperty expContextSubjectProteinOrigin = addPropertyIfPresent(s.getValue(StatementField.ANNOTATION_SUBJECT_SPECIES), "subject-protein-origin");
			AnnotationEvidenceProperty expContextObjectProteinOrigin = addPropertyIfPresent(s.getValue(StatementField.ANNOTATION_OBJECT_SPECIES), "object-protein-origin");

			//Set properties which are not null
			evidence.setProperties(
					Arrays.asList(evidenceProperty, expContextSubjectProteinOrigin, expContextObjectProteinOrigin)
						.stream().filter(p -> p != null)
						.collect(Collectors.toList())
						);
			
			
			 
			 String statementEvidenceCode = s.getValue(StatementField.EVIDENCE_CODE);
			 evidence.setEvidenceCodeAC(statementEvidenceCode);
			 evidence.setAssignedBy(s.getValue(StatementField.ASSIGNED_BY));
			 evidence.setAssignmentMethod(s.getValue(StatementField.ASSIGMENT_METHOD));
			 evidence.setResourceType(s.getValue(StatementField.RESOURCE_TYPE));
			 evidence.setEvidenceCodeOntology("evidence-code-ontology-cv");
			 evidence.setNegativeEvidence("true".equalsIgnoreCase(s.getValue(StatementField.IS_NEGATIVE)));
			 
			 if(statementEvidenceCode != null){
				 CvTerm term = terminologyService.findCvTermByAccession(statementEvidenceCode);
				 if(term != null){
					 evidence.setEvidenceCodeName(term.getName());
				 }else {
					 throw new NextProtException("Not found " + statementEvidenceCode + " in the database");
				 }
			 }
			 
			 evidence.setNote(s.getValue(StatementField.EVIDENCE_NOTE));
			
			//TODO create experimental contexts!
			 
			return evidence;
		}).collect(Collectors.toSet());
		

		//Ensures there is no repeated evidence!
		evidencesSet.forEach(e -> {
			long generatedEvidenceId = IdentifierOffset.EVIDENCE_ID_COUNTER_FOR_STATEMENTS.incrementAndGet();
			e.setEvidenceId(generatedEvidenceId);
		});
		
		List<AnnotationEvidence> evidencesFiltered = evidencesSet.stream().filter(e -> e.getResourceId() != -2).collect(Collectors.toList());
		if(evidencesFiltered.size() < evidencesSet.size()){
			int total = evidencesSet.size();
			int removed = total - evidencesFiltered.size();
			LOGGER.debug("Removed " + removed + " evidence because no resource id from a total of " + total);
		}
		
		return new ArrayList<>(evidencesFiltered);

	}


	abstract void setIsoformName(T annotation, String statement);

	abstract void setIsoformTargeting(T annotation, Statement statement);

	protected void setVariantAttributes(T annotation, Statement variantStatement) {

		String original = variantStatement.getValue(StatementField.VARIANT_ORIGINAL_AMINO_ACID);
		String variant = variantStatement.getValue(StatementField.VARIANT_VARIATION_AMINO_ACID);
		AnnotationVariant annotationVariant = new AnnotationVariant(original, variant);
		annotation.setVariant(annotationVariant);

	}
	


	void setEvidenceResourceId(AnnotationEvidence evidence, Statement statement) {

		String referenceDB = statement.getValue(StatementField.REFERENCE_DATABASE);
		String referenceAC = statement.getValue(StatementField.REFERENCE_ACCESSION);
		
		Publication publication = publicationService.findPublicationByDatabaseAndAccession(referenceDB, referenceAC);
		if (publication == null) {
			//Set -1 if not exists. Should never be the case 
			evidence.setResourceId((Long) throwErrorOrReturn("can 't find publication db:" + referenceDB + " id:" + referenceAC, -1L));
		}
		else {
			evidence.setResourceId(publication.getPublicationId());
		}
			
			
	}

	
	protected T buildAnnotation(String isoformName, List<Statement> flatStatements) {
		List<T> annotations = buildAnnotationList(isoformName, flatStatements);
		if(annotations.isEmpty() || annotations.size() > 1){
			throw new NextProtException("Expecting 1 annotation but found " + annotations.size() + " from " + flatStatements.size());
		}
		return annotations.get(0);
	}
	
	public List<T> buildAnnotationList(String isoformName, List<Statement> flatStatements) {

		List<T> annotations = new ArrayList<>();
		Map<String, List<Statement>> flatStatementsByAnnotationHash = flatStatements.stream().collect(Collectors.groupingBy(rs -> rs.getValue(StatementField.ANNOTATION_ID)));

		flatStatementsByAnnotationHash.entrySet().forEach(entry -> {

			T annotation = get();
			
			List<Statement> statements = entry.getValue();
			

			Statement firstStatement = statements.get(0);

			annotation.setAnnotationHash(firstStatement.getValue(StatementField.ANNOTATION_ID));
			//annotation.setAnnotationName(firstStatement.getValue(StatementField.ANNOTATION_NAME));

			AnnotationCategory category = AnnotationCategory.getDecamelizedAnnotationTypeName(StringUtils.camelToKebabCase(firstStatement.getValue(StatementField.ANNOTATION_CATEGORY)));
			annotation.setAnnotationCategory(category);

			if(category.equals(AnnotationCategory.VARIANT) || category.equals(AnnotationCategory.MUTAGENESIS)){
				setVariantAttributes(annotation, firstStatement);
			}
			setIsoformTargeting(annotation, firstStatement);

			setIsoformName(annotation, isoformName);

			annotation.setDescription(firstStatement.getValue(StatementField.ANNOT_DESCRIPTION));

			String cvTermAccession = firstStatement.getValue(StatementField.ANNOT_CV_TERM_ACCESSION);

			//Set the evidences if not Mammalian phenotype or Protein Property https://issues.isb-sib.ch/browse/BIOEDITOR-466
			if(!ANNOT_CATEGORIES_WITHOUT_EVIDENCES.contains(category)){
				annotation.setEvidences(buildAnnotationEvidences(statements));
				
				//TODO Remove this when you are able to do XREFs
				if(((annotation.getEvidences() == null) || ((annotation.getEvidences().isEmpty()))) && (category.equals(AnnotationCategory.VARIANT) || category.equals(AnnotationCategory.MUTAGENESIS))){
					annotation.setQualityQualifier("GOLD");//All variants from BED are GOLD, and this is a special case when we don't have evidences for VDs.
				}else {
					annotation.setQualityQualifier(AnnotationUtils.computeAnnotationQualityBasedOnEvidences(annotation.getEvidences()).name());
				}
				
			}else {
				
				//Case of Protein propert and mammalian phenotypes
				annotation.setEvidences(new ArrayList<AnnotationEvidence>());
				
				boolean foundGold = statements.stream().anyMatch(s -> s.getValue(StatementField.EVIDENCE_QUALITY).equalsIgnoreCase("GOLD"));
				if(foundGold){
					annotation.setQualityQualifier("GOLD");
				}else {
					annotation.setQualityQualifier("SILVER");
				}
			}

			if(cvTermAccession != null && !cvTermAccession.isEmpty()){

				annotation.setCvTermAccessionCode(cvTermAccession);

				CvTerm cvTerm = terminologyService.findCvTermByAccession(cvTermAccession);
				if(cvTerm != null){
					annotation.setCvTermName(cvTerm.getName());
					annotation.setCvApiName(cvTerm.getOntology());
					annotation.setCvTermDescription(cvTerm.getDescription());

					if(category.equals(AnnotationCategory.PROTEIN_PROPERTY)){
						//according to https://issues.isb-sib.ch/browse/BIOEDITOR-466
						annotation.setDescription(cvTerm.getDescription());
					}else if(category.equals(AnnotationCategory.MAMMALIAN_PHENOTYPE)){
						annotation.setDescription("Relative to modification-effect annotations");
					}
					
				}else {
					LOGGER.error("cv term was expected to be found " + cvTermAccession);
					annotation.setCvTermName(firstStatement.getValue(StatementField.ANNOT_CV_TERM_NAME));
					annotation.setCvApiName(firstStatement.getValue(StatementField.ANNOT_CV_TERM_TERMINOLOGY));
				}
							
			}

			annotation.setAnnotationHash(firstStatement.getValue(StatementField.ANNOTATION_ID));
			annotation.setAnnotationName(firstStatement.getValue(StatementField.ANNOTATION_NAME));
	
			//Check this with PAM (does it need to be a human readable stuff)
			annotation.setUniqueName(firstStatement.getValue(StatementField.ANNOTATION_ID)); //Does it need a name?
			
			String bioObjectAnnotationHash = firstStatement.getValue(StatementField.OBJECT_ANNOTATION_IDS);
			String bioObjectAccession = firstStatement.getValue(StatementField.BIOLOGICAL_OBJECT_ACCESSION);
			String bot = firstStatement.getValue(StatementField.BIOLOGICAL_OBJECT_TYPE);

			if ((bioObjectAnnotationHash != null) && (bioObjectAnnotationHash.length() > 0) || (bioObjectAccession != null && (bioObjectAccession.length() > 0))) {

				BioObject bioObject = null;

				if (AnnotationCategory.BINARY_INTERACTION.equals(annotation.getAPICategory())) {
					if(bioObjectAccession.startsWith("NX_") && BioType.PROTEIN.name().equalsIgnoreCase(bot)){
						// note that if we handle BioType.PROTEIN_ISOFORM in the future, we should
						// add the property isoformName as well, see how it's done in BinaryInteraction2Annotation.newBioObject()
						bioObject = BioObject.internal(BioType.PROTEIN);
						bioObject.setAccession(bioObjectAccession);						
						bioObject.putPropertyNameValue("geneName", firstStatement.getValue(StatementField.BIOLOGICAL_OBJECT_NAME));
						String proteinName = (String)mainNamesService.findIsoformOrEntryMainName().get(bioObjectAccession).getName();
						bioObject.putPropertyNameValue("proteinName", proteinName);
						bioObject.putPropertyNameValue("url", "https://www.nextprot.org/entry/" + bioObjectAccession + "/interactions");
						
					}else {
						throw new NextProtException("Binary Interaction only expects to be a nextprot entry NX_ and found " + bioObjectAccession + " with type " + bot);
					}
					
				}else if (AnnotationCategory.PHENOTYPIC_VARIATION.equals(annotation.getAPICategory())) {
						bioObject = BioObject.internal(BioType.ENTRY_ANNOTATION);
						bioObject.setAnnotationHash(bioObjectAnnotationHash);
				}else {
					throw new NextProtException("Category not expected for bioobject " + annotation.getAPICategory());
				}

				annotation.setBioObject(bioObject);
			}

			annotations.add(annotation);
			
			

		});
		return annotations;
	}
	
	
	
	
	private Object throwErrorOrReturn(String message, Object returnObject){

		LOGGER.error(message);
		if(STRICT){
			throw new NextProtException(message);
		}else return returnObject;

	}

}