package com.nextprot.api.annotation.builder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.log4j.Logger;
import org.nextprot.api.commons.constants.AnnotationCategory;
import org.nextprot.api.commons.constants.IdentifierOffset;
import org.nextprot.api.commons.exception.NextProtException;
import org.nextprot.api.commons.utils.StringUtils;
import org.nextprot.api.core.domain.BioObject;
import org.nextprot.api.core.domain.BioObject.BioType;
import org.nextprot.api.core.domain.CvTerm;
import org.nextprot.api.core.domain.Publication;
import org.nextprot.api.core.domain.annotation.Annotation;
import org.nextprot.api.core.domain.annotation.AnnotationEvidence;
import org.nextprot.api.core.domain.annotation.AnnotationEvidenceProperty;
import org.nextprot.api.core.domain.annotation.AnnotationVariant;
import org.nextprot.api.core.service.MainNamesService;
import org.nextprot.api.core.service.PublicationService;
import org.nextprot.api.core.service.TerminologyService;
import org.nextprot.api.core.utils.annot.AnnotationUtils;
import org.nextprot.commons.statements.Statement;
import org.nextprot.commons.statements.StatementField;
import com.google.common.base.Supplier;
abstract class AnnotationBuilder<T extends Annotation> implements Supplier<T> {
protected static final Logger LOGGER = Logger.getLogger(AnnotationBuilder.class);
protected TerminologyService terminologyService = null;
protected PublicationService publicationService = null;
protected MainNamesService mainNamesService = null;
/**
* Flag that indicates that the build should throw an Exception at the first error or just log silently
*/
static boolean STRICT = false;
private final Set<AnnotationCategory> ANNOT_CATEGORIES_WITHOUT_EVIDENCES = new HashSet<>(Arrays.asList(AnnotationCategory.MAMMALIAN_PHENOTYPE, AnnotationCategory.PROTEIN_PROPERTY));
protected AnnotationBuilder(TerminologyService terminologyService, PublicationService publicationService, MainNamesService mainNamesService){
this.terminologyService = terminologyService;
this.publicationService = publicationService;
this.mainNamesService = mainNamesService;
}
private static AnnotationEvidenceProperty addPropertyIfPresent(String propertyValue, String propertyName) {
if (propertyValue != null) {
AnnotationEvidenceProperty prop = new AnnotationEvidenceProperty();
prop.setPropertyName(propertyName);
prop.setPropertyValue(propertyValue);
return prop;
}
return null;
}
public List<T> buildProteoformIsoformAnnotations (String accession, List<Statement> subjects, List<Statement> proteoformStatements){
List<T> annotations = new ArrayList<>();
Map<String, List<Statement>> subjectsByAnnotationId = subjects.stream().collect(Collectors.groupingBy(rs -> rs.getValue(StatementField.ANNOTATION_ID)));
Map<String, List<Statement>> impactStatementsBySubject = proteoformStatements.stream().collect(Collectors.groupingBy(r -> r.getValue(StatementField.SUBJECT_ANNOTATION_IDS)));
impactStatementsBySubject.keySet().forEach(subjectComponentsIdentifiers -> {
String[] subjectComponentsIdentifiersArray = subjectComponentsIdentifiers.split(",");
Set<T> subjectVariants = new TreeSet<T>(new Comparator<T>(){
@Override
public int compare(T o1, T o2) {
return o1.getAnnotationName().compareTo(o2.getAnnotationName());
}
});
for(String subjectComponentIdentifier : subjectComponentsIdentifiersArray){
List<Statement> subjectVariant = subjectsByAnnotationId.get(subjectComponentIdentifier);
if((subjectVariant == null) || (subjectVariant.isEmpty())){
throw new NextProtException("Not found any subject identifier:" + subjectComponentIdentifier);
}
T variant = buildAnnotation(accession, subjectVariant);
subjectVariants.add(variant);
}
// Impact annotations
List<Statement> impactStatements = impactStatementsBySubject.get(subjectComponentsIdentifiers);
List<T> impactAnnotations = buildAnnotationList(accession, impactStatements);
impactAnnotations.stream().forEach(ia -> {
String name = subjectVariants.stream().map(v -> v.getAnnotationName()).collect(Collectors.joining(" + ")).toString();
ia.setSubjectComponents(Arrays.asList(subjectComponentsIdentifiersArray));
});
annotations.addAll(impactAnnotations);
});
return annotations;
}
protected List<AnnotationEvidence> buildAnnotationEvidences(List<Statement> Statements) {
//Ensures there is no repeated evidence!
Set<AnnotationEvidence> evidencesSet = Statements.stream().map(s -> {
AnnotationEvidence evidence = new AnnotationEvidence();
evidence.setResourceType("database");//TODO to be checked with Amos and Lydie
evidence.setResourceAssociationType("evidence");
evidence.setQualityQualifier(s.getValue(StatementField.EVIDENCE_QUALITY));
setEvidenceResourceId(evidence, s);
AnnotationEvidenceProperty evidenceProperty = addPropertyIfPresent(s.getValue(StatementField.EVIDENCE_INTENSITY), "intensity");
AnnotationEvidenceProperty expContextSubjectProteinOrigin = addPropertyIfPresent(s.getValue(StatementField.ANNOTATION_SUBJECT_SPECIES), "subject-protein-origin");
AnnotationEvidenceProperty expContextObjectProteinOrigin = addPropertyIfPresent(s.getValue(StatementField.ANNOTATION_OBJECT_SPECIES), "object-protein-origin");
//Set properties which are not null
evidence.setProperties(
Arrays.asList(evidenceProperty, expContextSubjectProteinOrigin, expContextObjectProteinOrigin)
.stream().filter(p -> p != null)
.collect(Collectors.toList())
);
String statementEvidenceCode = s.getValue(StatementField.EVIDENCE_CODE);
evidence.setEvidenceCodeAC(statementEvidenceCode);
evidence.setAssignedBy(s.getValue(StatementField.ASSIGNED_BY));
evidence.setAssignmentMethod(s.getValue(StatementField.ASSIGMENT_METHOD));
evidence.setResourceType(s.getValue(StatementField.RESOURCE_TYPE));
evidence.setEvidenceCodeOntology("evidence-code-ontology-cv");
evidence.setNegativeEvidence("true".equalsIgnoreCase(s.getValue(StatementField.IS_NEGATIVE)));
if(statementEvidenceCode != null){
CvTerm term = terminologyService.findCvTermByAccession(statementEvidenceCode);
if(term != null){
evidence.setEvidenceCodeName(term.getName());
}else {
throw new NextProtException("Not found " + statementEvidenceCode + " in the database");
}
}
evidence.setNote(s.getValue(StatementField.EVIDENCE_NOTE));
//TODO create experimental contexts!
return evidence;
}).collect(Collectors.toSet());
//Ensures there is no repeated evidence!
evidencesSet.forEach(e -> {
long generatedEvidenceId = IdentifierOffset.EVIDENCE_ID_COUNTER_FOR_STATEMENTS.incrementAndGet();
e.setEvidenceId(generatedEvidenceId);
});
List<AnnotationEvidence> evidencesFiltered = evidencesSet.stream().filter(e -> e.getResourceId() != -2).collect(Collectors.toList());
if(evidencesFiltered.size() < evidencesSet.size()){
int total = evidencesSet.size();
int removed = total - evidencesFiltered.size();
LOGGER.debug("Removed " + removed + " evidence because no resource id from a total of " + total);
}
return new ArrayList<>(evidencesFiltered);
}
abstract void setIsoformName(T annotation, String statement);
abstract void setIsoformTargeting(T annotation, Statement statement);
protected void setVariantAttributes(T annotation, Statement variantStatement) {
String original = variantStatement.getValue(StatementField.VARIANT_ORIGINAL_AMINO_ACID);
String variant = variantStatement.getValue(StatementField.VARIANT_VARIATION_AMINO_ACID);
AnnotationVariant annotationVariant = new AnnotationVariant(original, variant);
annotation.setVariant(annotationVariant);
}
void setEvidenceResourceId(AnnotationEvidence evidence, Statement statement) {
String referenceDB = statement.getValue(StatementField.REFERENCE_DATABASE);
String referenceAC = statement.getValue(StatementField.REFERENCE_ACCESSION);
Publication publication = publicationService.findPublicationByDatabaseAndAccession(referenceDB, referenceAC);
if (publication == null) {
//Set -1 if not exists. Should never be the case
evidence.setResourceId((Long) throwErrorOrReturn("can 't find publication db:" + referenceDB + " id:" + referenceAC, -1L));
}
else {
evidence.setResourceId(publication.getPublicationId());
}
}
protected T buildAnnotation(String isoformName, List<Statement> flatStatements) {
List<T> annotations = buildAnnotationList(isoformName, flatStatements);
if(annotations.isEmpty() || annotations.size() > 1){
throw new NextProtException("Expecting 1 annotation but found " + annotations.size() + " from " + flatStatements.size());
}
return annotations.get(0);
}
public List<T> buildAnnotationList(String isoformName, List<Statement> flatStatements) {
List<T> annotations = new ArrayList<>();
Map<String, List<Statement>> flatStatementsByAnnotationHash = flatStatements.stream().collect(Collectors.groupingBy(rs -> rs.getValue(StatementField.ANNOTATION_ID)));
flatStatementsByAnnotationHash.entrySet().forEach(entry -> {
T annotation = get();
List<Statement> statements = entry.getValue();
Statement firstStatement = statements.get(0);
annotation.setAnnotationHash(firstStatement.getValue(StatementField.ANNOTATION_ID));
//annotation.setAnnotationName(firstStatement.getValue(StatementField.ANNOTATION_NAME));
AnnotationCategory category = AnnotationCategory.getDecamelizedAnnotationTypeName(StringUtils.camelToKebabCase(firstStatement.getValue(StatementField.ANNOTATION_CATEGORY)));
annotation.setAnnotationCategory(category);
if(category.equals(AnnotationCategory.VARIANT) || category.equals(AnnotationCategory.MUTAGENESIS)){
setVariantAttributes(annotation, firstStatement);
}
setIsoformTargeting(annotation, firstStatement);
setIsoformName(annotation, isoformName);
annotation.setDescription(firstStatement.getValue(StatementField.ANNOT_DESCRIPTION));
String cvTermAccession = firstStatement.getValue(StatementField.ANNOT_CV_TERM_ACCESSION);
//Set the evidences if not Mammalian phenotype or Protein Property https://issues.isb-sib.ch/browse/BIOEDITOR-466
if(!ANNOT_CATEGORIES_WITHOUT_EVIDENCES.contains(category)){
annotation.setEvidences(buildAnnotationEvidences(statements));
//TODO Remove this when you are able to do XREFs
if(((annotation.getEvidences() == null) || ((annotation.getEvidences().isEmpty()))) && (category.equals(AnnotationCategory.VARIANT) || category.equals(AnnotationCategory.MUTAGENESIS))){
annotation.setQualityQualifier("GOLD");//All variants from BED are GOLD, and this is a special case when we don't have evidences for VDs.
}else {
annotation.setQualityQualifier(AnnotationUtils.computeAnnotationQualityBasedOnEvidences(annotation.getEvidences()).name());
}
}else {
//Case of Protein propert and mammalian phenotypes
annotation.setEvidences(new ArrayList<AnnotationEvidence>());
boolean foundGold = statements.stream().anyMatch(s -> s.getValue(StatementField.EVIDENCE_QUALITY).equalsIgnoreCase("GOLD"));
if(foundGold){
annotation.setQualityQualifier("GOLD");
}else {
annotation.setQualityQualifier("SILVER");
}
}
if(cvTermAccession != null && !cvTermAccession.isEmpty()){
annotation.setCvTermAccessionCode(cvTermAccession);
CvTerm cvTerm = terminologyService.findCvTermByAccession(cvTermAccession);
if(cvTerm != null){
annotation.setCvTermName(cvTerm.getName());
annotation.setCvApiName(cvTerm.getOntology());
annotation.setCvTermDescription(cvTerm.getDescription());
if(category.equals(AnnotationCategory.PROTEIN_PROPERTY)){
//according to https://issues.isb-sib.ch/browse/BIOEDITOR-466
annotation.setDescription(cvTerm.getDescription());
}else if(category.equals(AnnotationCategory.MAMMALIAN_PHENOTYPE)){
annotation.setDescription("Relative to modification-effect annotations");
}
}else {
LOGGER.error("cv term was expected to be found " + cvTermAccession);
annotation.setCvTermName(firstStatement.getValue(StatementField.ANNOT_CV_TERM_NAME));
annotation.setCvApiName(firstStatement.getValue(StatementField.ANNOT_CV_TERM_TERMINOLOGY));
}
}
annotation.setAnnotationHash(firstStatement.getValue(StatementField.ANNOTATION_ID));
annotation.setAnnotationName(firstStatement.getValue(StatementField.ANNOTATION_NAME));
//Check this with PAM (does it need to be a human readable stuff)
annotation.setUniqueName(firstStatement.getValue(StatementField.ANNOTATION_ID)); //Does it need a name?
String bioObjectAnnotationHash = firstStatement.getValue(StatementField.OBJECT_ANNOTATION_IDS);
String bioObjectAccession = firstStatement.getValue(StatementField.BIOLOGICAL_OBJECT_ACCESSION);
String bot = firstStatement.getValue(StatementField.BIOLOGICAL_OBJECT_TYPE);
if ((bioObjectAnnotationHash != null) && (bioObjectAnnotationHash.length() > 0) || (bioObjectAccession != null && (bioObjectAccession.length() > 0))) {
BioObject bioObject = null;
if (AnnotationCategory.BINARY_INTERACTION.equals(annotation.getAPICategory())) {
if(bioObjectAccession.startsWith("NX_") && BioType.PROTEIN.name().equalsIgnoreCase(bot)){
// note that if we handle BioType.PROTEIN_ISOFORM in the future, we should
// add the property isoformName as well, see how it's done in BinaryInteraction2Annotation.newBioObject()
bioObject = BioObject.internal(BioType.PROTEIN);
bioObject.setAccession(bioObjectAccession);
bioObject.putPropertyNameValue("geneName", firstStatement.getValue(StatementField.BIOLOGICAL_OBJECT_NAME));
String proteinName = (String)mainNamesService.findIsoformOrEntryMainName().get(bioObjectAccession).getName();
bioObject.putPropertyNameValue("proteinName", proteinName);
bioObject.putPropertyNameValue("url", "https://www.nextprot.org/entry/" + bioObjectAccession + "/interactions");
}else {
throw new NextProtException("Binary Interaction only expects to be a nextprot entry NX_ and found " + bioObjectAccession + " with type " + bot);
}
}else if (AnnotationCategory.PHENOTYPIC_VARIATION.equals(annotation.getAPICategory())) {
bioObject = BioObject.internal(BioType.ENTRY_ANNOTATION);
bioObject.setAnnotationHash(bioObjectAnnotationHash);
}else {
throw new NextProtException("Category not expected for bioobject " + annotation.getAPICategory());
}
annotation.setBioObject(bioObject);
}
annotations.add(annotation);
});
return annotations;
}
private Object throwErrorOrReturn(String message, Object returnObject){
LOGGER.error(message);
if(STRICT){
throw new NextProtException(message);
}else return returnObject;
}
}