package org.nextprot.api.core.utils.annot;
import org.nextprot.api.commons.constants.AnnotationCategory;
import org.nextprot.api.commons.constants.PropertyApiModel;
import org.nextprot.api.commons.exception.NextProtException;
import org.nextprot.api.core.domain.BioObject;
import org.nextprot.api.core.domain.CvTerm;
import org.nextprot.api.core.domain.Entry;
import org.nextprot.api.core.domain.EntryUtils;
import org.nextprot.api.core.domain.ExperimentalContext;
import org.nextprot.api.core.domain.annotation.Annotation;
import org.nextprot.api.core.domain.annotation.AnnotationEvidence;
import org.nextprot.api.core.domain.annotation.AnnotationIsoformSpecificity;
import org.nextprot.api.core.domain.annotation.AnnotationProperty;
import org.nextprot.api.core.utils.annot.comp.AnnotationComparators;
import org.nextprot.api.core.utils.annot.merge.impl.AnnotationListMapReduceMerger;
import org.nextprot.api.core.utils.annot.merge.impl.AnnotationListMergerImpl;
import org.nextprot.commons.constants.QualityQualifier;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Static helpers to inspect, filter and transform {@link Annotation} lists.
 *
 * <p>Null-handling convention used in this class: methods that historically returned
 * {@code null} for a {@code null} annotation list still do so (callers may rely on it);
 * methods that used to fail with a {@link NullPointerException} on a {@code null} list now
 * return an empty result instead. Annotations whose evidence list is {@code null} are
 * treated as having no evidence.
 */
public class AnnotationUtils {

    /** Quality qualifier value identifying gold annotations / evidences. */
    private static final String GOLD = "GOLD";

    /** Value of {@code AnnotationEvidence.getResourceAssociationType()} marking a "relative" evidence. */
    private static final String RELATIVE_ASSOCIATION = "relative";

    /** Separator placed between a term and its ancestor in {@link #getTermNameWithAncestors}. */
    private static final String ANCESTOR_SEPARATOR = " \u00BB ";

    /** Description keywords that rule out a disease relation (see {@link #isVariantRelatedToDiseaseProperty}). */
    private static final String[] NON_DISEASE_KEYWORDS = {
            "allele", "population", "isozyme", "%", "clone", "polymorphism"
    };

    /** Description fragments that suggest a disease relation (see {@link #isVariantRelatedToDiseaseProperty}). */
    private static final String[] DISEASE_FRAGMENTS = {
            "found in", "associated with"
    };

    private AnnotationUtils() {
        throw new IllegalAccessError("Utility class");
    }

    /**
     * Builds a multi-line, human readable dump of the main fields of an annotation.
     *
     * @param a the annotation to describe (must not be null)
     * @return one "label :value" line per field, each terminated by a newline
     */
    public static String toString(Annotation a) {
        final String sep = "\n";
        StringBuilder sb = new StringBuilder();

        sb.append("isProteoformAnnotation :").append(a.isProteoformAnnotation()).append(sep);
        sb.append("getAnnotationHash :").append(a.getAnnotationHash()).append(sep);
        sb.append("getAnnotationId :").append(a.getAnnotationId()).append(sep);
        sb.append("getAnnotationName :").append(a.getAnnotationName()).append(sep);
        sb.append("getUniqueName :").append(a.getUniqueName()).append(sep);
        sb.append("getDescription :").append(a.getDescription()).append(sep);
        sb.append("getBioObject :").append(a.getBioObject() == null ? "null" : a.getBioObject()).append(sep);

        sb.append("getSubjectComponents size :")
                .append(a.getSubjectComponents() == null ? 0 : a.getSubjectComponents().size())
                .append(sep);
        if (a.getSubjectComponents() != null) {
            for (String c : a.getSubjectComponents()) {
                sb.append("- component :").append(c).append(sep);
            }
        }

        sb.append("getApiTypeName :").append(a.getApiTypeName()).append(sep);
        sb.append("getCategory :").append(a.getCategory()).append(sep);
        sb.append("getCategoryName :").append(a.getCategoryName()).append(sep);
        sb.append("getCvTermAccessionCode :").append(a.getCvTermAccessionCode()).append(sep);
        sb.append("getCvTermName :").append(a.getCvTermName()).append(sep);

        List<AnnotationEvidence> evidences = a.getEvidences();
        sb.append("getEvidences.size :").append(evidences == null ? 0 : evidences.size()).append(sep);
        if (evidences != null) {
            for (AnnotationEvidence ae : evidences) {
                sb.append("- evidence :").append(ae.getEvidenceId()).append(sep);
            }
        }

        sb.append("getTargetingIsoformsMap size:")
                .append(a.getTargetingIsoformsMap() == null ? 0 : a.getTargetingIsoformsMap().size())
                .append(sep);

        return sb.toString();
    }

    /**
     * Keeps the annotations of an entry whose hash belongs to the given set.
     *
     * @param entry  the entry providing the annotations
     * @param hashes the accepted annotation hashes
     * @return the matching annotations, or {@code null} if the entry has no annotation list
     */
    public static List<Annotation> filterAnnotationsByHashes(Entry entry, Set<String> hashes) {
        List<Annotation> annotations = entry.getAnnotations();
        if (annotations == null) {
            return null;
        }
        return annotations.stream()
                .filter(a -> hashes.contains(a.getAnnotationHash()))
                .collect(Collectors.toList());
    }

    /**
     * Filters the annotations of an entry by category, including child categories.
     * WARNING: when {@code goldOnly} is true the evidences of the returned annotations are
     * modified in place (non-gold evidences are removed).
     *
     * @param entry              the entry providing the annotations
     * @param annotationCategory the category to keep ({@code null} keeps every category)
     * @param goldOnly           if true, keep gold annotations only
     * @return the filtered, sorted annotations, or {@code null} if the entry has no annotation list
     */
    public static List<Annotation> filterAnnotationsByCategory(Entry entry, AnnotationCategory annotationCategory, boolean goldOnly) {
        return filterAnnotationsByCategory(entry, annotationCategory, true, goldOnly);
    }

    /**
     * Filters the annotations of an entry by category.
     * WARNING: when {@code goldOnly} is true the evidences of the returned annotations are
     * modified in place (non-gold evidences are removed).
     *
     * @param entry              the entry providing the annotations
     * @param annotationCategory the category to keep ({@code null} keeps every category)
     * @param withChildren       if true, annotations whose category is a child of {@code annotationCategory} are kept too
     * @param goldOnly           if true, keep gold annotations only
     * @return the filtered, sorted annotations, or {@code null} if the entry has no annotation list
     */
    public static List<Annotation> filterAnnotationsByCategory(Entry entry, AnnotationCategory annotationCategory, boolean withChildren, boolean goldOnly) {
        return filterAnnotationsByCategory(entry.getAnnotations(), annotationCategory, withChildren, goldOnly);
    }

    /**
     * Filters a list of annotations by category and, optionally, by quality, then sorts the result.
     * WARNING: when {@code goldOnly} is true the evidences of the returned annotations are
     * modified in place: only evidences that are gold or have no quality qualifier are kept.
     *
     * @param annotations        the annotations to filter
     * @param annotationCategory the category to keep ({@code null} keeps every category)
     * @param withChildren       if true, annotations having a category which is a child of annotationCategory are included in the list
     * @param goldOnly           if true, keep gold annotations only
     * @return a new sorted list of annotations, or {@code null} if {@code annotations} is null
     */
    public static List<Annotation> filterAnnotationsByCategory(List<Annotation> annotations, AnnotationCategory annotationCategory, boolean withChildren, boolean goldOnly) {
        if (annotations == null) {
            return null;
        }

        List<Annotation> filteredAnnotations = annotations.stream()
                .filter(a -> matchesCategory(a, annotationCategory, withChildren))
                .filter(a -> !goldOnly || GOLD.equalsIgnoreCase(a.getQualityQualifier()))
                .collect(Collectors.toList());

        if (goldOnly) {
            for (Annotation a : filteredAnnotations) {
                List<AnnotationEvidence> evidences = a.getEvidences();
                if (evidences == null) {
                    continue;
                }
                //TODO check that mutating this annotation does not corrupt the instance held in the cache
                a.setEvidences(evidences.stream()
                        .filter(AnnotationUtils::isGoldOrUnqualified)
                        .collect(Collectors.toList()));
            }
        }

        if (annotationCategory == AnnotationCategory.PHENOTYPIC_VARIATION) {
            // this comparator is built from the hash map of the complete, unfiltered list
            filteredAnnotations.sort(AnnotationComparators.newPhenotypicVariationComparator(EntryUtils.getHashAnnotationMap(annotations)));
        } else {
            filteredAnnotations.sort(AnnotationComparators.newComparator(annotationCategory));
        }
        return filteredAnnotations;
    }

    /** Tells whether an annotation belongs to the requested category (a null request matches everything). */
    private static boolean matchesCategory(Annotation a, AnnotationCategory requested, boolean withChildren) {
        if (requested == null) {
            return true;
        }
        AnnotationCategory actual = a.getAPICategory();
        if (actual == requested) {
            return true;
        }
        return withChildren && actual != null && actual.isChildOf(requested);
    }

    /** Tells whether an evidence is gold or carries no (null / empty) quality qualifier. */
    private static boolean isGoldOrUnqualified(AnnotationEvidence e) {
        String quality = e.getQualityQualifier();
        return quality == null || quality.isEmpty() || GOLD.equalsIgnoreCase(quality);
    }

    /**
     * Collects the non-null experimental context ids referenced by the evidences of the annotations.
     *
     * @param annotations the annotations to scan
     * @return the set of ids; empty if {@code annotations} is null
     */
    public static Set<Long> getExperimentalContextIdsForAnnotations(List<Annotation> annotations) {
        Set<Long> ecIds = new HashSet<>();
        if (annotations == null) {
            return ecIds;
        }
        for (Annotation a : annotations) {
            List<AnnotationEvidence> evidences = a.getEvidences();
            if (evidences == null) {
                continue;
            }
            for (AnnotationEvidence e : evidences) {
                Long ecId = e.getExperimentalContextId();
                if (ecId != null) {
                    ecIds.add(ecId);
                }
            }
        }
        return ecIds;
    }

    /**
     * Keeps the annotations that are positional for the isoform and fully contained in [start, end].
     *
     * @param start       lower bound (inclusive) on the isoform
     * @param end         upper bound (inclusive) on the isoform
     * @param annotations the annotations to filter
     * @param isoform     the isoform on which positions are evaluated
     * @return the annotations lying between the positions, or {@code null} if {@code annotations} is null
     */
    public static List<Annotation> filterAnnotationsBetweenPositions(int start, int end, List<Annotation> annotations, String isoform) {
        if (annotations == null) {
            return null;
        }
        List<Annotation> finalAnnotations = new ArrayList<>();
        for (Annotation annot : annotations) {
            if (!annot.isAnnotationPositionalForIsoform(isoform)) {
                continue;
            }
            int isoStartPosition = annot.getStartPositionForIsoform(isoform);
            int isoEndPosition = annot.getEndPositionForIsoform(isoform);
            if (isoStartPosition >= start && isoEndPosition <= end) {
                finalAnnotations.add(annot);
            }
        }
        return finalAnnotations;
    }

    /**
     * Collects the resource ids of the evidences that are xrefs.
     *
     * @param annotations the annotations to scan
     * @return the set of xref ids, or {@code null} if {@code annotations} is null
     */
    public static Set<Long> getXrefIdsForAnnotations(List<Annotation> annotations) {
        if (annotations == null) {
            return null;
        }
        Set<Long> xrefIds = new HashSet<>();
        for (Annotation a : annotations) {
            List<AnnotationEvidence> evidences = a.getEvidences();
            if (evidences == null) {
                continue;
            }
            for (AnnotationEvidence e : evidences) {
                if (e.isResourceAXref()) {
                    xrefIds.add(e.getResourceId());
                }
            }
        }
        return xrefIds;
    }

    /**
     * Returns a set of xref identifiers in some special cases:
     * - "sequence caution" annotation type => xrefs found in "differing sequence" property
     * - "cofactor" annotation type => id of the annotation's bio object (skipped when there is none)
     * - "disease" annotation type => xrefs found in "alternative disease term" property
     *
     * @param annotations the annotations to scan
     * @return the set of xref ids; empty if {@code annotations} is null
     */
    public static Set<Long> getXrefIdsFromAnnotations(List<Annotation> annotations) {
        Set<Long> xrefIds = new HashSet<>();
        if (annotations == null) {
            return xrefIds;
        }
        for (Annotation a : annotations) {
            AnnotationCategory category = a.getAPICategory();
            if (category == AnnotationCategory.SEQUENCE_CAUTION) {
                addXrefIdRelatedToAnnotationPropertyName(a, PropertyApiModel.NAME_DIFFERING_SEQUENCE, xrefIds);
            } else if (category == AnnotationCategory.COFACTOR) {
                BioObject bo = a.getBioObject();
                if (bo != null) {
                    xrefIds.add(bo.getId());
                }
            } else if (category == AnnotationCategory.DISEASE) {
                addXrefIdRelatedToAnnotationPropertyName(a, PropertyApiModel.NAME_ALTERNATIVE_DISEASE_TERM, xrefIds);
            }
        }
        return xrefIds;
    }

    /** Adds to xrefIds the value of every RIF-typed property of the annotation named propName. */
    private static void addXrefIdRelatedToAnnotationPropertyName(Annotation a, String propName, Set<Long> xrefIds) {
        if (a.getProperties() == null) {
            return;
        }
        for (AnnotationProperty p : a.getProperties()) {
            // constant-first / Objects.equals comparisons tolerate properties with a null name or value type
            if (Objects.equals(propName, p.getName()) && PropertyApiModel.VALUE_TYPE_RIF.equals(p.getValueType())) {
                xrefIds.add(Long.parseLong(p.getValue()));
            }
        }
    }

    /**
     * Returns a set of xref identifiers corresponding to the interactants which are involved
     * in binary interaction annotations and which are not human proteins (xeno interactions).
     *
     * @param annotations the annotations to scan
     * @return the ids of the bio objects flagged as interactant, or {@code null} if {@code annotations} is null
     */
    public static Set<Long> getXrefIdsForInteractionsInteractants(List<Annotation> annotations) {
        if (annotations == null) {
            return null;
        }
        Set<Long> xrefIds = new HashSet<>();
        for (Annotation a : annotations) {
            BioObject bo = a.getBioObject();
            if (bo != null && bo.isInteractant()) {
                xrefIds.add(bo.getId());
            }
        }
        return xrefIds;
    }

    /**
     * Collects the resource ids of the evidences that are publications.
     *
     * @param annotations the annotations to scan
     * @return the set of publication ids; empty if {@code annotations} is null
     */
    public static Set<Long> getPublicationIdsForAnnotations(List<Annotation> annotations) {
        Set<Long> publicationIds = new HashSet<>();
        if (annotations == null) {
            return publicationIds;
        }
        for (Annotation a : annotations) {
            List<AnnotationEvidence> evidences = a.getEvidences();
            if (evidences == null) {
                continue;
            }
            for (AnnotationEvidence e : evidences) {
                if (e.isResourceAPublication()) {
                    publicationIds.add(e.getResourceId());
                }
            }
        }
        return publicationIds;
    }

    /**
     * Converts the "relative" evidences of some annotation categories and removes them from
     * the evidence list of the annotation:
     * SEQUENCE_CAUTION => converted to a property named "differing sequence"
     * COFACTOR => converted to an external chemical bio object set on the annotation
     * DISEASE => converted to a property named "alternative disease term"
     *
     * @param annotations the annotations to update in place (a null list is ignored)
     */
    public static void convertRelativeEvidencesToProperties(List<Annotation> annotations) {
        if (annotations == null) {
            return;
        }
        for (Annotation annot : annotations) {
            AnnotationCategory category = annot.getAPICategory();
            List<AnnotationEvidence> evidencesToRemove = null;

            if (category == AnnotationCategory.SEQUENCE_CAUTION) {
                evidencesToRemove = convertRelativeEvidenceToProperty(annot, PropertyApiModel.NAME_DIFFERING_SEQUENCE);
            } else if (category == AnnotationCategory.COFACTOR) {
                evidencesToRemove = convertRelativeEvidenceToExternalChemicalBioObject(annot);
            } else if (category == AnnotationCategory.DISEASE) {
                evidencesToRemove = convertRelativeEvidenceToProperty(annot, PropertyApiModel.NAME_ALTERNATIVE_DISEASE_TERM);
            }

            // nothing to remove also covers the case of an annotation without evidence list
            if (evidencesToRemove != null && !evidencesToRemove.isEmpty()) {
                annot.getEvidences().removeAll(evidencesToRemove);
            }
        }
    }

    /** Adds one RIF property per "relative" evidence and returns the evidences that were converted. */
    private static List<AnnotationEvidence> convertRelativeEvidenceToProperty(Annotation annot, String propertyName) {
        List<AnnotationEvidence> toRemove = new ArrayList<>();
        List<AnnotationEvidence> evidences = annot.getEvidences();
        if (evidences == null) {
            return toRemove;
        }
        for (AnnotationEvidence evi : evidences) {
            if (RELATIVE_ASSOCIATION.equals(evi.getResourceAssociationType())) {
                AnnotationProperty p = new AnnotationProperty();
                p.setAnnotationId(annot.getAnnotationId());
                p.setAccession(evi.getResourceAccession());
                p.setName(propertyName);
                p.setValue(Long.toString(evi.getResourceId()));
                p.setValueType(PropertyApiModel.VALUE_TYPE_RIF);
                annot.addProperties(Arrays.asList(p));
                toRemove.add(evi);
            }
        }
        return toRemove;
    }

    /**
     * Sets the bio object of the annotation from its "relative" evidences (the last one wins)
     * and returns the evidences that were converted.
     */
    private static List<AnnotationEvidence> convertRelativeEvidenceToExternalChemicalBioObject(Annotation annot) {
        List<AnnotationEvidence> toRemove = new ArrayList<>();
        List<AnnotationEvidence> evidences = annot.getEvidences();
        if (evidences == null) {
            return toRemove;
        }
        for (AnnotationEvidence evi : evidences) {
            if (RELATIVE_ASSOCIATION.equals(evi.getResourceAssociationType())) {
                annot.setBioObject(newExternalChemicalBioObject(evi));
                toRemove.add(evi);
            }
        }
        return toRemove;
    }

    /**
     * Creates an external chemical bio object from the resource described by an evidence.
     * The "name" property of the evidence, when present, is stored as "chemical name".
     */
    static BioObject newExternalChemicalBioObject(AnnotationEvidence evi) {
        BioObject bo = BioObject.external(BioObject.BioType.CHEMICAL, evi.getResourceDb());
        bo.setId(evi.getResourceId());
        bo.setAccession(evi.getResourceAccession());

        String chemicalName = evi.getPropertyValue("name");
        if (chemicalName != null) {
            bo.getProperties().put("chemical name", chemicalName);
        }
        return bo;
    }

    /**
     * Merges two annotation lists with an {@link AnnotationListMergerImpl}.
     *
     * @return the list produced by the merger
     */
    public static List<Annotation> merge(List<Annotation> srcAnnotationList, List<Annotation> destAnnotationList) {
        return new AnnotationListMergerImpl().merge(srcAnnotationList, destAnnotationList);
    }

    /**
     * Merges two annotation lists with an {@link AnnotationListMapReduceMerger}.
     *
     * @return the list produced by the merger
     */
    public static List<Annotation> mapReduceMerge(List<Annotation> statementAnnotations, List<Annotation> standardAnnotations) {
        return new AnnotationListMapReduceMerger().merge(statementAnnotations, standardAnnotations);
    }

    /**
     * Computes the quality of an annotation from its evidences: GOLD as soon as one evidence
     * is GOLD, SILVER otherwise.
     *
     * @param evidences the evidences of the annotation
     * @return {@link QualityQualifier#GOLD} or {@link QualityQualifier#SILVER}
     * @throws NextProtException if the list is null / empty, or if an evidence without quality
     *                           is met before any GOLD evidence
     */
    public static QualityQualifier computeAnnotationQualityBasedOnEvidences(List<AnnotationEvidence> evidences) {
        if (evidences == null || evidences.isEmpty()) {
            throw new NextProtException("Can't compute quality qualifier based on empty / null evidences");
        }
        for (AnnotationEvidence e : evidences) {
            String quality = e.getQualityQualifier();
            if (quality == null) {
                throw new NextProtException("Found evidence without any quality");
            }
            // one GOLD evidence is enough for the annotation to be GOLD
            if (QualityQualifier.valueOf(quality) == QualityQualifier.GOLD) {
                return QualityQualifier.GOLD;
            }
        }
        return QualityQualifier.SILVER;
    }

    /**
     * Joins the names of the given terms in reverse list order, separated by " » ".
     *
     * @param annot not used by this method
     * @param terms the terms whose names are joined; the last term of the list comes first in the result
     * @return the joined names; an empty string if {@code terms} is null or empty
     */
    public static String getTermNameWithAncestors(Annotation annot, List<CvTerm> terms) {
        StringBuilder sb = new StringBuilder();
        if (terms == null) {
            return sb.toString();
        }
        for (int i = terms.size() - 1; i >= 0; i--) {
            sb.append(terms.get(i).getName());
            if (i > 0) {
                sb.append(ANCESTOR_SEPARATOR);
            }
        }
        return sb.toString();
    }

    /**
     * pam, 28 March 2017
     * This method returns true for variant annotations that are somehow related to a disease
     * otherwise returns false.
     *
     * @param annot any annotation
     * @param ecs   experimental contexts indexed by their id
     * @return true if the annotation is a variant related to a disease
     */
    public static boolean isVariantRelatedToDiseaseProperty(Annotation annot, Map<Long, ExperimentalContext> ecs) {
        if (AnnotationCategory.VARIANT != annot.getAPICategory()) {
            return false;
        }

        // condition 1: if there is a disease in an evidence experimental context, return true > 1'000'000 cases
        List<AnnotationEvidence> evidences = annot.getEvidences();
        if (evidences != null) {
            for (AnnotationEvidence ev : evidences) {
                Long ecId = ev.getExperimentalContextId();
                if (ecId == null || ecId == 0) {
                    continue;
                }
                ExperimentalContext ec = ecs.get(ecId);
                if (ec == null) {
                    // NOTE(review): should go through the project logger rather than stdout
                    System.out.println("WARNING: Could not find ExperimentalContext with id:" + ecId );
                } else if (ec.getDisease() != null) {
                    return true;
                }
            }
        }

        // condition 2: if there exists at least 1 variant disease term, return true > 30'000 cases
        if (annot.getVariant() != null
                && annot.getVariant().getDiseaseTerms() != null
                && annot.getVariant().getDiseaseTerms().size() > 0) {
            return true;
        }

        // condition 3: description matches some patterns, return true > 5'000 cases
        if (annot.getDescription() == null) {
            return false;
        }
        // Locale.ROOT keeps the keyword matching independent from the JVM default locale
        String desc = annot.getDescription().toLowerCase(Locale.ROOT);
        for (String keyword : NON_DISEASE_KEYWORDS) {
            if (desc.contains(keyword)) {
                return false;
            }
        }
        if (desc.startsWith("in")) {
            return true;
        }
        for (String fragment : DISEASE_FRAGMENTS) {
            if (desc.contains(fragment)) {
                return true;
            }
        }
        // else > 3'000'000 cases
        return false;
    }

    /**
     * Tells whether a miscellaneous region annotation has a description mentioning "bind"
     * (case-insensitive).
     *
     * @param annot any annotation
     * @return true for a MISCELLANEOUS_REGION annotation whose description contains "bind"
     */
    public static boolean isMiscRegionRelatedToInteractions(Annotation annot) {
        if (AnnotationCategory.MISCELLANEOUS_REGION != annot.getAPICategory()) {
            return false;
        }
        String description = annot.getDescription();
        // Locale.ROOT keeps the keyword matching independent from the JVM default locale
        return description != null && description.toLowerCase(Locale.ROOT).contains("bind");
    }

    /**
     * Pam, 22 march 2017
     *
     * This method is created to display properly the general annotations that are isoform specific.
     *
     * 1) The general rule in NP1 is that an annotation is displayed as specific
     * if there exists a targetingIsoformMap record for the isoform AND the number of targetingIsoformMap
     * records for this annotation is inferior to the number of isoforms (which means that the annotation doesn't apply to each isoform)
     *
     * 2) There is known exception in NP1 for binary interaction annotations. In this case we always have a targetingIsoformMap record
     * for each isoform but you must rely on the targetingIsoformMap.getSpecificity() to determine if the annotation
     * is specific for an isoform or not. This rule was introduced because we didn't want to penalize IntAct annotations that
     * are supported by experiments performed with a known isoform.
     *
     * 3) Some annotation categories may require a review concerning how they deal with isoform specificity (NP1 & BED pipelines)
     * A jira issue will be created...
     *
     * @param annot             the annotation to examine
     * @param entryIsoformCount the number of isoforms of the entry owning the annotation
     * @return the accessions of the isoforms for which the annotation is displayed as specific
     *         (empty if the annotation has no targeting isoform map)
     */
    public static List<String> computeIsoformsDisplayedAsSpecific(Annotation annot, int entryIsoformCount) {
        List<String> result = new ArrayList<>();
        if (annot.getTargetingIsoformsMap() == null) {
            return result;
        }

        if (AnnotationCategory.BINARY_INTERACTION == annot.getAPICategory()) {
            for (AnnotationIsoformSpecificity spec : annot.getTargetingIsoformsMap().values()) {
                if ("SPECIFIC".equals(spec.getSpecificity())) {
                    result.add(spec.getIsoformAccession());
                }
            }
            // specific for every isoform means not specific at all
            if (result.size() == entryIsoformCount) {
                result = new ArrayList<>();
            }
        } else if (annot.getTargetingIsoformsMap().size() < entryIsoformCount) {
            for (AnnotationIsoformSpecificity spec : annot.getTargetingIsoformsMap().values()) {
                result.add(spec.getIsoformAccession());
            }
        }
        return result;
    }
}