package org.nextprot.api.core.domain;
import org.nextprot.api.commons.constants.AnnotationCategory;
import org.nextprot.api.core.domain.annotation.Annotation;
import org.nextprot.api.core.domain.annotation.AnnotationEvidence;
import org.nextprot.api.core.domain.annotation.AnnotationIsoformSpecificity;
import org.nextprot.api.core.service.fluent.EntryConfig;
import org.nextprot.api.core.utils.ExperimentalContextUtil;
import org.nextprot.api.core.utils.PublicationUtils;
import org.nextprot.api.core.utils.XrefUtils;
import org.nextprot.api.core.utils.annot.AnnotationUtils;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
public class EntryUtils implements Serializable{
private static final long serialVersionUID = 3009334685615648172L;
/**
 * Returns the part of an accession that precedes its first {@code '-'}.
 * <p>
 * Accessions without a dash (including the empty string) are returned unchanged,
 * and {@code null} is returned as {@code null}.
 *
 * @param nextprotAccession the accession to truncate; may be {@code null}
 * @return the text before the first dash, or the argument itself when there is no dash
 */
public static String getEntryName(String nextprotAccession) {
    if (nextprotAccession == null) {
        return null;
    }
    int dashIndex = nextprotAccession.indexOf('-');
    return (dashIndex < 0) ? nextprotAccession : nextprotAccession.substring(0, dashIndex);
}
/**
 * Gathers the experimental context ids carried by the evidences of the given annotations.
 * <p>
 * Ids that are {@code null} or equal to zero are ignored, as are annotations whose
 * evidence collection is {@code null}.
 *
 * @param annotations the annotations to scan; may be {@code null}
 * @return a sorted set of the collected ids (empty when {@code annotations} is {@code null})
 */
public static Set<Long> getExperimentalContextIds(List<Annotation> annotations) {
    Set<Long> contextIds = new TreeSet<>();
    if (annotations == null) {
        return contextIds;
    }
    for (Annotation annotation : annotations) {
        if (annotation.getEvidences() == null) {
            continue;
        }
        for (AnnotationEvidence evidence : annotation.getEvidences()) {
            Long contextId = evidence.getExperimentalContextId();
            boolean usable = contextId != null && contextId != 0;
            if (usable) {
                contextIds.add(contextId);
            }
        }
    }
    return contextIds;
}
/**
 * Filters the annotations of an entry according to the given configuration and, unless the
 * configuration asks for no additional references, narrows the entry's xrefs, publications
 * and experimental contexts to those referenced by the retained annotations.
 * <p>
 * The entry is modified in place and returned. Nothing is changed when the configuration
 * has neither a sub-part nor the gold-only flag, or when the entry has no annotations.
 *
 * @param entry  the entry to filter (mutated)
 * @param config the configuration driving the filtering
 * @return the same {@code entry} instance
 */
public static Entry filterEntryBySubPart(Entry entry, EntryConfig config) {
    // Nothing to do unless a filter is configured...
    if (!config.hasSubPart() && !config.hasGoldOnly()) {
        return entry;
    }
    // ...and there is something to filter.
    if (entry.getAnnotations() == null || entry.getAnnotations().isEmpty()) {
        return entry;
    }

    final List<Annotation> selected =
            AnnotationUtils.filterAnnotationsByCategory(entry, config.getSubpart(), config.hasGoldOnly());

    // Proteoform annotations refer to other annotations by hash (their subject components
    // and their bio object); collect those hashes so the referenced annotations are kept too.
    final Set<String> requiredHashes = new HashSet<>();
    for (Annotation annotation : selected) {
        if (!annotation.isProteoformAnnotation()) {
            continue;
        }
        for (String subjectHash : annotation.getSubjectComponents()) {
            requiredHashes.add(subjectHash);
        }
        requiredHashes.add(annotation.getBioObject().getAnnotationHash());
    }

    final List<Annotation> dependencies = AnnotationUtils.filterAnnotationsByHashes(entry, requiredHashes);
    if (config.hasGoldOnly()) {
        // Re-apply the category filter to the dependencies, one category at a time.
        Map<AnnotationCategory, List<Annotation>> dependenciesByCategory =
                dependencies.stream().collect(Collectors.groupingBy(Annotation::getAPICategory));
        for (Map.Entry<AnnotationCategory, List<Annotation>> group : dependenciesByCategory.entrySet()) {
            selected.addAll(AnnotationUtils.filterAnnotationsByCategory(
                    group.getValue(), group.getKey(), true, config.hasGoldOnly()));
        }
    } else {
        selected.addAll(dependencies);
    }
    entry.setAnnotations(selected);

    // When xrefs, publications and experimental contexts are not needed, skip them (faster).
    if (config.hasNoAdditionalReferences()) {
        return entry;
    }

    Set<Long> xrefIds = AnnotationUtils.getXrefIdsForAnnotations(selected);
    xrefIds.addAll(AnnotationUtils.getXrefIdsForInteractionsInteractants(selected));
    xrefIds.addAll(AnnotationUtils.getXrefIdsFromAnnotations(selected));

    // Compute all three filtered lists before writing any of them back to the entry.
    List<DbXref> keptXrefs = XrefUtils.filterXrefsByIds(entry.getXrefs(), xrefIds);
    List<Publication> keptPublications = PublicationUtils.filterPublicationsByIds(
            entry.getPublications(), AnnotationUtils.getPublicationIdsForAnnotations(selected));
    List<ExperimentalContext> keptContexts = ExperimentalContextUtil.filterExperimentalContextsByIds(
            entry.getExperimentalContexts(), AnnotationUtils.getExperimentalContextIdsForAnnotations(selected));

    entry.setXrefs(keptXrefs);
    entry.setPublications(keptPublications);
    entry.setExperimentalContexts(keptContexts);
    return entry;
}
/**
 * Indexes the annotations of an entry by their unique name.
 * <p>
 * When several annotations share a unique name, the last one encountered wins.
 *
 * @param entry the entry whose annotations are indexed
 * @return a {@link HashMap} from annotation unique name (= identifier in both NP1 and BED world)
 *         to the annotation itself
 */
public static Map<String,Annotation> getUniqueNameAnnotationMap(Entry entry) {
    final Map<String, Annotation> annotationsByUniqueName = new HashMap<>();
    entry.getAnnotations().forEach(
            annotation -> annotationsByUniqueName.put(annotation.getUniqueName(), annotation));
    return annotationsByUniqueName;
}
/**
 * Counts the annotations of an entry per API type name.
 *
 * @param entry the entry whose annotations are counted
 * @return a map, sorted by key, from API type name to the number of annotations having it;
 *         empty when the entry has no annotation list
 */
public static Map<String,Integer> getAnnotationCategoryCountMap(Entry entry) {
    Map<String, Integer> countsByTypeName = new TreeMap<>();
    List<Annotation> annotations = entry.getAnnotations();
    if (annotations == null) {
        // An entry may carry no annotation list at all; report zero counts instead of failing.
        return countsByTypeName;
    }
    for (Annotation annotation : annotations) {
        // merge() inserts 1 for a new key and adds 1 to an existing count.
        countsByTypeName.merge(annotation.getApiTypeName(), 1, Integer::sum);
    }
    return countsByTypeName;
}
/**
 * Debug helper: prints every mapping of the given map to standard output,
 * one {@code key => value} line per entry, in the map's iteration order.
 *
 * @param map the map to print
 */
private static void printMap(Map<?, ?> map) {
    for (Map.Entry<?, ?> mapping : map.entrySet()) {
        System.out.println(mapping.getKey() + " => " + mapping.getValue());
    }
}
/**
 * Indexes the annotations of an entry by their annotation hash.
 * Delegates to {@link #getHashAnnotationMap(List)} with the entry's annotations.
 *
 * @param entry the entry whose annotations are indexed
 * @return a map from annotation hash to annotation
 */
public static Map<String,Annotation> getHashAnnotationMap(Entry entry) {
    final List<Annotation> entryAnnotations = entry.getAnnotations();
    return getHashAnnotationMap(entryAnnotations);
}
/**
 * Indexes the given annotations by their annotation hash.
 * <p>
 * Annotations whose hash is {@code null} or empty are left out. When several annotations
 * share a hash, the last one encountered wins.
 *
 * @param annotations the annotations to index
 * @return a {@link HashMap} from annotation hash (= identifier in BED world) to the annotation itself
 */
public static Map<String,Annotation> getHashAnnotationMap(List<Annotation> annotations) {
    final Map<String, Annotation> annotationsByHash = new HashMap<>();
    for (Annotation annotation : annotations) {
        boolean hasHash = annotation.getAnnotationHash() != null
                && !annotation.getAnnotationHash().isEmpty();
        if (!hasHash) {
            continue;
        }
        annotationsByHash.put(annotation.getAnnotationHash(), annotation);
    }
    return annotationsByHash;
}
/**
 * Groups the proteoform annotations of an entry by proteoform, for one isoform.
 * <p>
 * The method is isoform-aware: only proteoform annotations whose targeting-isoforms map
 * contains an {@link AnnotationIsoformSpecificity} record for {@code isoformAc} are taken
 * into account. The proteoform key is built from the isoform accession, the name of that
 * specificity record and the annotation's subject components.
 *
 * @param entry     the entry whose annotations are grouped
 * @param isoformAc the accession of the isoform of interest
 * @return a map from proteoform to the list of annotations related to it
 */
public static Map<Proteoform,List<Annotation>> getProteoformAnnotationsMap(Entry entry, String isoformAc) {
    final Map<Proteoform, List<Annotation>> annotationsByProteoform = new HashMap<>();
    for (Annotation annotation : entry.getAnnotations()) {
        if (!annotation.isProteoformAnnotation()
                || !annotation.getTargetingIsoformsMap().containsKey(isoformAc)) {
            continue;
        }
        AnnotationIsoformSpecificity specificity = annotation.getTargetingIsoformsMap().get(isoformAc);
        Proteoform proteoform =
                new Proteoform(isoformAc, specificity.getName(), annotation.getSubjectComponents());
        annotationsByProteoform.computeIfAbsent(proteoform, unused -> new ArrayList<>()).add(annotation);
    }
    return annotationsByProteoform;
}
/**
 * Lists the function descriptions of an entry, those specific to the canonical isoform first.
 * <p>
 * Descriptions come from annotations of category {@link AnnotationCategory#FUNCTION_INFO}.
 * When the entry has none, the method falls back to the CV term names of at most three
 * "go biological process" / "go molecular function" annotations, ordered by quality
 * qualifier, then category, then term name.
 *
 * @param entry the entry to describe
 * @return the function descriptions, or the fallback GO term names; possibly empty
 */
public static List<String> getFunctionInfoWithCanonicalFirst(Entry entry) {
    List<String> canonicalFunctions = new ArrayList<>();
    List<String> otherFunctions = new ArrayList<>();

    // Find the unique name of the canonical (swissprot displayed) isoform; stays "" if there is none.
    String canonicalIsoformName = "";
    for (Isoform isoform : entry.getIsoforms()) {
        if (isoform.isCanonicalIsoform()) {
            canonicalIsoformName = isoform.getUniqueName();
            break;
        }
    }

    // Sort each function annotation into the canonical or the non-canonical basket.
    for (Annotation annotation : entry.getAnnotations()) {
        if (!annotation.getAPICategory().equals(AnnotationCategory.FUNCTION_INFO)) {
            continue;
        }
        List<String> basket = annotation.isSpecificForIsoform(canonicalIsoformName)
                ? canonicalFunctions
                : otherFunctions;
        basket.add(annotation.getDescription());
    }

    // Single result list, canonical functions first.
    canonicalFunctions.addAll(otherFunctions);
    if (!canonicalFunctions.isEmpty()) {
        return canonicalFunctions;
    }

    // Fallback on GO annotations.
    // Intended order: GOLD over SILVER, then GO_BP over GO_MF, then alphabetic on term name
    // (cf. jira NEXTPROT-1238). Annotations comparing equal on all three are kept only once.
    Set<Annotation> goAnnotations = new TreeSet<>((left, right) -> {
        int order = left.getQualityQualifier().compareTo(right.getQualityQualifier());
        if (order != 0) {
            return order;
        }
        order = left.getCategory().compareTo(right.getCategory());
        if (order != 0) {
            return order;
        }
        return left.getCvTermName().compareTo(right.getCvTermName());
    });
    for (Annotation annotation : entry.getAnnotations()) {
        String categoryName = annotation.getCategory();
        if (categoryName.equals("go biological process") || categoryName.equals("go molecular function")) {
            goAnnotations.add(annotation);
        }
    }

    int added = 0;
    for (Annotation goAnnotation : goAnnotations) {
        String termName = goAnnotation.getCvTermName();
        // Skip the uninformative "protein binding" term when there are enough alternatives.
        if (termName.equals("protein binding") && goAnnotations.size() > 3) {
            continue;
        }
        // Return at most the three first term names.
        if (added >= 3) {
            break;
        }
        canonicalFunctions.add(termName);
        added++;
    }
    return canonicalFunctions;
}
}