package org.nextprot.api.core.domain; import org.nextprot.api.commons.constants.AnnotationCategory; import org.nextprot.api.core.domain.annotation.Annotation; import org.nextprot.api.core.domain.annotation.AnnotationEvidence; import org.nextprot.api.core.domain.annotation.AnnotationIsoformSpecificity; import org.nextprot.api.core.service.fluent.EntryConfig; import org.nextprot.api.core.utils.ExperimentalContextUtil; import org.nextprot.api.core.utils.PublicationUtils; import org.nextprot.api.core.utils.XrefUtils; import org.nextprot.api.core.utils.annot.AnnotationUtils; import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; public class EntryUtils implements Serializable{ private static final long serialVersionUID = 3009334685615648172L; public static String getEntryName(String nextprotAccession) { String entryAccession = nextprotAccession; if((nextprotAccession != null) && (nextprotAccession.length() > 0) && (nextprotAccession.contains("-"))){ entryAccession = nextprotAccession.substring(0, nextprotAccession.indexOf("-")); } return entryAccession; } public static Set<Long> getExperimentalContextIds(List<Annotation> annotations) { Set<Long> ecIds = new TreeSet<>(); if (annotations != null) { for (Annotation annot : annotations) { if (annot.getEvidences() != null) { for (AnnotationEvidence evi: annot.getEvidences()) { Long ecId = evi.getExperimentalContextId(); if (ecId != null && ecId != 0) ecIds.add(ecId); } } } } return ecIds; } public static Entry filterEntryBySubPart(Entry entry, EntryConfig config) { List<Annotation> annotations; List<DbXref> xrefs; List<Publication> publications; List<ExperimentalContext> experimentalContexts; // Filter if necessary (config is applied and there are some annotations) if ((config.hasSubPart() || config.hasGoldOnly()) && ((entry.getAnnotations() != null)) && (!entry.getAnnotations().isEmpty())) { annotations = AnnotationUtils.filterAnnotationsByCategory(entry, config.getSubpart(), config.hasGoldOnly()); Set<String> dependencyHashes = new HashSet<String>(); annotations.stream().filter(a -> a.isProteoformAnnotation()).forEach(a -> { for(String subject : a.getSubjectComponents()){ dependencyHashes.add(subject); } dependencyHashes.add((a.getBioObject()).getAnnotationHash()); }); List<Annotation> dependentAnnotations = AnnotationUtils.filterAnnotationsByHashes(entry, dependencyHashes); if(config.hasGoldOnly()){ Map<AnnotationCategory, List<Annotation>> dependentAnnotationsGroupedByCategory = dependentAnnotations.stream().collect(Collectors.groupingBy(Annotation::getAPICategory)); dependentAnnotationsGroupedByCategory.entrySet().forEach(entrySet -> { annotations.addAll(AnnotationUtils.filterAnnotationsByCategory(entrySet.getValue(), entrySet.getKey(), true, config.hasGoldOnly())); }); }else { annotations.addAll(dependentAnnotations); } entry.setAnnotations(annotations); if(!config.hasNoAdditionalReferences()){ //In case we don't care about xrefs, publications and experimental contexts (will be faster) Set<Long> xrefIds = AnnotationUtils.getXrefIdsForAnnotations(annotations); xrefIds.addAll(AnnotationUtils.getXrefIdsForInteractionsInteractants(annotations)); xrefIds.addAll(AnnotationUtils.getXrefIdsFromAnnotations(annotations)); xrefs = XrefUtils.filterXrefsByIds(entry.getXrefs(), xrefIds); publications = PublicationUtils.filterPublicationsByIds(entry.getPublications(), AnnotationUtils.getPublicationIdsForAnnotations(annotations)); experimentalContexts = ExperimentalContextUtil.filterExperimentalContextsByIds(entry.getExperimentalContexts(), AnnotationUtils.getExperimentalContextIdsForAnnotations(annotations)); entry.setXrefs(xrefs); entry.setPublications(publications); entry.setExperimentalContexts(experimentalContexts); } } return entry; } /** * Builds a dictionary (HashMap) where the key is the annotation uniqueName and the value the annotation itself. * @param entry * @return a dictionary of annotations where the key is the annotation uniqueName (= identifier in both NP1 and BED world) */ public static Map<String,Annotation> getUniqueNameAnnotationMap(Entry entry) { Map<String,Annotation> result = new HashMap<String,Annotation>(); for (Annotation annot: entry.getAnnotations()) { result.put(annot.getUniqueName(), annot); } return result; } public static Map<String,Integer> getAnnotationCategoryCountMap(Entry entry) { Map<String,Integer> result = new TreeMap<String,Integer>(); for (Annotation annot: entry.getAnnotations()) { String key = annot.getApiTypeName(); if (!result.containsKey(key)) result.put(key, new Integer(0)); int value = result.get(key).intValue()+1; result.put(key, new Integer(value)); } return result; } private static void printMap(Map map) { for (Object k: map.keySet()) { System.out.println(k + " => " + map.get(k)); } } public static Map<String,Annotation> getHashAnnotationMap(Entry entry) { return getHashAnnotationMap(entry.getAnnotations()); } /** * Builds a dictionary (HashMap) where the key is the annotation annotationHash and the value the annotation itself. * Annotations with no hash are skipped * @param annotations * @return a dictionary of annotations where the key is the annotation hash (= identifier in BED world) */ public static Map<String,Annotation> getHashAnnotationMap(List<Annotation> annotations) { //printMap(getAnnotationCategoryCountMap(entry)); Map<String,Annotation> result = new HashMap<String,Annotation>(); for (Annotation annot: annotations) { if (annot.getAnnotationHash() != null && ! annot.getAnnotationHash().isEmpty()) { result.put(annot.getAnnotationHash(), annot); } } return result; } /** * Returns a dictionary mapping proteoforms to their annotations. * The key is the proteoform, the value the list of annotations related to it. * Note that the method is "isoform "aware": only annotations having * an AnnotationIsoformSpecificity record in getTargetingIsoformsMap * for the isoformAc specified in the parameter are taken into account * @param entry * @param isoformAc * @return */ public static Map<Proteoform,List<Annotation>> getProteoformAnnotationsMap(Entry entry, String isoformAc) { Map<Proteoform,List<Annotation>> result = new HashMap<Proteoform,List<Annotation>>(); for (Annotation annot: entry.getAnnotations()) { if (annot.isProteoformAnnotation()) { if (annot.getTargetingIsoformsMap().containsKey(isoformAc)) { AnnotationIsoformSpecificity spec = annot.getTargetingIsoformsMap().get(isoformAc); Proteoform key = new Proteoform(isoformAc, spec.getName(), annot.getSubjectComponents()); if (!result.containsKey(key)) result.put(key, new ArrayList<Annotation>()); result.get(key).add(annot); } } } return result; } public static List<String> getFunctionInfoWithCanonicalFirst(Entry entry) { List<String> fInfoCanonical = new ArrayList<String>(); List<String> fInfoNonCanonical = new ArrayList<String>(); List<Isoform> isos = entry.getIsoforms(); String canonicalIso = ""; // Get Id of the canonical (swissprotdisplayed) isoform for (Isoform curriso : isos) if(curriso.isCanonicalIsoform()) { canonicalIso = curriso.getUniqueName(); break; } // Get the function annotation and put it in the right basket for (Annotation currannot : entry.getAnnotations()) { if(currannot.getAPICategory().equals(AnnotationCategory.FUNCTION_INFO)) if(currannot.isSpecificForIsoform(canonicalIso)) fInfoCanonical.add(currannot.getDescription()); else fInfoNonCanonical.add(currannot.getDescription()); } // Merge the lists in a final unique list with canonical function first //System.err.println("before: " + fInfoCanonical); fInfoCanonical.addAll(fInfoNonCanonical); //System.err.println("after: " + fInfoCanonical); if (fInfoCanonical.size()==0) { Set<Annotation> goFuncSet = new TreeSet<>((e1, e2) -> { int c; // GOLD over SILVER, then GO_BP over GO_MF, then Alphabetic in term name cf: jira NEXTPROT-1238 c = e1.getQualityQualifier().compareTo(e2.getQualityQualifier()); if (c == 0) c = e1.getCategory().compareTo(e2.getCategory()); if (c == 0) c=e1.getCvTermName().compareTo(e2.getCvTermName()); return c; }); List<Annotation> annots = entry.getAnnotations(); for (Annotation currannot : annots) { String category = currannot.getCategory(); if(category.equals("go biological process") || category.equals("go molecular function")) { goFuncSet.add(currannot); } } int rescnt = 0; for (Annotation resannot : goFuncSet) { // Stick term's name in the returned list if(resannot.getCvTermName().equals("protein binding") && goFuncSet.size() > 3) // avoid unsignificant function if possible continue; if(rescnt++ < 3) // return max 3 first annotation descriptions fInfoCanonical.add(resannot.getCvTermName()); else break; } } return fInfoCanonical; } }