package org.nextprot.api.tasks.solr.indexer.entry.impl; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.TreeSet; import org.apache.log4j.Logger; import org.nextprot.api.commons.constants.TerminologyCv; import org.nextprot.api.commons.utils.Tree; import org.nextprot.api.core.domain.Entry; import org.nextprot.api.core.domain.Family; import org.nextprot.api.core.domain.CvTerm; import org.nextprot.api.core.domain.annotation.Annotation; import org.nextprot.api.core.domain.annotation.AnnotationEvidence; import org.nextprot.api.core.utils.TerminologyUtils; import org.nextprot.api.solr.index.EntryIndex.Fields; import org.nextprot.api.tasks.solr.GenerateSolrIndex; import org.nextprot.api.tasks.solr.indexer.entry.EntryFieldBuilder; import org.nextprot.api.tasks.solr.indexer.entry.FieldBuilder; @EntryFieldBuilder public class CVFieldBuilder extends FieldBuilder { protected Logger logger = Logger.getLogger(GenerateSolrIndex.class); @Override protected void init(Entry entry) { Set <String> cv_acs = new HashSet<String>(); Set <String> cv_ancestors_acs = new HashSet<String>(); Set <String> cv_synonyms = new HashSet<String>(); Set <String> top_acs = new HashSet<>(Arrays.asList("CVAN_0001","CVAN_0002","CVAN_0011")); // top level ancestors (Annotation, feature, and ROI) // CV accessions List<Annotation> annots = entry.getAnnotations(); int cvac_cnt = 0; boolean allnegative; for (Annotation currannot : annots) { String category = currannot.getCategory(); if(!category.equals("tissue specificity")) { // tissue-specific CVs are indexed under 'expression' String cvac = currannot.getCvTermAccessionCode(); if (cvac == null) continue; if (cvac.isEmpty()) logger.warn("CVterm accession empty in " + category + " for " + entry.getUniqueName()); else { if(category.startsWith("go ")) { allnegative = true; List<AnnotationEvidence> evlist = currannot.getEvidences(); // We don't index negative annotations for(AnnotationEvidence ev : evlist) allnegative = allnegative & ev.isNegativeEvidence(); if(allnegative == true) continue; } if(!this.isGold() || currannot.getQualityQualifier().equals("GOLD")) { addField(Fields.CV_ACS, cvac); cvac_cnt++; cv_acs.add(cvac); // No duplicates: this is a Set, will be used for synonyms and ancestors addField(Fields.CV_NAMES, currannot.getCvTermName()); } } } } // Families (why not part of Annotations ?) for (Family family : entry.getOverview().getFamilies()) { addField(Fields.CV_ACS, family.getAccession()); addField(Fields.CV_NAMES, family.getName() + " family"); cv_acs.add(family.getAccession()); } // Final CV acs, ancestors and synonyms //System.err.println("cumputing CV ancestors for " + cv_acs.size() + " terms..."); Tree<CvTerm> tree = null; //Set<String> ancestors2 = new TreeSet<String>(); for (String cvac : cv_acs) { CvTerm term = this.terminologyservice.findCvTermByAccession(cvac); //System.err.println(cvac + ": " + term); String category = term.getOntology(); List<String> ancestors = TerminologyUtils.getAllAncestorsAccession(term.getAccession(), terminologyservice); //List<Tree<CvTerm>> treeList = this.terminologyservice.findTerminology(TerminologyCv.valueOf(category)); //if(treeList.isEmpty()) ancestors2.clear(); //ancestors2 = this.terminologyservice.getAncestorSets(treeList, term.getAccession()); //Set<String> ancestors2 = TerminologyUtils.getAncestorSets(tree, term.getAccession()); //if(ancestors.size() != ancestors2.size()) { // Differences for FA-, KW-, SL-, DO-, and enzymes... //System.err.println(cvac + " old method: " + ancestors.size() + " new method: " + ancestors2.size() + " category" + category); //System.err.println(ancestors); //} if(ancestors != null) for (String ancestor : ancestors) { cv_ancestors_acs.add(ancestor); } List<String> synonyms = term.getSynonyms(); if(synonyms != null) { //if (term.getOntology().startsWith("Go")) System.err.println("adding: " + synonyms.get(0)); for (String synonym : synonyms) cv_synonyms.add(synonym.trim()); // No duplicate: this is a Set } } // Remove uninformative top level ancestors (Annotation, feature, and ROI) cv_ancestors_acs.removeAll(top_acs); // Index generated sets for (String ancestorac : cv_ancestors_acs) { addField(Fields.CV_ANCESTORS_ACS, ancestorac); addField(Fields.CV_ANCESTORS, this.terminologyservice.findCvTermByAccession(ancestorac).getName()); } //System.err.println("CV ancestors done."); for (String synonym : cv_synonyms) { addField(Fields.CV_SYNONYMS, synonym); } List<CvTerm> enzymes = entry.getEnzymes(); String ec_names = ""; for (CvTerm currenzyme : enzymes) { cvac_cnt++; cv_acs.add(currenzyme.getAccession()); addField(Fields.CV_NAMES, currenzyme.getName()); if(ec_names != "") ec_names += ", "; ec_names += "EC " + currenzyme.getAccession(); List <String> synonyms = currenzyme.getSynonyms(); if(synonyms != null) for (String synonym : synonyms) { addField(Fields.CV_SYNONYMS, synonym.trim()); } } addField(Fields.EC_NAME, ec_names); } @Override public Collection<Fields> getSupportedFields() { return Arrays.asList(Fields.CV_ANCESTORS_ACS, Fields.CV_ANCESTORS, Fields.CV_SYNONYMS, Fields.CV_NAMES, Fields.CV_ACS, Fields.EC_NAME); } }