/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package uk.ac.ebi.ep.base.search; import java.util.ArrayList; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ForkJoinPool; import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml_cml.schema.cml2.react.Reaction; import uk.ac.ebi.ep.data.common.CommonSpecies; import uk.ac.ebi.ep.data.domain.EnzymePortalCompound; import uk.ac.ebi.ep.data.domain.EnzymePortalEcNumbers; import uk.ac.ebi.ep.data.domain.UniprotEntry; import uk.ac.ebi.ep.data.domain.UniprotXref; import uk.ac.ebi.ep.data.enzyme.model.ChemicalEntity; import uk.ac.ebi.ep.data.enzyme.model.CountableMolecules; import uk.ac.ebi.ep.data.enzyme.model.EcClass; import uk.ac.ebi.ep.data.enzyme.model.Enzyme; import uk.ac.ebi.ep.data.enzyme.model.EnzymeHierarchy; import uk.ac.ebi.ep.data.enzyme.model.EnzymeModel; import uk.ac.ebi.ep.data.enzyme.model.EnzymeReaction; import uk.ac.ebi.ep.data.enzyme.model.Molecule; import uk.ac.ebi.ep.data.enzyme.model.Pathway; import uk.ac.ebi.ep.data.enzyme.model.ProteinStructure; import uk.ac.ebi.ep.data.enzyme.model.ReactionPathway; import uk.ac.ebi.ep.data.enzyme.model.Sequence; import uk.ac.ebi.ep.data.exceptions.EnzymeRetrieverException; import uk.ac.ebi.ep.data.exceptions.MultiThreadingException; import uk.ac.ebi.ep.data.search.model.Compound; import uk.ac.ebi.ep.data.search.model.Disease; import uk.ac.ebi.ep.data.search.model.EnzymeAccession; import uk.ac.ebi.ep.data.service.EnzymePortalService; import uk.ac.ebi.ep.enzymeservices.chebi.ChebiAdapter; import uk.ac.ebi.ep.enzymeservices.chebi.IChebiAdapter; import uk.ac.ebi.ep.enzymeservices.intenz.IntenzAdapter; import uk.ac.ebi.ep.enzymeservices.rhea.IRheaAdapter; import uk.ac.ebi.ep.enzymeservices.rhea.RheaWsAdapter; import uk.ac.ebi.ep.literatureservice.service.LiteratureService; import uk.ac.ebi.ep.uniprotservice.transferObjects.LabelledCitation; import static uk.ac.ebi.ep.util.query.LuceneQueryBuilder.LOGGER; import uk.ac.ebi.rhea.ws.client.RheaFetchDataException; /** * * @author joseph */ public class EnzymeRetriever {// extends EnzymeBase { private static final Logger logger = LoggerFactory.getLogger(EnzymeRetriever.class); private IRheaAdapter rheaAdapter; private IChebiAdapter chebiAdapter; private IntenzAdapter intenzAdapter; private LiteratureService literatureService; private EnzymePortalService enzymePortalService; public void setLiteratureService(LiteratureService literatureService) { this.literatureService = literatureService; } public void setEnzymePortalService(EnzymePortalService enzymePortalService) { this.enzymePortalService = enzymePortalService; } /** * Lazily constructs a new adapter if needed. * * @return a ChEBI adapter. */ public IChebiAdapter getChebiAdapter() { if (chebiAdapter == null) { chebiAdapter = new ChebiAdapter(); } return chebiAdapter; } public IntenzAdapter getIntenzAdapter() { if (intenzAdapter == null) { intenzAdapter = new IntenzAdapter(); } return intenzAdapter; } public IRheaAdapter getRheaAdapter() { if (rheaAdapter == null) { rheaAdapter = new RheaWsAdapter(); } return rheaAdapter; } public void setRheaAdapter(IRheaAdapter rheaAdapter) { this.rheaAdapter = rheaAdapter; } public void setChebiAdapter(IChebiAdapter chebiAdapter) { this.chebiAdapter = chebiAdapter; } public void setIntenzAdapter(IntenzAdapter intenzAdapter) { this.intenzAdapter = intenzAdapter; } private List<EnzymeAccession> relatedSpeciesWithHumanOnTop(EnzymeAccession ea, UniprotEntry e) { String defaultSpecies = CommonSpecies.HUMAN.getScientificName(); List<EnzymeAccession> relatedSpecies = new LinkedList<>(); if (e.getScientificName() != null && e.getScientificName().equalsIgnoreCase(defaultSpecies)) { relatedSpecies.add(0, ea); } else if (e.getScientificName() != null && !e.getScientificName().equalsIgnoreCase(defaultSpecies)) { relatedSpecies.add(ea); } return relatedSpecies.stream().distinct().collect(Collectors.toList()); } private List<EnzymeAccession> getRelatedSPecies(UniprotEntry uniprotEntry) { List<EnzymeAccession> relatedSpecies = new LinkedList<>(); // TODO query for related proteins and use the obj. possible null pointer if db is not populated with related protein if (uniprotEntry.getRelatedProteinsId() != null) { List<EnzymePortalCompound> compounds = enzymePortalService.findCompoundsByAccession(uniprotEntry.getAccession()); for (UniprotEntry e : uniprotEntry.getRelatedProteinsId().getUniprotEntrySet()) { EnzymeAccession ea = new EnzymeAccession(); //ea.setCompounds(e.getEnzymePortalCompoundSet().stream().distinct().collect(Collectors.toList())); ea.setCompounds(compounds.stream().distinct().collect(Collectors.toList())); ea.setDiseases(e.getEnzymePortalDiseaseSet().stream().distinct().collect(Collectors.toList())); ea.setPdbeaccession(e.getPdbeaccession()); ea.getUniprotaccessions().add(e.getAccession()); ea.setSpecies(e.getSpecies()); ea.setUniprotid(e.getName()); relatedSpecies.addAll(relatedSpeciesWithHumanOnTop(ea, e)); } } return relatedSpecies; } private EnzymeModel getEnzymeModel(String uniprotAccession) { final ForkJoinPool executorService = new ForkJoinPool(); CompletableFuture<UniprotEntry> completableFutureUniprotEntry = CompletableFuture .supplyAsync(() -> enzymePortalService.findByAccession(uniprotAccession), executorService); CompletableFuture<Set<EnzymePortalEcNumbers>> completableFutureEcNumbers = CompletableFuture .supplyAsync(() -> enzymePortalService.findByEcNumbersByAccession(uniprotAccession).stream().collect(Collectors.toSet()), executorService); UniprotEntry uniprotEntry = completableFutureUniprotEntry.join(); Set<EnzymePortalEcNumbers> ecNumbers = completableFutureEcNumbers.join(); EnzymeModel model = new EnzymeModel(); if (uniprotEntry != null) { Enzyme enzyme = new Enzyme(); Sequence sequence = new Sequence(); sequence.setLength(uniprotEntry.getSequenceLength()); enzyme.setSequence(sequence); //suppliment info model.setCommonName(uniprotEntry.getCommonName()); model.setFunction(uniprotEntry.getFunction()); model.setEnzymeFunction(uniprotEntry.getEnzymeFunction()); model.setEntryType(uniprotEntry.getEntryType()); model.setExpEvidenceFlag(uniprotEntry.getExpEvidenceFlag()); model.setFunctionLength(uniprotEntry.getFunctionLength()); model.setProteinName(uniprotEntry.getProteinName()); model.setSpecies(uniprotEntry.getSpecies()); model.setScientificName(uniprotEntry.getScientificName()); model.setCommonName(uniprotEntry.getCommonName()); model.setName(uniprotEntry.getProteinName()); model.setSynonyms(uniprotEntry.getSynonym()); model.setRelatedspecies(getRelatedSPecies(uniprotEntry)); model.setAccession(uniprotEntry.getAccession()); model.getUniprotaccessions().add(uniprotEntry.getAccession()); model.setEnzymePortalEcNumbersSet(ecNumbers); ecNumbers.stream().forEach(ec -> { EnzymeHierarchy enzymeHierarchy = new EnzymeHierarchy(); EcClass ecClass = new EcClass(); ecClass.setEc(ec.getEcNumber()); enzymeHierarchy.getEcclass().add(ecClass); enzyme.getEchierarchies().add(enzymeHierarchy); model.getEc().add(ec.getEcNumber()); }); model.setEnzyme(enzyme); executorService.shutdown(); return model; } return model; } public EnzymeModel getEnzyme(String uniprotAccession) { EnzymeModel model = getEnzymeModel(uniprotAccession); try { getIntenzAdapter().getEnzymeDetails(model); } catch (MultiThreadingException ex) { LOGGER.error("Error getting enzyme details from Intenz webservice", ex); } List<String> prov = addIntenzProvenance(); model.getEnzyme().setProvenance(prov); return model; } private List<String> addIntenzProvenance() { List<String> prov = new LinkedList<>(); prov.add("IntEnz"); prov.add("UniProt"); prov.add("IntEnz - (Integrated relational Enzyme database) is a freely available resource focused on enzyme nomenclature.\n"); prov.add("UniProt - The mission of UniProt is to provide the scientific community with a comprehensive, high-quality and freely accessible resource of protein sequence and functional information"); return prov; } public EnzymeModel getProteinStructure(String uniprotAccession) throws EnzymeRetrieverException { logger.debug(" -STR- before getEnzymeSummary"); EnzymeModel model = getEnzymeModel(uniprotAccession); addProteinStructures(model); return model; } private void addProteinStructures(EnzymeModel model) { List<UniprotXref> pdbcodes = enzymePortalService.findPDBcodesByAccession(model.getAccession()); pdbcodes.stream().map(pdb -> { String pdbId = pdb.getSourceId().toLowerCase(); model.getPdbeaccession().add(pdbId); ProteinStructure structure = new ProteinStructure(); structure.setId(pdbId); structure.setName(pdb.getSourceName()); return structure; }).forEach(structure -> model.getProteinstructure().add(structure)); } public EnzymeModel getDiseases(String uniprotAccession) throws EnzymeRetrieverException { EnzymeModel model = getEnzymeModel(uniprotAccession); addDiseases(model); return model; } /** * Adds any related diseases to the enzyme model. * * @param enzymeModel the model without disease info. * @since 1.1.0 */ protected void addDiseases(EnzymeModel enzymeModel) { List<Disease> diseases = enzymePortalService.findDiseasesByAccession(enzymeModel.getAccession()); enzymeModel.setDisease(diseases); } public EnzymeModel getLiterature(String uniprotAccession, int limit) throws EnzymeRetrieverException { EnzymeModel model = getEnzymeModel(uniprotAccession); List<LabelledCitation> citations = literatureService.getCitationsByAccession(uniprotAccession, limit); if (citations != null) { model.setLiterature(new ArrayList<>(citations)); } return model; } /** * Retrieves the whole enzyme model for comparisons. * * @param acc the UniProt accession of the enzyme. * @return a complete model. * @throws EnzymeRetrieverException in case of problem retrieving the model * from UniProt, or small molecules from ChEBI. * @since 1.1.0 */ public EnzymeModel getWholeModel(String acc) throws EnzymeRetrieverException { // This model includes summary, reactions and pathways: EnzymeModel model = getEnzymeModel(acc); // Add the missing bits: addReactionsPathwaysWholeModel(model); addProteinStructures(model); addMolecules(model); addDiseases(model); return model; } public EnzymeModel getMolecules(String uniprotAccession) throws EnzymeRetrieverException { EnzymeModel model = getEnzymeModel(uniprotAccession); addMolecules(model); return model; } /** * Adds any available data about small molecules to the model. * * @param model the model * @throws EnzymeRetrieverException in case of problem retrieving detailed * info about small molecules from ChEBI. * @since 1.1.0 */ protected void addMolecules(EnzymeModel model) throws EnzymeRetrieverException { // try { List<EnzymePortalCompound> compounds = enzymePortalService.findCompoundsByAccession(model.getAccession()); CountableMolecules activators = null, inhibitors = null, cofactors = null, drugs = null, bioactive = null; if (compounds != null) { for (Compound compound : compounds) { // Classify compounds from the DB: switch (compound.getRole()) { case ACTIVATOR: activators = addMoleculeToGroup(activators, compound); break; case INHIBITOR: inhibitors = addMoleculeToGroup(inhibitors, compound); break; case COFACTOR: cofactors = addMoleculeToGroup(cofactors, compound); break; case DRUG: drugs = addMoleculeToGroup(drugs, compound); break; case BIOACTIVE: bioactive = addMoleculeToGroup(bioactive, compound); break; } } } model.setMolecule(new ChemicalEntity() .withActivators(activators) .withInhibitors(inhibitors) .withCofactors(cofactors) .withDrugs(drugs) .withBioactiveLigands(bioactive)); logger.debug("MOLECULES before getting complete entries from ChEBI"); //disable calls to ChEBI for now as it returns inconsistent data for cofactors sometimes. //getChebiAdapter().getMoleculeCompleteEntries(model); logger.debug("MOLECULES before provenance"); List<String> prov = new LinkedList<>(); prov.add("ChEBI"); prov.add("ChEMBL"); // prov.add("RELEASED DATE = " + new Date()); prov.add("ChEBI - (Chemical Entities of Biological Interest) is a freely available dictionary of molecular entities focused on ‘small’ chemical compounds."); prov.add("ChEMBL is a database of bioactive drug-like small" + " molecules, it contains 2-D structures, calculated" + " properties (e.g. logP, Molecular Weight, Lipinski" + " Parameters, etc.) and abstracted bioactivities (e.g." + " binding constants, pharmacology and ADMET data)."); if (model.getMolecule() != null) { model.getMolecule().setProvenance(prov); } //} catch (ChebiFetchDataException ex) { //throw new EnzymeRetrieverException( // "Failed to get small molecule details from Chebi", ex); //} } /** * Adds a compound to the group (inhibitors, cofactors, etc).<br/> * If the group is <code>null</code> it will be created and initialized. * <br> * It will be modified by adding the passed compound and increasing the * total count. * * @param group a molecules group. * @param comp a compound. * @return the modified (possibly newly created) group of molecules. */ private CountableMolecules addMoleculeToGroup(CountableMolecules group, Compound comp) { if (group == null) { group = new CountableMolecules(); group.setMolecule(new ArrayList<>()); group.setTotalFound(0); } Molecule molecule = new Molecule(); molecule.setName(comp.getName()); molecule.setId(comp.getId()); group.getMolecule().add(molecule); group.setTotalFound(group.getTotalFound() + 1); return group; } public EnzymeModel getReactionsPathways(String uniprotAccession) throws EnzymeRetrieverException { EnzymeModel model = getEnzymeModel(uniprotAccession); List<String> catalyticActivities = enzymePortalService.findCatalyticActivitiesByAccession(uniprotAccession); model.setCatalyticActivities(catalyticActivities); addReactionsPathways(model); return model; } protected void addReactionsPathways(EnzymeModel model) throws EnzymeRetrieverException { //Get pathways from uniprot --> maybe not for now //Get pathways from Biomart (from Reactome reaction retrieved from Rhea) //Choose 2 top pathways to extract from Reactome Website // View pathway in reactome should be associated with the reaction. //EnzymeModel enzymeModel = (EnzymeModel)this.uniprotAdapter.getReactionPathwaySummary(uniprotAccession); logger.debug(" -RP- before uniprotAdapter.getEnzymeSummary"); Set<ReactionPathway> rpList = new HashSet<>(); //EnzymeModel model = getEnzymeModel(uniprotAccession); ReactionPathway reactionPathway = new ReactionPathway(); List<EnzymeReaction> reactions = enzymePortalService.findReactionsByAccession(model.getAccession()); List<Pathway> pathways = enzymePortalService.findPathwaysByAccession(model.getAccession()); model.setPathways(pathways); reactionPathway.setPathways(pathways); reactionPathway.setReactions(reactions); if (reactions != null && !reactions.isEmpty()) { reactionPathway.setReactions(reactions); } if (!reactionPathway.getReactions().isEmpty()) { rpList.add(reactionPathway); } model.setReactionpathway(rpList.stream().distinct().collect(Collectors.toList())); // The model comes with any available Reactome pathway IDs // in one ReactionPathway object, no more. // Now we get more ReactionPathways (one per Rhea reaction): logger.debug(" -RP- before queryRheaWsForReactions"); queryRheaWsForReactions(model); } protected EnzymeModel addReactionsPathwaysWholeModel(EnzymeModel model) throws EnzymeRetrieverException { ReactionPathway reactionPathway = new ReactionPathway(); List<EnzymeReaction> reactions = enzymePortalService.findReactionsByAccession(model.getAccession()); List<Pathway> pathways = enzymePortalService.findPathwaysByAccession(model.getAccession()); model.setPathways(pathways); reactionPathway.setPathways(pathways); if (reactions != null && !reactions.isEmpty()) { reactionPathway.setReactions(reactions); } model.getReactionpathway().add(reactionPathway); if (model.getReactionpathway().isEmpty()) { logger.warn("Searching Rhea for reaction for accession " + model.getAccession()); // The model comes with any available Reactome pathway IDs // in one ReactionPathway object, no more. // Now we get more ReactionPathways (one per Rhea reaction): logger.debug(" -RP- before queryRheaWsForReactions"); List<Reaction> rheaReactions = new ArrayList<>(); try { rheaReactions = rheaAdapter.getRheasInCmlreact(model .getUniprotaccessions().get(0)); } catch (RheaFetchDataException ex) { //throw new EnzymeRetrieverException("Query data from Rhea failed! ", ex); logger.error("Query data from Rhea failed! ", ex); } for (Reaction reaction : rheaReactions) { ReactionPathway rPathway = rheaAdapter.getReactionPathway(reaction); rPathway.setPathways(pathways); model.getReactionpathway().add(rPathway); } } model.getReactionpathway().stream().distinct().collect(Collectors.toList()); return model; } /** * Searches Rhea for the primary UniProt accession in the model and adds the * corresponding reactions if found. <br><b>WARNING:</b> the added reactions * have links only to Reactome and MACiE. The links are strings containing a * complete URL. * * @param enzymeModel * @return the same model updated with ReactionPathway objects, one per * reaction found. * @throws EnzymeRetrieverException */ private EnzymeModel queryRheaWsForReactions(EnzymeModel enzymeModel) throws EnzymeRetrieverException { List<Reaction> reactions = new ArrayList<>(); try { reactions = rheaAdapter.getRheasInCmlreact(enzymeModel .getUniprotaccessions().get(0)); } catch (RheaFetchDataException ex) { throw new EnzymeRetrieverException("Query data from Rhea failed! ", ex); } for (Reaction reaction : reactions) { // XXX This adapted reaction will have links only to Reactome and MACiE!: ReactionPathway reactionPathway = rheaAdapter.getReactionPathway(reaction); //reactionPathways.add(reactionPathway); enzymeModel.getReactionpathway().add(reactionPathway); } //enzymeModel.getReactionpathway().addAll(reactionPathways); enzymeModel.getReactionpathway().stream().distinct().collect(Collectors.toList()); return enzymeModel; } }