/*
 * Copyright (C) 2010-2011 "Bio4j"
 *
 * This file is part of Bio4j
 *
 * Bio4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>
 */
package com.bio4j.neo4jdb.codesamples;

import com.bio4j.neo4jdb.model.nodes.InterproNode;
import com.bio4j.neo4jdb.model.nodes.OrganismNode;
import com.bio4j.neo4jdb.model.nodes.ProteinNode;
import com.bio4j.neo4jdb.model.nodes.TaxonNode;
import com.bio4j.neo4jdb.model.nodes.refseq.CDSNode;
import com.bio4j.neo4jdb.model.nodes.refseq.GenomeElementNode;
import com.bio4j.neo4jdb.model.relationships.protein.ProteinInterproRel;
import com.bio4j.neo4jdb.model.relationships.protein.ProteinOrganismRel;
import com.bio4j.neo4jdb.model.util.Bio4jManager;
import com.bio4j.neo4jdb.model.util.NodeRetriever;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.Relationship;

/**
 * Retrieving all nucleotide sequences of CDS corresponding to LysR family
 * proteins of Uniprot (IPR000847 positive) belonging to
 * enterobacteriaceae (taxonomy).
 *
 * <p>The program walks the taxonomy below the chosen taxon, collects every
 * organism found on the way, keeps the proteins of those organisms that are
 * linked to the wanted InterPro entry, and finally prints the genome elements
 * and CDS nodes reachable from each selected protein.
 *
 * @author Pablo Pareja Tobes <ppareja@era7.com>
 */
public class RealUseCase1 {

    /** Name of the taxon whose sub-tree is explored. */
    private static final String TAXON_NAME = "Enterobacteriaceae";

    /** InterPro identifier a protein must be linked to in order to be selected. */
    private static final String INTERPRO_ID = "IPR000847";

    /**
     * Program entry point.
     *
     * @param args exactly one element is expected: the Bio4j DB folder. With any
     *             other number of arguments a usage message is printed and
     *             nothing else happens.
     */
    public static void main(String[] args) {

        if (args.length != 1) {
            System.out.println("The program expects the following parameters: \n"
                    + "1. Bio4j DB folder\n");
            return;
        }

        Bio4jManager manager = null;

        try {

            //--creating manager and node retriever----
            manager = new Bio4jManager(args[0]);
            NodeRetriever nodeRetriever = new NodeRetriever(manager);

            //First we get the taxon node we're interested in
            TaxonNode taxonNode = nodeRetriever.getTaxonByName(TAXON_NAME);
            System.out.println("taxonNode = " + taxonNode);

            //In this list we're going to store the organism nodes
            ArrayList<OrganismNode> organisms = new ArrayList<OrganismNode>();

            System.out.println("Getting organisms...");

            //Now we get the organisms
            getAllSubOrganisms(taxonNode, organisms);

            System.out.println("looping through organisms...");

            //getting proteins for those organisms and selecting the ones
            //that have the interpro value = IPR000847
            List<ProteinNode> proteins = selectProteins(organisms);

            System.out.println("looping through proteins...");

            //At this point we should already have the proteins we want,
            //now it's time to retrieve their CDS sequences
            printCdsSequences(proteins);

            System.out.println("Done! :)");

        } catch (Exception e) {
            //deal somehow with the exception
            e.printStackTrace();
        } finally {
            //---closing the manager----
            // The manager is still null when its constructor threw; calling
            // shutDown() unconditionally would raise a NullPointerException
            // out of main right after the real error was reported.
            if (manager != null) {
                manager.shutDown();
            }
        }
    }

    /**
     * Loops through the organisms checking every associated protein and keeps
     * the ones linked to {@link #INTERPRO_ID}.
     *
     * @param organisms organisms whose incoming protein-organism relationships
     *                  are inspected
     * @return the selected proteins, in the order they were encountered
     */
    private static List<ProteinNode> selectProteins(List<OrganismNode> organisms) {

        // Relationship-type instances used only to filter getRelationships().
        ProteinOrganismRel proteinOrganismRel = new ProteinOrganismRel(null);
        ProteinInterproRel proteinInterproRel = new ProteinInterproRel(null);

        List<ProteinNode> selected = new ArrayList<ProteinNode>();

        for (OrganismNode organismNode : organisms) {

            System.out.println("organismNode = " + organismNode);

            Iterator<Relationship> relIterator = organismNode.getNode()
                    .getRelationships(proteinOrganismRel, Direction.INCOMING).iterator();

            while (relIterator.hasNext()) {
                // The protein is the start node of the incoming relationship.
                ProteinNode candidate = new ProteinNode(relIterator.next().getStartNode());

                //---the protein is selected in case it has the interpro id---
                if (hasInterpro(candidate, proteinInterproRel, INTERPRO_ID)) {
                    selected.add(candidate);
                }
            }
        }

        return selected;
    }

    /**
     * Tells whether a protein has an outgoing relationship of the given type
     * whose end node carries the given InterPro id.
     *
     * @param protein     protein to inspect
     * @param interproRel relationship type used to filter the protein's relationships
     * @param interproId  InterPro id looked for
     * @return {@code true} as soon as a matching InterPro node is found,
     *         {@code false} when none of the relationships matches
     */
    private static boolean hasInterpro(ProteinNode protein,
                                       ProteinInterproRel interproRel,
                                       String interproId) {

        Iterator<Relationship> relIterator = protein.getNode()
                .getRelationships(interproRel, Direction.OUTGOING).iterator();

        while (relIterator.hasNext()) {
            InterproNode interpro = new InterproNode(relIterator.next().getEndNode());
            // Constant-first comparison so a node without an id cannot throw.
            if (interproId.equals(interpro.getId())) {
                return true;
            }
        }

        return false;
    }

    /**
     * Prints, for every protein, its accession followed by the version of each
     * of its genome elements and the CDS nodes of each genome element.
     *
     * @param proteins proteins whose genome elements and CDS are printed
     */
    private static void printCdsSequences(List<ProteinNode> proteins) {

        for (ProteinNode proteinNode : proteins) {

            System.out.println("protein = " + proteinNode.getAccession());

            for (GenomeElementNode genomeElementNode : proteinNode.getGenomeElements()) {

                System.out.println("genomeElement = " + genomeElementNode.getVersion());

                for (CDSNode cdsNode : genomeElementNode.getCDS()) {
                    System.out.println("cDSNode = " + cdsNode);
                }
            }
        }
    }

    /**
     * Recursively collects the organisms of a taxon and of all its descendants.
     * Every visited taxon is printed before its organisms are added.
     *
     * @param taxonNode taxon where the traversal starts
     * @param organisms list the organisms found are appended to
     */
    private static void getAllSubOrganisms(TaxonNode taxonNode,
                                           ArrayList<OrganismNode> organisms) {

        System.out.println(taxonNode);

        organisms.addAll(taxonNode.getOrganisms());

        List<TaxonNode> children = taxonNode.getChildren();
        for (TaxonNode child : children) {
            getAllSubOrganisms(child, organisms);
        }
    }
}