/* * Copyright (C) 2010-2013 "Bio4j" * * This file is part of Bio4j * * Bio4j is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package com.bio4j.neo4jdb.codesamples; import com.bio4j.neo4jdb.model.nodes.EnzymeNode; import com.bio4j.neo4jdb.model.nodes.ProteinNode; import com.bio4j.neo4jdb.model.nodes.ncbi.NCBITaxonNode; import com.bio4j.neo4jdb.model.relationships.protein.ProteinEnzymaticActivityRel; import com.bio4j.neo4jdb.model.util.Bio4jManager; import com.bio4j.neo4jdb.model.util.NodeRetriever; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import org.neo4j.graphdb.Direction; import org.neo4j.graphdb.Relationship; /** * * @author Pablo Pareja Tobes <ppareja@era7.com> */ public class BiodieselProductionSample { /** * @param args Just one argument (protein accession) */ public static void main(String[] args) { if (args.length != 3) { System.out.println("The program expects two parameters: \n" + "1. Bio4j DB folder\n" + "2. Enzyme IDs file\n" + "3. Output file name"); } else { File enzymeFile = new File(args[1]); File outFile = new File(args[2]); Bio4jManager manager = null; try { //--creating manager and node retriever---- manager = new Bio4jManager(args[0]); NodeRetriever nodeRetriever = new NodeRetriever(manager); LinkedList<EnzymeNode> enzymeList = new LinkedList<>(); List<String> enzymeIDList = new ArrayList<>(); System.out.println("Reading input file..."); BufferedReader reader = new BufferedReader(new FileReader(enzymeFile)); String line; while((line = reader.readLine()) != null){ EnzymeNode enzymeNode = nodeRetriever.getEnzymeById(line); if(enzymeNode != null){ enzymeList.add(enzymeNode); enzymeIDList.add(line); }else{ System.out.println("There was no Enzyme found for ID: " + line); } } reader.close(); System.out.println("Done!"); Map<String, ProteinNode> proteinMap = new HashMap<>(); for (EnzymeNode enzymeNode : enzymeList) { System.out.println("Retrieving proteins for enzyme: " + enzymeNode.getId()); Iterator<Relationship> iterator = enzymeNode.getNode().getRelationships(Direction.INCOMING, new ProteinEnzymaticActivityRel(null)).iterator(); while(iterator.hasNext()){ ProteinNode proteinNode = new ProteinNode(iterator.next().getStartNode()); ProteinNode tempProtein = proteinMap.get(proteinNode.getAccession()); if(tempProtein == null){ proteinMap.put(proteinNode.getAccession(), proteinNode); } } } System.out.println("There were " + proteinMap.size() + " proteins found"); System.out.println("Filtering proteins that don't have a RefSeq GenomeElement associated..."); int proteinsFilteredCounter = 0; for (String string : proteinMap.keySet()) { ProteinNode tempProtein = proteinMap.get(string); if(tempProtein.getGenomeElements().isEmpty()){ proteinMap.remove(string); proteinsFilteredCounter++; } } System.out.println("Done!"); System.out.println("There were " + proteinsFilteredCounter + " proteins filtered..."); System.out.println("Now we filtered those proteins that have less than 4 enzymes associated from the list provided..."); proteinsFilteredCounter = 0; for (String string : proteinMap.keySet()) { ProteinNode tempProtein = proteinMap.get(string); List<EnzymeNode> tempEnzymes = tempProtein.getProteinEnzymaticActivity(); int tempCounter = 0; boolean filterPassed = false; for(int i=0; i<tempEnzymes.size() && !filterPassed; i++){ EnzymeNode enzymeNode = tempEnzymes.get(i); if(enzymeIDList.contains(enzymeNode.getId())){ tempCounter++; if(tempCounter >= 4){ filterPassed = true; } } } if(!filterPassed){ proteinsFilteredCounter++; proteinMap.remove(string); } } System.out.println("Done!"); System.out.println("There were " + proteinsFilteredCounter + " proteins filtered..."); System.out.println("Time to retrieve an filter by organisms..."); proteinsFilteredCounter = 0; for (String string : proteinMap.keySet()) { ProteinNode tempProtein = proteinMap.get(string); NCBITaxonNode taxon = nodeRetriever.getNCBITaxonByTaxId(tempProtein.getOrganism().getNcbiTaxonomyId()); boolean bacteriaFound = false; while(!bacteriaFound && taxon != null){ if(taxon.getScientificName().equals("Bacteria")){ bacteriaFound = true; }else{ taxon = taxon.getParent(); } } if(!bacteriaFound){ proteinsFilteredCounter++; proteinMap.remove(string); } } System.out.println("Done!"); System.out.println("There were " + proteinsFilteredCounter + " proteins filtered..."); System.out.println("Writing output file..."); BufferedWriter writer = new BufferedWriter(new FileWriter(outFile)); for (String string : proteinMap.keySet()) { writer.write(string + "\n"); } writer.close(); } catch (Exception e) { e.printStackTrace(); } finally { //---closing the manager---- manager.shutDown(); } } } }