package org.genedb.db.adhoc; import org.apache.log4j.Logger; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Types; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.jgrapht.*; import org.jgrapht.graph.*; import org.jgrapht.alg.*; /** * Insert a leaf phylonode (corresponding to an organism that already exists in the organism table) * into phylotree 1. This code has very few external dependencies: it needs only the PostgreSQL * JDBC driver to run. * <p> * Usage is of the form * <pre>java PhylonodeManager <jdbc URL> <database username> [<parent phylonode label> <organism common name>]</pre> * if the optional arguments are omitted, it just prints a textual representation of the phylotree to standard output. Otherwise it * inserts the new node, then prints the resulting tree. 
* * You can run it as follows (on one line for ease of copypasta): * * java -classpath lib/postgresql-8.3-603.jdbc4.jar:ant-build/dist/genedb-access.jar org.genedb.db.adhoc.PhylonodeManager jdbc:postgresql://localhost:5432/pathogens * * @author rn2 * */ public class PanGenomeManager { private enum PanGeneStatisticalType {CORE,MISC,UNIQUE}; private enum PanGeneBioType {CODING,NONCODING,PSEUDO,MIXED}; private Connection conn; private static final Logger logger = Logger.getLogger(PanGenomeManager.class); private PanGenomeManager(Connection conn) { this.conn = conn; } private List <Set <Long>> getConnectedComponents(int organism_ids[]) throws Exception{ String organism_ids_list=new String(""); for(int i=0;i<organism_ids.length;i++){ organism_ids_list=(i<organism_ids.length-1)?organism_ids_list+""+organism_ids[i]+",":organism_ids_list+""+organism_ids[i]; } PreparedStatement st = conn.prepareStatement("select * from organism where organism_id in ("+organism_ids_list+")"); ResultSet rs = st.executeQuery(); while (rs.next()) { String organism_name = rs.getString("common_name"); logger.info("organism to be searched "+organism_name); } String orthologues_query=new String("select mRNA_gene_1.object_id as gene1_id,mRNA_gene_2.object_id as gene2_id" +" from" +" (" +" select fr1.subject_id,fr2.subject_id as object_id" +" from" +" feature pmf" +" join" +" feature_relationship fr1 on (pmf.type_id=436 and pmf.uniquename not like 'PAN%' and pmf.feature_id=fr1.object_id and fr1.type_id=78)" +" join" +" feature_relationship fr2 on (fr1.object_id=fr2.object_id and fr1.subject_id!=fr2.subject_id)" +" join " +" feature ppf1 on (fr1.subject_id=ppf1.feature_id and ppf1.organism_id in ("+organism_ids_list+") )" +" join " +" feature ppf2 on (fr2.subject_id=ppf2.feature_id and ppf2.organism_id in ("+organism_ids_list+") )" +" union" +" select fr.subject_id,fr.object_id" +" from" +" feature ppf1" +" join" +" feature_relationship fr on (ppf1.feature_id=fr.subject_id and fr.type_id=78)" +" join" 
+" feature ppf2 on (fr.object_id=ppf2.feature_id " +" and ppf1.organism_id in ("+organism_ids_list+") and ppf2.organism_id in ("+organism_ids_list+") )" +" union " +" select fr.object_id as subject_id,fr.subject_id as object_id" +" from" +" feature ppf1" +" join" +" feature_relationship fr on (ppf1.feature_id=fr.subject_id and fr.type_id=78)" +" join" +" feature ppf2 on (fr.object_id=ppf2.feature_id " +" and ppf1.organism_id in ("+organism_ids_list+") and ppf2.organism_id in ("+organism_ids_list+") )" +" )" +" as polypeptide_relationship" +" join" +" feature_relationship polypeptide_mRNA_1 on (polypeptide_relationship.subject_id=polypeptide_mRNA_1.subject_id and polypeptide_mRNA_1.type_id=69)" +" join" +" feature_relationship polypeptide_mRNA_2 on (polypeptide_relationship.object_id=polypeptide_mRNA_2.subject_id and polypeptide_mRNA_2.type_id=69)" +" join" +" feature_relationship mRNA_gene_1 on (polypeptide_mRNA_1.object_id=mRNA_gene_1.subject_id and mRNA_gene_1.type_id=42)" +" join" +" feature_relationship mRNA_gene_2 on (polypeptide_mRNA_2.object_id=mRNA_gene_2.subject_id and mRNA_gene_2.type_id=42)"); String allgenesQuery= "select feature_id from feature where (type_id=792 or type_id=423) and organism_id in ("+organism_ids_list+")"; st = conn.prepareStatement(allgenesQuery); logger.info("sql= "+allgenesQuery); rs = st.executeQuery(); UndirectedGraph<Long, DefaultEdge> geneGraph = new SimpleGraph<Long, DefaultEdge>(DefaultEdge.class); long vertexcount=0; logger.info("Adding all vertices to the graph "); while (rs.next()) { Long geneID=rs.getLong("feature_id"); geneGraph.addVertex(geneID); vertexcount++; } logger.info(vertexcount+" vertices added to the graph "); logger.info("sql= "+orthologues_query); st = conn.prepareStatement(orthologues_query); rs = st.executeQuery(); logger.info("Adding all edges to the graph "); long edgecount=0; while (rs.next()) { Long gene1=rs.getLong("gene1_id"); Long gene2=rs.getLong("gene2_id"); logger.info("edge = 
"+rs.getLong("gene1_id")+"->"+rs.getLong("gene2_id")); geneGraph.addEdge(gene1, gene2); edgecount++; } logger.info(edgecount+" edges added to the graph "); ConnectivityInspector ci = new ConnectivityInspector(geneGraph); List <Set <Long>> list =ci.connectedSets(); return list; } private int createPanGenome(int organism_ids[],int vorganism_id) throws Exception{ int no_of_organisms=organism_ids.length; int max_component_length=0; List <Set <Long>> list=getConnectedComponents(organism_ids); for ( Set<Long> componentSet : list){ if(componentSet.size()>max_component_length) max_component_length=componentSet.size(); } Long componentID= new Long(0); String qStr="("; for (int i=0;i<max_component_length-1;i++) {qStr+="?,";} qStr+="?)"; String gene_residues_query=new String("select" +" featureloc.feature_id," +" case" +" when featureloc.strand=1 then" +" substring(srcfeature.residues from featureloc.fmin+1 for (featureloc.fmax-featureloc.fmin) )" +" else" +" translate(substring(srcfeature.residues from featureloc.fmin+1 for (featureloc.fmax-featureloc.fmin) ),'acgt','tgca')" +" end as residues," +" featureloc.strand," +" srcfeature.organism_id" +" from featureloc" +" join feature srcfeature on (featureloc.srcfeature_id=srcfeature.feature_id)" +" where featureloc.feature_id in "+qStr); PreparedStatement st = conn.prepareStatement(gene_residues_query); // build virtual chromosome logger.info("gene_residues_query= "+gene_residues_query); StringBuilder vchromosome = new StringBuilder(); // creates empty builder, capacity 16 // empty chromosome Long chr_feature_id=insertVirtualChromosome(vchromosome,vorganism_id); long fmin=0; // for each connected component // create a virtual gene // make orthopara links b/w the virtual gene and the genes in the connected component int count=0; for ( Set<Long> componentSet : list){ //if(count==100) { // break; //} count++; int gene_param_index=1; for (Long gene_id_param : componentSet){ st.setLong(gene_param_index,gene_id_param); 
gene_param_index++;
            }
            // Pad the unused placeholders of the shared statement with SQL NULL so the IN list
            // only matches the genes of this component.
            int i=gene_param_index;
            while(i<=max_component_length){
                st.setNull(i,Types.INTEGER);
                i++;
            }
            ResultSet rs=st.executeQuery();
            // organism id -> gene id / gene sequence, for the genes kept from this component
            HashMap<Integer,Long> organism_gene_map= new HashMap<Integer,Long>();
            HashMap<Integer,String> organism_genesequence_map= new HashMap<Integer,String>();
            int ortholog_count=0;
            while (rs.next()) {
                Long gene_id=rs.getLong("feature_id");
                String residues=rs.getString("residues");
                Integer gene_strand=rs.getInt("strand"); // read but not used below
                Integer organism_id=rs.getInt("organism_id");
                if(!organism_gene_map.containsKey(organism_id)){
                    organism_gene_map.put(organism_id,gene_id);
                    organism_genesequence_map.put(organism_id,residues);
                    ortholog_count++;
                }
                else{
                    // A second gene from an organism already present: drop that organism's entry.
                    // NOTE(review): a third gene from the same organism would be added again,
                    // because the entry was removed - confirm this is intended.
                    logger.info("filtered gene "+gene_id+" of organism"+organism_id);
                    organism_gene_map.remove(organism_id);
                    organism_genesequence_map.remove(organism_id);
                    ortholog_count--;
                }
                //logger.info(gene_id+"->"+residues);
            }
            ArrayList<String> seqArr = new ArrayList<String>(organism_genesequence_map.values());
            String consensus_seq=consensusDNASequence(seqArr);
            // check after applying connected components filter atleast 1 gene remains
            if(consensus_seq.length()==0) {continue;}
            // Each virtual gene is followed by a 10-base "n" spacer on the virtual chromosome.
            vchromosome.append(consensus_seq);
            vchromosome.append("nnnnnnnnnn");
            HashMap<String,Long> vgene_map= new HashMap<String,Long>();
            // Classify by how many organisms contributed a gene to this component.
            PanGeneStatisticalType geneStatisticalType;
            if(ortholog_count==1){
                geneStatisticalType=PanGeneStatisticalType.UNIQUE;
            }
            else if(ortholog_count==no_of_organisms){
                geneStatisticalType=PanGeneStatisticalType.CORE;
            }
            else{
                geneStatisticalType=PanGeneStatisticalType.MISC;
            }
            vgene_map=createVirtualGene(componentID,vorganism_id);
            //logger.info("Successfully created virtual gene "+vgene_map.get("gene"));
            // fmax excludes the 10-base spacer that was just appended.
            long fmax=vchromosome.length()-10;
            featureLocVirtualGene(vgene_map,vorganism_id,fmin,fmax);
            // The next gene starts after the spacer.
            fmin=vchromosome.length();
            int rank=0;
            PanGeneBioType consensusBiotype=PanGeneBioType.CODING;
            boolean rna_flag=false;
            boolean pseudo_flag=false;
            for ( Long gene_id : organism_gene_map.values()){
                PanGeneBioType
geneBioType=getGeneBioType(gene_id); consensusBiotype=geneBioType; switch(geneBioType){ case NONCODING: rna_flag=true; break; case PSEUDO: pseudo_flag=true; break; } createOrthoParalink(vgene_map.get("gene"),gene_id,rank,geneBioType); //logger.info("ortho para link created b/w gene "+vgene_map.get("gene")+"and "+gene_id+ "rank="+rank); rank++; } if(rna_flag && pseudo_flag){consensusBiotype=PanGeneBioType.MIXED;} updateVirtalGeneType(vgene_map.get("gene"),consensusBiotype,geneStatisticalType); logger.info("inserted pan-gene no "+componentID); componentID++; } // now update the chromosome updateVirtualChromosome(chr_feature_id,vchromosome); logger.info("chromosome updated "); insertorganismMaxGeneNumber(vorganism_id, componentID-1); logger.info("max gene inserted"); return 0; } private PanGeneBioType getGeneBioType(Long gene_id) throws Exception{ PanGeneBioType geneBioType=PanGeneBioType.CODING; PreparedStatement st; ResultSet rs; String gene_bioTypeSQL="" +" select cvterm.name from feature_relationship rna_gene" +" join feature rna_feature on (rna_gene.subject_id=rna_feature.feature_id)" +" join feature gene_feature on (rna_gene.object_id=gene_feature.feature_id)" +" join cvterm on (cvterm.cvterm_id=rna_feature.type_id)" +" where rna_gene.type_id=42 and gene_feature.feature_id=?"; st = conn.prepareStatement(gene_bioTypeSQL); st.setLong(1,gene_id); rs = st.executeQuery(); rs.next(); String rnatype=rs.getString("name"); if(rnatype.equals("mRNA")){ geneBioType=PanGeneBioType.CODING; } else if(rnatype.equals("pseudogenic_transcript")){ geneBioType=PanGeneBioType.PSEUDO; } else{ geneBioType=PanGeneBioType.NONCODING; } return geneBioType; } private String consensusDNASequence(ArrayList<String> residuesArr){ String consensus_seq=new String(); boolean length_mismatch=false; long len=residuesArr.get(0).length(); for ( String seq : residuesArr){ if(seq.length()>len) { len=seq.length(); consensus_seq=seq; length_mismatch=true; } else if(seq.length()<len){ length_mismatch=true; 
} } // if there is a mismatch return the longest sequence ( typically a MSA has to be peformed but its not possible due to time complexity) if(length_mismatch){ return consensus_seq; } // no mismatch proceed and return the sequence with degeneracy/ambiguity codes for(int i=0;i<len;i++){ char result; String chars=new String(""); for(int j=0;j<residuesArr.size();j++){ residuesArr.get(j).charAt(i); if(!chars.contains(residuesArr.get(j).charAt(i)+"")) chars=chars+residuesArr.get(j).charAt(i); } if(chars.equals("a")){ result='a'; } else if(chars.equals("c")){ result='c'; } else if(chars.equals("g")){ result='g'; } else if(chars.equals("t")){ result='t'; } else if(chars.contains("a")&&chars.contains("c")&& !chars.contains("g")&& !chars.contains("t")){ result='m'; } else if(chars.contains("a")&&!chars.contains("c")&&chars.contains("g")&&!chars.contains("t")){ result='r'; } else if(chars.contains("a")&&!chars.contains("c")&&!chars.contains("g")&&chars.contains("t")){ result='w'; } else if(!chars.contains("a")&&chars.contains("c")&&chars.contains("g")&&!chars.contains("t")){ result='s'; } else if(!chars.contains("a")&&chars.contains("c")&&!chars.contains("g")&&chars.contains("t")){ result='y'; } else if(!chars.contains("a")&&!chars.contains("c")&&chars.contains("g")&&chars.contains("t")){ result='k'; } else if(chars.contains("a")&&chars.contains("c")&&chars.contains("g")&&!chars.contains("t")){ result='v'; } else if(chars.contains("a")&&chars.contains("c")&&!chars.contains("g")&&chars.contains("t")){ result='h'; } else if(chars.contains("a")&&!chars.contains("c")&&chars.contains("g")&&chars.contains("t")){ result='d'; } else if(!chars.contains("a")&&chars.contains("c")&&chars.contains("g")&&chars.contains("t")){ result='b'; } else{ result='n'; } consensus_seq=consensus_seq+result; } return consensus_seq; } private void deletePanGenome(String genus,String species,int organism_ids[]) throws Exception{ PreparedStatement st; ResultSet rs; String qStr=""; for(int 
i=0;i<organism_ids.length-1;i++) qStr+=organism_ids[i]+","; qStr+=organism_ids[organism_ids.length-1]; String query_vorganismsql = "SELECT organism_id from organismprop where type_id=1706 and value=?"; st = conn.prepareStatement(query_vorganismsql); st.setString(1,qStr); rs = st.executeQuery(); rs.next(); Long organism_id=rs.getLong("organism_id"); String delete_vorganism_sql = "DELETE from organism where organism_id=?"; st = conn.prepareStatement(delete_vorganism_sql); st.setLong(1,organism_id); st.executeUpdate(); logger.info("deleted organism "+organism_id); } private void updatePanGenome(String genus,String species) throws Exception{ PreparedStatement st; ResultSet rs; String toplevelfeature_organism_sql = "select feature.feature_id,organism.organism_id from" +" organism" +" join" +" feature on (feature.organism_id=organism.organism_id and feature.type_id=427)" +" where organism.genus=? and organism.species=?"; st = conn.prepareStatement(toplevelfeature_organism_sql); st.setString(1,genus); st.setString(2,species); rs = st.executeQuery(); rs.next(); Long vchromosome_id=rs.getLong("feature_id"); Long vorganism_id=rs.getLong("organism_id"); String query_pangene_links_sql = "SELECT vfeature.match_feature_id,vfeature.vgene_id,vfeature.rgene_id" +" FROM feature" +" JOIN" +" (select cast(ltrim(split_part(uniquename,'->',1),'PANGENELINK') as integer) as vgene_id," +" cast(split_part(uniquename,'->',2) as integer) as rgene_id," +" feature_id as match_feature_id from feature where type_id=436 and uniquename like 'PANGENELINK%') as vfeature" +" on (vfeature.vgene_id=feature.feature_id)" +" where feature.organism_id=(select organism_id from organism where abbreviation=?)" +" order by vgene_id"; st = conn.prepareStatement(query_pangene_links_sql,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); st.setString(1,genus.substring(0,1)+species); rs = st.executeQuery(); Map <Long,Set <Long>> pangenemap=new HashMap <Long,Set <Long>>(); long prev_vgene_id=0; Set<Long> 
gene_set=null;
        // Group the (vgene, rgene) rows, which arrive ordered by vgene_id, into
        // pangenemap: virtual gene id -> set of real gene ids currently linked to it.
        // NOTE(review): relies on isFirst()/isLast() on a TYPE_FORWARD_ONLY result set - confirm
        // the driver supports this.
        while (rs.next()) {
            Long rgene_id=rs.getLong("rgene_id");
            Long vgene_id=rs.getLong("vgene_id");
            if(rs.isFirst()){
                gene_set=new HashSet<Long>();
                gene_set.add(rgene_id);
                // if this the only set close the new set
                if(rs.isLast()){
                    pangenemap.put(vgene_id, gene_set);
                }
            }
            // for sets {2,3,....n}
            else{
                if(prev_vgene_id!=vgene_id){
                    // close the previous set and open a new set
                    pangenemap.put(prev_vgene_id, gene_set);
                    gene_set=new HashSet<Long>();
                }
                // add the gene to the new set
                gene_set.add(rgene_id);
                // close the new set
                if(rs.isLast()){
                    pangenemap.put(vgene_id, gene_set);
                }
            }
            prev_vgene_id=vgene_id;
        }
        for(Long vgene_id : pangenemap.keySet()){
            logger.info(vgene_id+"-->"+pangenemap.get(vgene_id));
        }
        // The source organism ids are stored as a comma-separated organismprop value (type_id 1706).
        String query_vorganismsql = "SELECT value from organismprop where type_id=1706 and organism_id=?";
        st = conn.prepareStatement(query_vorganismsql);
        st.setLong(1,vorganism_id);
        rs = st.executeQuery();
        rs.next();
        String organismidslist=rs.getString("value");
        String organismidslistStr []=organismidslist.split(",");
        int organism_ids[]=new int[organismidslistStr.length];
        for(int l=0;l<organismidslistStr.length;l++)
            organism_ids[l]=Integer.parseInt(organismidslistStr[l]);
        // Recompute the components from the current database state.
        List <Set <Long>> componentList=getConnectedComponents(organism_ids);
        Map <Long,Set <Long>> u_pangenemap=new HashMap <Long,Set <Long>>();
        // copy list to map
        long comp_id=0;
        for (Set compSet:componentList){
            u_pangenemap.put(comp_id,compSet);
            comp_id++;
        }
        // (A commented-out block of hard-coded debugging gene sets for u_pangenemap stood here.)
        Set <Long> tobedeleted_vgene_set=new HashSet <Long>();
        Set <Long> donothing_vgene_set=new HashSet <Long>();
        Map <Long,Set <Long>> tobeupdated_add_vgene_map=new HashMap <Long,Set <Long>>();
        Map <Long,Set <Long>> tobeupdated_delete_vgene_map=new HashMap <Long,Set <Long>>();
        Map <Long,Set <Long>> tobeupdated_vgene_map=new HashMap <Long,Set <Long>>();
        Map <Long,Set <Long>> tobeinserted_uvgene_map=new HashMap <Long,Set <Long>>();
        int max_component_length=0;
        // Classify every new component against the existing virtual genes.
        for ( Long u_vgene_id : u_pangenemap.keySet()){
            Set<Long> u_geneset=u_pangenemap.get(u_vgene_id);
            if(u_geneset.size()>max_component_length){
                max_component_length=u_geneset.size();
            }
            // Each entry: [operation, vgene id, gene ids...] (see get_listof_matching_sets).
            ArrayList<List<Long>> op_vgeneids_rgeneids_list=get_listof_matching_sets(u_geneset,pangenemap);
            if(op_vgeneids_rgeneids_list.size()==0){
                // no matches
                //logger.info(u_vgene_id+"->"+u_geneset+" NO MATCHES");
                tobeinserted_uvgene_map.put(u_vgene_id,u_geneset);
            }
            else if(op_vgeneids_rgeneids_list.size()>1){
                // multiple matches: replace all overlapping virtual genes with one new gene
                //logger.info(u_vgene_id+"->"+u_geneset+" MULTIMATCH "+op_vgeneids_rgeneids_list );
                for (List<Long> op_vgeneids_rgeneids : op_vgeneids_rgeneids_list){
                    tobedeleted_vgene_set.add(op_vgeneids_rgeneids.get(1));
                }
                tobeinserted_uvgene_map.put(u_vgene_id,u_geneset);
            }
            else{
                // exactly one match
                //logger.info(u_vgene_id+"->"+u_geneset+" SINGLEMATCH "+op_vgeneids_rgeneids_list );
                Long vgene_id=op_vgeneids_rgeneids_list.get(0).get(1);
                Set <Long> rgene_id_set=new HashSet(op_vgeneids_rgeneids_list.get(0).subList(2,op_vgeneids_rgeneids_list.get(0).size()));
                // add these elements
                if(op_vgeneids_rgeneids_list.get(0).get(0)==1){
                    tobeupdated_add_vgene_map.put(vgene_id,rgene_id_set);
                    tobeupdated_vgene_map.put(vgene_id,u_geneset);
                }
                // remove these elements
                else if(op_vgeneids_rgeneids_list.get(0).get(0)==2){
                    tobeupdated_delete_vgene_map.put(vgene_id,rgene_id_set);
                    tobeupdated_vgene_map.put(vgene_id,u_geneset);
                }
                // NOTE(review): this branch receives operation 0 (exact match) and also operation 3
                // (both additions and deletions); the latter is therefore left unchanged - confirm.
                else{
                    donothing_vgene_set.add(vgene_id);
                }
            }
        }
        // Every existing virtual gene that was neither kept nor updated is scheduled for deletion.
        Set<Long> temp_set=new HashSet<Long>();
        temp_set.addAll(pangenemap.keySet());
        temp_set.removeAll(donothing_vgene_set);
        temp_set.removeAll(tobeupdated_add_vgene_map.keySet());
        temp_set.removeAll(tobeupdated_delete_vgene_map.keySet());
        tobedeleted_vgene_set.addAll(temp_set);
        logger.info("DO NOTHING to these vgenes"+donothing_vgene_set);
        logger.info("UPDATE these vgenes(add)"+tobeupdated_add_vgene_map.keySet());
        logger.info("UPDATE these vgenes(del)"+tobeupdated_delete_vgene_map.keySet());
        logger.info("DELETE these vgenes"+tobedeleted_vgene_set);
        logger.info("INSERT these vgenes"+tobeinserted_uvgene_map.keySet());
        /*if(true){ return; }*/
        // rebuild chromosome
        StringBuilder vchromosome = new StringBuilder(); // creates empty builder, capacity 16
        long fmin=0;
        long fmax=0;
        // Step 1: unchanged virtual genes keep their sequence but move to a new position.
        // NOTE(review): the statements prepared in this method are never closed in the visible code.
        for( Long donothing_vgene_id : donothing_vgene_set){
            String query_get_featureloc_sql = "select substring(srcfeature.residues from featureloc.fmin+1 for (featureloc.fmax-featureloc.fmin)) as sequence from featureloc"
                +" join feature on (featureloc.feature_id=feature.feature_id)"
                +" join feature srcfeature on (featureloc.srcfeature_id=srcfeature.feature_id)"
                +" where feature.type_id=792 and featureloc.feature_id=?";
            st = conn.prepareStatement(query_get_featureloc_sql,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
            st.setLong(1,donothing_vgene_id);
            rs = st.executeQuery();
            rs.next();
            String sequence=rs.getString("sequence");
            fmin=vchromosome.length();
            vchromosome.append(sequence);
            vchromosome.append("nnnnnnnnnn");
            // fmax excludes the 10-base spacer
            fmax=vchromosome.length()-10;
            logger.info(donothing_vgene_id+"("+fmin+","+fmax+")");
            featureReLocVirtualGene(donothing_vgene_id,fmin,fmax);
        }
        // updating genes
        // refeatureloc
        // add/delete links
        int no_of_organisms=organism_ids.length;
        // One placeholder per member of the largest new component; unused ones are set to NULL.
        String qStr="(";
        for (int i=0;i<max_component_length-1;i++) {qStr+="?,";}
        qStr+="?)";
        String gene_residues_query=new String("select"
            +" featureloc.feature_id,"
            +" case"
            +" when featureloc.strand=1 then"
            +" substring(srcfeature.residues from featureloc.fmin+1 for (featureloc.fmax-featureloc.fmin) )"
            +" else"
            +" translate(substring(srcfeature.residues from featureloc.fmin+1 for (featureloc.fmax-featureloc.fmin) ),'acgt','tgca')"
            +" end as residues,"
            +" featureloc.strand,"
            +" srcfeature.organism_id"
            +" from featureloc"
            +" join feature srcfeature on (featureloc.srcfeature_id=srcfeature.feature_id)"
            +" where featureloc.feature_id in "+qStr);
        st = conn.prepareStatement(gene_residues_query);
        // Step 2: virtual genes that gained members - new consensus, new position, extra links.
        for( Long update_add_vgene_id : tobeupdated_add_vgene_map.keySet()){
            int gene_param_index=1;
            for (Long gene_id_param : tobeupdated_vgene_map.get(update_add_vgene_id)){
                st.setLong(gene_param_index,gene_id_param);
                gene_param_index++;
            }
            int i=gene_param_index;
            while(i<=max_component_length){
                st.setNull(i,Types.INTEGER);
                i++;
            }
            rs=st.executeQuery();
            HashMap<Integer,Long> organism_gene_map= new HashMap<Integer,Long>();
            HashMap<Integer,String> organism_genesequence_map= new HashMap<Integer,String>();
            int ortholog_count=0;
            // Same per-organism duplicate filter as in createPanGenome.
            while (rs.next()) {
                Long gene_id=rs.getLong("feature_id");
                String residues=rs.getString("residues");
                Integer gene_strand=rs.getInt("strand");
                Integer organism_id=rs.getInt("organism_id");
                if(!organism_gene_map.containsKey(organism_id)){
                    organism_gene_map.put(organism_id,gene_id);
                    organism_genesequence_map.put(organism_id,residues);
                    ortholog_count++;
                }
                else{
                    logger.info("filtered gene "+gene_id+" of organism"+organism_id);
                    organism_gene_map.remove(organism_id);
                    organism_genesequence_map.remove(organism_id);
                    ortholog_count--;
                }
                //logger.info(gene_id+"->"+residues);
            }
            PanGeneStatisticalType geneStatisticalType;
            if(ortholog_count==1){
                geneStatisticalType=PanGeneStatisticalType.UNIQUE;
            }
            else if(ortholog_count==no_of_organisms){
                geneStatisticalType=PanGeneStatisticalType.CORE;
            }
            else{
                geneStatisticalType=PanGeneStatisticalType.MISC;
            }
            ArrayList<String> seqArr = new ArrayList<String>(organism_genesequence_map.values());
            String consensus_seq=consensusDNASequence(seqArr);
            // check after applying connected components filter atleast 1 gene remains
            if(consensus_seq.length()==0) {continue;}
            fmin=vchromosome.length();
            vchromosome.append(consensus_seq);
            vchromosome.append("nnnnnnnnnn");
            fmax=vchromosome.length()-10;
            featureReLocVirtualGene(update_add_vgene_id,fmin,fmax);
            PanGeneBioType consensusBiotype=PanGeneBioType.CODING;
            boolean rna_flag=false;
            boolean pseudo_flag=false;
            // add links
            // NOTE(review): the biotype is derived only from the newly added genes and every new
            // link is created with rank 0 - confirm both are intended.
            for (Long tobeadded_rgene_id: tobeupdated_add_vgene_map.get(update_add_vgene_id)){
                PanGeneBioType geneBioType=getGeneBioType(tobeadded_rgene_id);
                consensusBiotype=geneBioType;
                switch(geneBioType){
                    case NONCODING:
                        rna_flag=true;
                        break;
                    case PSEUDO:
                        pseudo_flag=true;
                        break;
                }
                createOrthoParalink(update_add_vgene_id,tobeadded_rgene_id,0,geneBioType);
            }
            if(rna_flag && pseudo_flag){consensusBiotype=PanGeneBioType.MIXED;}
            updateVirtalGeneType(update_add_vgene_id,consensusBiotype,geneStatisticalType);
        }
        st = conn.prepareStatement(gene_residues_query);
        // Step 3: virtual genes that lost members - new consensus, new position, links removed.
        for( Long update_delete_vgene_id : tobeupdated_delete_vgene_map.keySet()){
            int gene_param_index=1;
            for (Long gene_id_param : tobeupdated_vgene_map.get(update_delete_vgene_id)){
                st.setLong(gene_param_index,gene_id_param);
                gene_param_index++;
            }
            int i=gene_param_index;
            while(i<=max_component_length){
                st.setNull(i,Types.INTEGER);
                i++;
            }
            rs=st.executeQuery();
            HashMap<Integer,Long> organism_gene_map= new HashMap<Integer,Long>();
            HashMap<Integer,String> organism_genesequence_map= new HashMap<Integer,String>();
            int ortholog_count=0;
            while (rs.next()) {
                Long gene_id=rs.getLong("feature_id");
                String residues=rs.getString("residues");
                Integer gene_strand=rs.getInt("strand");
                Integer organism_id=rs.getInt("organism_id");
                if(!organism_gene_map.containsKey(organism_id)){
                    organism_gene_map.put(organism_id,gene_id);
                    organism_genesequence_map.put(organism_id,residues);
                    ortholog_count++;
                }
                else{
                    logger.info("filtered gene "+gene_id+" of organism"+organism_id);
                    organism_gene_map.remove(organism_id);
                    organism_genesequence_map.remove(organism_id);
                    ortholog_count--;
                }
                //logger.info(gene_id+"->"+residues);
            }
            PanGeneStatisticalType geneStatisticalType;
            if(ortholog_count==1){
                geneStatisticalType=PanGeneStatisticalType.UNIQUE;
            }
            else if(ortholog_count==no_of_organisms){
                geneStatisticalType=PanGeneStatisticalType.CORE;
            }
            else{
                geneStatisticalType=PanGeneStatisticalType.MISC;
            }
            ArrayList<String> seqArr = new ArrayList<String>(organism_genesequence_map.values());
            String consensus_seq=consensusDNASequence(seqArr);
            // check after applying connected components filter atleast 1 gene remains
            if(consensus_seq.length()==0) {continue;}
            fmin=vchromosome.length();
            vchromosome.append(consensus_seq);
            vchromosome.append("nnnnnnnnnn");
            fmax=vchromosome.length()-10;
            featureReLocVirtualGene(update_delete_vgene_id,fmin,fmax);
            PanGeneBioType consensusBiotype=PanGeneBioType.CODING;
            boolean rna_flag=false;
            boolean pseudo_flag=false;
            // delete links
            // NOTE(review): the biotype written below is derived from the genes being removed,
            // not from the genes that remain - confirm.
            for (Long tobedeleted_rgene_id: tobeupdated_delete_vgene_map.get(update_delete_vgene_id)){
                PanGeneBioType geneBioType=getGeneBioType(tobedeleted_rgene_id);
                consensusBiotype=geneBioType;
                switch(geneBioType){
                    case NONCODING:
                        rna_flag=true;
                        break;
                    case PSEUDO:
                        pseudo_flag=true;
                        break;
                }
                deleteOrthoParalink(update_delete_vgene_id,tobedeleted_rgene_id);
            }
            if(rna_flag && pseudo_flag){consensusBiotype=PanGeneBioType.MIXED;}
            updateVirtalGeneType(update_delete_vgene_id,consensusBiotype,geneStatisticalType);
        }
        // insert genes
        // create vgene
        // featureloc vgene
        // create orthopara link
        st = conn.prepareStatement(gene_residues_query);
        // Step 4: new virtual genes are numbered after the stored maximum gene number.
        Long componentID= getorganismMaxGeneNumber(vorganism_id.intValue());
        componentID++;
        for( Long new_vgene_id : tobeinserted_uvgene_map.keySet()){
            logger.info("insert "+tobeinserted_uvgene_map.get(new_vgene_id));
            int gene_param_index=1;
            for (Long gene_id_param : tobeinserted_uvgene_map.get(new_vgene_id)){
                st.setLong(gene_param_index,gene_id_param);
                gene_param_index++;
            }
            int i=gene_param_index;
            while(i<=max_component_length){
                st.setNull(i,Types.INTEGER);
                i++;
            }
            rs=st.executeQuery();
            HashMap<Integer,Long> organism_gene_map= new HashMap<Integer,Long>();
            HashMap<Integer,String> organism_genesequence_map= new HashMap<Integer,String>();
            int ortholog_count=0;
            while (rs.next()) {
                Long gene_id=rs.getLong("feature_id");
                String residues=rs.getString("residues");
                Integer gene_strand=rs.getInt("strand");
                Integer organism_id=rs.getInt("organism_id");
                if(!organism_gene_map.containsKey(organism_id)){
                    organism_gene_map.put(organism_id,gene_id);
                    organism_genesequence_map.put(organism_id,residues);
                    ortholog_count++;
                }
                else{
                    logger.info("filtered gene "+gene_id+" of organism"+organism_id);
                    organism_gene_map.remove(organism_id);
                    organism_genesequence_map.remove(organism_id);
                    ortholog_count--;
                }
                //logger.info(gene_id+"->"+residues);
            }
            PanGeneStatisticalType geneStatisticalType;
            if(ortholog_count==1){
                geneStatisticalType=PanGeneStatisticalType.UNIQUE;
            }
            else if(ortholog_count==no_of_organisms){
                geneStatisticalType=PanGeneStatisticalType.CORE;
            }
            else{
                geneStatisticalType=PanGeneStatisticalType.MISC;
            }
            ArrayList<String> seqArr = new ArrayList<String>(organism_genesequence_map.values());
            String consensus_seq=consensusDNASequence(seqArr);
            // check after applying connected components filter atleast 1 gene remains
            if(consensus_seq.length()==0) {continue;}
            fmin=vchromosome.length();
vchromosome.append(consensus_seq); vchromosome.append("nnnnnnnnnn"); HashMap<String,Long> vgene_map= new HashMap<String,Long>(); vgene_map=createVirtualGene(componentID,vorganism_id.intValue()); //logger.info("Successfully created virtual gene "+vgene_map.get("gene")); fmax=vchromosome.length()-10; featureLocVirtualGene(vgene_map,vorganism_id.intValue(),fmin,fmax); int rank=0; boolean rna_flag=false; boolean pseudo_flag=false; PanGeneBioType consensusBiotype=PanGeneBioType.CODING; for ( Long gene_id : organism_gene_map.values()){ PanGeneBioType geneBioType=getGeneBioType(gene_id); consensusBiotype=geneBioType; switch(geneBioType){ case NONCODING: rna_flag=true; break; case PSEUDO: pseudo_flag=true; break; } createOrthoParalink(vgene_map.get("gene"),gene_id,rank,geneBioType); //logger.info("ortho para link created b/w gene "+vgene_map.get("gene")+"and "+gene_id+ "rank="+rank); rank++; } if(rna_flag && pseudo_flag){consensusBiotype=PanGeneBioType.MIXED;} updateVirtalGeneType(vgene_map.get("gene"),consensusBiotype,geneStatisticalType); logger.info("inserted pan-gene no "+componentID); componentID++; } // delete those remaining genes,mRNA,exon and polypeptides for( Long tobedeleted_vgene_id : tobedeleted_vgene_set){ removeVirtualGene(tobedeleted_vgene_id); } // now update the chromosome updateVirtualChromosome(vchromosome_id,vchromosome); updateorganismMaxGeneNumber(vorganism_id.intValue(),componentID-1); } private ArrayList<List<Long>> get_listof_matching_sets(Set<Long> geneset,Map <Long,Set <Long>> pangenemap){ ArrayList<List<Long>> op_vgeneids_rgeneids_list= new ArrayList<List<Long>>(); Long operation=new Long(0); Long rgene_ids=new Long(0); for ( Long vgene_id : pangenemap.keySet()){ Set <Long> old_geneset= pangenemap.get(vgene_id); HashSet<Long> intersection_set=new HashSet<Long>(old_geneset); intersection_set.retainAll(geneset); HashSet<Long> oldminusnew_set=new HashSet<Long>(old_geneset); oldminusnew_set.removeAll(geneset); HashSet<Long> newminusold_set=new 
HashSet<Long>(geneset); newminusold_set.removeAll(old_geneset); if(!intersection_set.isEmpty()){ ArrayList<Long> op_vgeneids_rgeneids= new ArrayList<Long>(); if(oldminusnew_set.isEmpty()&&newminusold_set.isEmpty()){ // exact match operation=new Long(0); op_vgeneids_rgeneids.add(operation); op_vgeneids_rgeneids.add(vgene_id); } else if(oldminusnew_set.isEmpty()){ // addition only operation=new Long(1); op_vgeneids_rgeneids.add(operation); op_vgeneids_rgeneids.add(vgene_id); op_vgeneids_rgeneids.addAll(newminusold_set); } else if(newminusold_set.isEmpty()) { // deletion only operation=new Long(2); op_vgeneids_rgeneids.add(operation); op_vgeneids_rgeneids.add(vgene_id); op_vgeneids_rgeneids.addAll(oldminusnew_set); } else{ // addition followed by deletion operation=new Long(3); op_vgeneids_rgeneids.add(operation); op_vgeneids_rgeneids.add(vgene_id); op_vgeneids_rgeneids.addAll(newminusold_set); // delimiter op_vgeneids_rgeneids.add(new Long(0)); op_vgeneids_rgeneids.addAll(oldminusnew_set); } op_vgeneids_rgeneids_list.add(op_vgeneids_rgeneids); } } return op_vgeneids_rgeneids_list; } private int insertVirtualOrganism(String genus,String species,int organism_ids[]) throws Exception{ PreparedStatement st; ResultSet rs; String organismsql = "INSERT INTO organism (abbreviation,genus,species,common_name,comment) values(?,?,?,?,?) 
RETURNING organism_id"; st = conn.prepareStatement(organismsql); st.setString(1,genus.substring(0,1)+species); st.setString(2,genus); st.setString(3,species); st.setString(4,genus.substring(0,1)+species); st.setString(5,"Pan-genome"); rs = st.executeQuery(); rs.next(); int organism_id=rs.getInt("organism_id"); String organismpropsql = "INSERT INTO organismprop (organism_id,type_id,value,rank) values(?,?,?,0)"; st = conn.prepareStatement(organismpropsql); st.setLong(1,organism_id); st.setLong(2,73772); st.setString(3,"true"); st.addBatch(); String org_idStr=""; for(int i=0;i<organism_ids.length-1;i++) org_idStr+=organism_ids[i]+","; org_idStr+=organism_ids[organism_ids.length-1]; st.setLong(1,organism_id); st.setLong(2,1706); st.setString(3,org_idStr); st.addBatch(); st.executeBatch(); return organism_id; } private void insertorganismMaxGeneNumber(int organism_id,Long maxgeneNumber) throws Exception{ PreparedStatement st; ResultSet rs; String organismpropsql = "INSERT INTO organismprop (organism_id,type_id,value,rank) values(?,?,?,0)"; st = conn.prepareStatement(organismpropsql); st.setInt(1,organism_id); st.setLong(2,26772); st.setString(3,""+maxgeneNumber); st.executeUpdate(); } private void updateorganismMaxGeneNumber(int organism_id,Long maxgeneNumber) throws Exception{ PreparedStatement st; ResultSet rs; String organismpropsql = "UPDATE organismprop SET value=? where type_id=? and organism_id=?"; st = conn.prepareStatement(organismpropsql); st.setString(1,""+maxgeneNumber); st.setLong(2,26772); st.setInt(3,organism_id); st.executeUpdate(); } private Long getorganismMaxGeneNumber(int organism_id) throws Exception{ PreparedStatement st; ResultSet rs; String organismpropsql = "SELECT cast(value as bigint) as maxgenenumber from organismprop where organism_id=? 
and type_id=?"; st = conn.prepareStatement(organismpropsql); st.setInt(1,organism_id); st.setLong(2,26772); rs = st.executeQuery(); rs.next(); Long maxgenenumber=rs.getLong("maxgenenumber"); return maxgenenumber; } private Long insertVirtualChromosome(StringBuilder vChromosome,int vorganism_id) throws Exception{ PreparedStatement st; ResultSet rs; String uniquename="SALChr"; String chromosomesql = "INSERT INTO feature(organism_id,uniquename,seqlen,residues,type_id) values (?,?,?,?,427) RETURNING feature_id"; st = conn.prepareStatement(chromosomesql); st.setInt(1,vorganism_id); st.setString(2,uniquename); st.setLong(3,vChromosome.length()); st.setString(4,vChromosome.toString()); rs = st.executeQuery(); rs.next(); Long chr_feature_id=rs.getLong("feature_id"); String toplevelfeaturesql = "INSERT INTO featureprop(feature_id,type_id,value,rank) values(?,26753,'true',0)"; st = conn.prepareStatement(toplevelfeaturesql); st.setLong(1,chr_feature_id); st.executeUpdate(); return chr_feature_id; } private void updateVirtualChromosome(Long feature_id,StringBuilder vChromosome) throws Exception{ PreparedStatement st; ResultSet rs; String genesql = "UPDATE feature SET seqlen=?,residues=? 
where feature_id=?"; st = conn.prepareStatement(genesql); st.setLong(1,vChromosome.length()); st.setString(2,vChromosome.toString()); st.setLong(3,feature_id); st.executeUpdate(); } private HashMap<String,Long> createVirtualGene(Long gene_id,int vorganism_id) throws Exception{ HashMap<String,Long> vgene_map= new HashMap<String,Long>(); PreparedStatement st; ResultSet rs; String genename="SAL"+gene_id; String genesql = "INSERT INTO feature(organism_id,uniquename,type_id) values (?,?,792) RETURNING feature_id"; st = conn.prepareStatement(genesql); st.setInt(1,vorganism_id); st.setString(2,genename); rs = st.executeQuery(); rs.next(); Long gene_feature_id=rs.getLong("feature_id"); String mRNAsql = "INSERT INTO feature(organism_id,uniquename,type_id) values (?,?,321) RETURNING feature_id"; st = conn.prepareStatement(mRNAsql); st.setInt(1,vorganism_id); st.setString(2,genename+".1"); rs = st.executeQuery(); rs.next(); Long mRNA_feature_id=rs.getLong("feature_id"); String exonsql = "INSERT INTO feature(organism_id,uniquename,type_id) values (?,?,234) RETURNING feature_id"; st = conn.prepareStatement(exonsql); st.setInt(1,vorganism_id); st.setString(2,genename+".1:exon:1"); rs = st.executeQuery(); rs.next(); Long exon_feature_id=rs.getLong("feature_id"); String polypeptidesql = "INSERT INTO feature(organism_id,uniquename,type_id) values (?,?,191) RETURNING feature_id"; st = conn.prepareStatement(polypeptidesql); st.setInt(1,vorganism_id); st.setString(2,genename+".1:pep"); rs = st.executeQuery(); rs.next(); Long polypeptide_feature_id=rs.getLong("feature_id"); vgene_map.put("gene",gene_feature_id); vgene_map.put("mRNA",mRNA_feature_id); vgene_map.put("exon",exon_feature_id); vgene_map.put("polypeptide",polypeptide_feature_id); // feature_relationships String vgene_relationshipquerysql="INSERT INTO feature_relationship (subject_id,object_id,type_id) values (?,?,?)"; st = conn.prepareStatement(vgene_relationshipquerysql); // mRNA part_of gene st.setLong(1,mRNA_feature_id); 
st.setLong(2,gene_feature_id); st.setLong(3,42); st.addBatch(); // exon part_of mRNA st.setLong(1,exon_feature_id); st.setLong(2,mRNA_feature_id); st.setLong(3,42); st.addBatch(); // polypeptide derives_from mRNA st.setLong(1,polypeptide_feature_id); st.setLong(2,mRNA_feature_id); st.setLong(3,69); st.addBatch(); st.executeBatch(); return(vgene_map); } private void updateVirtalGeneType(Long vgene_id,PanGeneBioType bioType,PanGeneStatisticalType statType) throws Exception{ // 1=grey 2=red 3=green 5=cyan int color=0; switch(statType){ case CORE: color=1; break; case MISC: color=3; break; case UNIQUE: color=2; break; } PreparedStatement st; ResultSet rs; String geneColorSql = "" +" UPDATE featureprop set value=?,rank=0" +" where feature_id=? and type_id=26768"; st = conn.prepareStatement(geneColorSql); st.setString(1,""+color); st.setLong(2,vgene_id); if(st.executeUpdate()==0){ geneColorSql = "" +" INSERT INTO featureprop(feature_id,type_id,value,rank) values(?,26768,?,0)"; st = conn.prepareStatement(geneColorSql); st.setLong(1,vgene_id); st.setString(2,""+color); st.executeUpdate(); } //logger.info("gene"+vgene_id+"color updated"); switch(bioType){ case CODING: color=1; break; case NONCODING: color=3; break; case PSEUDO: color=2; break; case MIXED: color=5; break; } String rnaColorSql ="" +" UPDATE featureprop set value=?,rank=0" +" where feature_id=(" +" select rna_feature.feature_id" +" from feature_relationship rna_gene" +" join feature rna_feature on (rna_gene.subject_id=rna_feature.feature_id)" +" join feature gene_feature on (rna_gene.object_id=gene_feature.feature_id)" +" where rna_gene.type_id=42 and gene_feature.feature_id=?" +" and rna_feature.type_id in (321,339,340,361,743,362,734))" +" and type_id=26768"; st = conn.prepareStatement(rnaColorSql); st.setString(1,""+color); st.setLong(2,vgene_id); if(st.executeUpdate()==0){ rnaColorSql ="" +" INSERT INTO featureprop(feature_id,type_id,value,rank)" +" select rna_feature.feature_id,26768 as type_id,? 
as value,0 as rank" +" from feature_relationship rna_gene" +" join feature rna_feature on (rna_gene.subject_id=rna_feature.feature_id)" +" join feature gene_feature on (rna_gene.object_id=gene_feature.feature_id)" +" where rna_gene.type_id=42 and gene_feature.feature_id=?" +" and rna_feature.type_id in (321,339,340,361,743,362,734)"; st = conn.prepareStatement(rnaColorSql); st.setString(1,""+color); st.setLong(2,vgene_id); st.executeUpdate(); } //logger.info("gene"+vgene_id+"rna color updated"); } private void featureLocVirtualGene(HashMap<String,Long> vgene_map,int organism_id,long fmin,long fmax) throws Exception{ PreparedStatement st; ResultSet rs; String featureLocsql = "" +" INSERT INTO featureloc(feature_id,srcfeature_id,fmin,fmax)" +" SELECT ? as feature_id,feature_id as srcfeature_id,? as fmin,? as fmax" +" from" +" feature where organism_id=? and type_id=427"; st = conn.prepareStatement(featureLocsql); //logger.info("featureloc query= "+featureLocsql); st.setLong(1,vgene_map.get("gene")); st.setLong(2,fmin); st.setLong(3,fmax); st.setInt(4,organism_id); st.addBatch(); st.setLong(1,vgene_map.get("mRNA")); st.setLong(2,fmin); st.setLong(3,fmax); st.setInt(4,organism_id); st.addBatch(); st.setLong(1,vgene_map.get("exon")); st.setLong(2,fmin); st.setLong(3,fmax); st.setInt(4,organism_id); st.addBatch(); st.setLong(1,vgene_map.get("polypeptide")); st.setLong(2,fmin); st.setLong(3,fmax); st.setInt(4,organism_id); st.addBatch(); st.executeBatch(); } private void featureReLocVirtualGene(Long vgene_id,long fmin,long fmax) throws Exception{ PreparedStatement st; ResultSet rs; String featureLocsql = "" +" UPDATE featureloc set fmin=?,fmax=?" 
+" where feature_id in" +" (select gene.feature_id as feature_id from" +" feature gene" +" join" +" feature_relationship mRNA_gene on (gene.feature_id=mRNA_gene.object_id and mRNA_gene.type_id=42)" +" join" +" feature_relationship exon_mRNA on (mRNA_gene.subject_id=exon_mRNA.object_id and exon_mRNA.type_id=42)" +" join" +" feature_relationship polypeptide_mRNA on (polypeptide_mRNA.object_id=mRNA_gene.subject_id and polypeptide_mRNA.type_id=69)" +" where gene.feature_id=?" +" union" +" select mRNA_gene.subject_id as feature_id from" +" feature gene" +" join" +" feature_relationship mRNA_gene on (gene.feature_id=mRNA_gene.object_id and mRNA_gene.type_id=42)" +" join" +" feature_relationship exon_mRNA on (mRNA_gene.subject_id=exon_mRNA.object_id and exon_mRNA.type_id=42)" +" join" +" feature_relationship polypeptide_mRNA on (polypeptide_mRNA.object_id=mRNA_gene.subject_id and polypeptide_mRNA.type_id=69)" +" where gene.feature_id=?" +" union" +" select exon_mRNA.subject_id as feature_id from" +" feature gene" +" join" +" feature_relationship mRNA_gene on (gene.feature_id=mRNA_gene.object_id and mRNA_gene.type_id=42)" +" join" +" feature_relationship exon_mRNA on (mRNA_gene.subject_id=exon_mRNA.object_id and exon_mRNA.type_id=42)" +" join" +" feature_relationship polypeptide_mRNA on (polypeptide_mRNA.object_id=mRNA_gene.subject_id and polypeptide_mRNA.type_id=69)" +" where gene.feature_id=?" 
+" union" +" select polypeptide_mRNA.subject_id as feature_id from" +" feature gene" +" join" +" feature_relationship mRNA_gene on (gene.feature_id=mRNA_gene.object_id and mRNA_gene.type_id=42)" +" join" +" feature_relationship exon_mRNA on (mRNA_gene.subject_id=exon_mRNA.object_id and exon_mRNA.type_id=42)" +" join" +" feature_relationship polypeptide_mRNA on (polypeptide_mRNA.object_id=mRNA_gene.subject_id and polypeptide_mRNA.type_id=69)" +" where gene.feature_id=?)"; st = conn.prepareStatement(featureLocsql); //logger.info("featureloc query= "+featureLocsql); st.setLong(1,fmin); st.setLong(2,fmax); st.setLong(3,vgene_id); st.setLong(4,vgene_id); st.setLong(5,vgene_id); st.setLong(6,vgene_id); st.executeUpdate(); } private void removeVirtualGene(Long vgene_id) throws Exception{ PreparedStatement st; ResultSet rs; String deleteVirtualgenesql = "" +" DELETE from feature" +" where feature_id in" +" (select gene.feature_id as feature_id from" +" feature gene" +" join" +" feature_relationship mRNA_gene on (gene.feature_id=mRNA_gene.object_id and mRNA_gene.type_id=42)" +" join" +" feature_relationship exon_mRNA on (mRNA_gene.subject_id=exon_mRNA.object_id and exon_mRNA.type_id=42)" +" join" +" feature_relationship polypeptide_mRNA on (polypeptide_mRNA.object_id=mRNA_gene.subject_id and polypeptide_mRNA.type_id=69)" +" where gene.feature_id=?" +" union" +" select mRNA_gene.subject_id as feature_id from" +" feature gene" +" join" +" feature_relationship mRNA_gene on (gene.feature_id=mRNA_gene.object_id and mRNA_gene.type_id=42)" +" join" +" feature_relationship exon_mRNA on (mRNA_gene.subject_id=exon_mRNA.object_id and exon_mRNA.type_id=42)" +" join" +" feature_relationship polypeptide_mRNA on (polypeptide_mRNA.object_id=mRNA_gene.subject_id and polypeptide_mRNA.type_id=69)" +" where gene.feature_id=?" 
+" union" +" select exon_mRNA.subject_id as feature_id from" +" feature gene" +" join" +" feature_relationship mRNA_gene on (gene.feature_id=mRNA_gene.object_id and mRNA_gene.type_id=42)" +" join" +" feature_relationship exon_mRNA on (mRNA_gene.subject_id=exon_mRNA.object_id and exon_mRNA.type_id=42)" +" join" +" feature_relationship polypeptide_mRNA on (polypeptide_mRNA.object_id=mRNA_gene.subject_id and polypeptide_mRNA.type_id=69)" +" where gene.feature_id=?" +" union" +" select polypeptide_mRNA.subject_id as feature_id from" +" feature gene" +" join" +" feature_relationship mRNA_gene on (gene.feature_id=mRNA_gene.object_id and mRNA_gene.type_id=42)" +" join" +" feature_relationship exon_mRNA on (mRNA_gene.subject_id=exon_mRNA.object_id and exon_mRNA.type_id=42)" +" join" +" feature_relationship polypeptide_mRNA on (polypeptide_mRNA.object_id=mRNA_gene.subject_id and polypeptide_mRNA.type_id=69)" +" where gene.feature_id=?)"; st = conn.prepareStatement(deleteVirtualgenesql); //logger.info("featureloc query= "+featureLocsql); st.setLong(1,vgene_id); st.setLong(2,vgene_id); st.setLong(3,vgene_id); st.setLong(4,vgene_id); st.executeUpdate(); } private int createOrthoParalink(Long vgene_id,Long orthogene_id,int rank,PanGeneBioType geneBioType) throws Exception{ PreparedStatement st; ResultSet rs; String uniquename="PANGENELINK"+vgene_id+"->"+orthogene_id+""; String orthoparafeaturesql="" +" INSERT INTO feature (organism_id,uniquename,type_id)" +" SELECT organism_id,? as uniquename,436 as type_id" +" from feature where feature_id=?" 
+" RETURNING feature_id"; st = conn.prepareStatement(orthoparafeaturesql); st.setString(1,uniquename); st.setLong(2,orthogene_id); rs = st.executeQuery(); rs.next(); Long orthoparafeature_id=rs.getLong("feature_id"); String vgene_orthopararelationshipquerysql="INSERT INTO feature_relationship (subject_id,object_id,type_id,rank) values (?,?,78,?)"; st = conn.prepareStatement(vgene_orthopararelationshipquerysql); st.setLong(1,vgene_id); st.setLong(2,orthoparafeature_id); st.setInt(3,rank); st.executeUpdate(); String polypeptide_orthopararelationshipquerysql="" +" INSERT INTO feature_relationship(subject_id,object_id,type_id,rank)" +" SELECT gene_product_RNA.subject_id,? as object_id,78 as type_id,? as rank" +" from" +" feature_relationship RNA_gene " +" join feature_relationship gene_product_RNA on (RNA_gene.type_id=42 and RNA_gene.object_id=? and RNA_gene.subject_id=gene_product_RNA.object_id and gene_product_RNA.type_id=?)"; //logger.info("orthopararelationshipquerysql= "+polypeptide_orthopararelationshipquerysql); //logger.info("orthoparafeature_id="+orthoparafeature_id+ " orthogene_id="+orthogene_id+ " rank="+rank); st = conn.prepareStatement(polypeptide_orthopararelationshipquerysql); st.setLong(1,orthoparafeature_id); st.setInt(2,rank); st.setLong(3,orthogene_id); if(geneBioType==PanGeneBioType.NONCODING){ st.setLong(4,42); // exon part_of RNA } else{ st.setLong(4,69); // polypeptide derives from mRNA } st.executeUpdate(); return 0; } private int deleteOrthoParalink(Long vgene_id,Long orthogene_id) throws Exception{ PreparedStatement st; ResultSet rs; String uniquename="PANGENELINK"+vgene_id+"->"+orthogene_id+""; String orthoparafeaturesql="" +" DELETE from feature " +" where uniquename=?"; st = conn.prepareStatement(orthoparafeaturesql); st.setString(1,uniquename); st.executeUpdate(); return 0; } public static void main(String[] args) throws ClassNotFoundException, SQLException, Exception{ logger.info("hi there...."); if (args.length !=4) { 
System.err.println("Usage: java GeneIndexManager <jdbc URL> <database username> create <list of organism ids seperated by commas>"); System.err.println("Usage: java GeneIndexManager <jdbc URL> <database username> read <virtual organism id>"); System.err.println("Usage: java GeneIndexManager <jdbc URL> <database username> update <virutal organism id>"); System.err.println("Usage: java GeneIndexManager <jdbc URL> <database username> delete <virtual organism id>"); System.exit(1); } String jdbcURL = args[0]; String databaseUsername = args[1]; String operation = args[2]; String operationparam = args[3]; String databasePassword = System.getProperty("password"); if (databasePassword == null) { databasePassword = new String( System.console().readPassword("Password for %s @ %s: ", databaseUsername, jdbcURL) ); } Class.forName("org.postgresql.Driver"); Connection conn = DriverManager.getConnection(jdbcURL, databaseUsername, databasePassword); if (operation.equals("create")) { logger.info("operation create with param="+operationparam); String sorganism_ids[]=operationparam.split(","); int organism_ids[] = new int[sorganism_ids.length]; for (int i=0;i<sorganism_ids.length;i++){ organism_ids[i]=Integer.parseInt(sorganism_ids[i]); } PanGenomeManager pgm= new PanGenomeManager(conn); conn.setAutoCommit(false); Arrays.sort(organism_ids); //pgm.deletePanGenome("Salmonella","pangenome",organism_ids); int vorganism_id=pgm.insertVirtualOrganism("Salmonella","pangenome",organism_ids); pgm.createPanGenome(organism_ids,vorganism_id); //pgm.deletePanGenome("Salmonella","pangenome",organism_ids); //pgm.updatePanGenome("Salmonella","pangenome"); conn.commit(); logger.info(":)"); } conn.close(); } }