/* * Copyright (c) 2009 Genome Research Limited. * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU Library General Public License as published by the Free * Software Foundation; either version 2 of the License or (at your option) any * later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more * details. * * You should have received a copy of the GNU Library General Public License * along with this program; see the file COPYING.LIB. If not, write to the Free * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307 * USA */ package org.genedb.jogra.services; import org.genedb.db.taxon.TaxonNode; import org.genedb.db.taxon.TaxonNodeManager; import org.genedb.jogra.domain.Term; import org.genedb.jogra.domain.FeatureCvTerm; import org.apache.log4j.Logger; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.jdbc.core.RowMapper; import org.springframework.util.StringUtils; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import javax.sql.DataSource; /** * This class implements the Data Access Layer for the Term Rationaliser. It contains all the SQL needed for * querying, updating and deleting terms. We use the Spring JDBCTemplate here to have more control over the * sql we execute rather than using the existing DAOs which use Hibernate. It also does make db access faster. * There shouldn't any SQL anywhere else in the classes related to the Rationaliser. * * 31.3.2010: * Actually, after some experimentation it was discovered that using a plain JDBC connection here * would really speed up the database work (particularly at start-up). Hence, the methods below are being * re-done using a JDBC connection. To start with we get a new connection to the database each time and * close it at the end. This may be replaced with a connection pool system later on. We also throw SQLExceptions * which are meant to be caught at the interface level to notify the user. * * @author nds */ public class SqlTermService implements TermService { private static final Logger logger = Logger.getLogger(SqlTermService.class); /* Configured during runtime */ private JdbcTemplate jdbcTemplate; private TaxonNodeManager taxonNodeManager; private DataSource dataSource; /** * Takes a list of taxonnodes (organisms) and a cv name * and returns all the corresponding terms. This is the * list of terms for the left hand side of the rationaliser. **/ @Override public List<Term> getTerms(List<TaxonNode> selectedTaxons, final String cvName) throws SQLException { List<Term> terms = new ArrayList<Term>(); String commaSeparatedNames = this.getTaxonNamesInSQLFormat(selectedTaxons); String SQL_TO_GET_TERMS = "select distinct cvterm.name, cvterm.cvterm_id " + "from cvterm " + "join cv on cvterm.cv_id=cv.cv_id " + "join feature_cvterm on feature_cvterm.cvterm_id=cvterm.cvterm_id " + "join feature on feature.feature_id=feature_cvterm.feature_id " + "join organism on organism.organism_id=feature.organism_id " + "where cv.name= ? and organism.common_name IN (" + commaSeparatedNames + ");"; Connection connection = dataSource.getConnection(); PreparedStatement preparedStatement = connection.prepareStatement(SQL_TO_GET_TERMS); preparedStatement.setString(1, cvName); ResultSet resultSet = preparedStatement.executeQuery(); logger.info("Executed : " + preparedStatement.toString()); while(resultSet.next()){ terms.add(new Term(resultSet.getInt("cvterm_id"),resultSet.getString("name"),cvName));; } connection.close(); return terms; } /** * Gets all the terms in the specified cv. These are the terms * that will be displayed on the right side of the rationaliser */ public List<Term> getAllTerms(final String cvName) throws SQLException{ List<Term> terms = new ArrayList<Term>(); String SQL_TO_GET_TERMS = "select cvterm.name, cvterm.cvterm_id " + "from cvterm " + "join cv on cvterm.cv_id=cv.cv_id " + "where cv.name=? ;"; Connection connection = dataSource.getConnection(); PreparedStatement preparedStatement = connection.prepareStatement(SQL_TO_GET_TERMS); preparedStatement.setString(1, cvName); ResultSet resultSet = preparedStatement.executeQuery(); logger.info("Executed : " + preparedStatement.toString()); while(resultSet.next()){ terms.add(new Term(resultSet.getInt("cvterm_id"),resultSet.getString("name"),cvName));; } connection.close(); return terms; } /** * This method rationalises terms. If there is no cvterm corresponding to the newtext, one is created. * For each of the old terms, if the changeAll boolean is set to true, then the term is just updated * in-situ. If not, then just the annotations for the selected organisms are changed to the new term. * If the scope of the old term is within the selected organisms, the old term is also deleted along * with its dbxref. This method returns a RationaliserResult. See documentation in the Rationaliser- * Results class for more details of what it is. */ public RationaliserResult rationaliseTerm(List<Term> oldTerms, String newText, List<TaxonNode> selectedTaxons) throws SQLException{ RationaliserResult result = new RationaliserResult(); String cvName = oldTerms.get(0).getType(); /*...................*/ for(Term old: oldTerms){ if(!old.getName().equals(newText)){ //Quick sanity check here, if(old.getName().equalsIgnoreCase(newText)){ /* If the terms just differ in case, we don't bother * fetching anything from the database. We just * update the cvterm and the dbxref in-situ and * record these activities in our rationaliser * result object. */ jdbcTemplate.update("update cvterm " + "set name=? where cvterm_id=?;", new Object[]{newText, old.getId()}); jdbcTemplate.update("update dbxref " + "set accession= ? where dbxref_id=?;", new Object[]{newText, this.getDbxrefId(newText, cvName)}); result.added(new Term(old.getId(), newText, cvName)); result.deletedGeneral(old); result.deletedSpecific(old); result.setMessage(String.format("Changed all annotations from '%s' to '%s'. \n", old.getName(), newText)); logger.info(String.format("Changed the case of the cvterm name and dbxref accession to %s", newText)); }else{ /* However, if the terms are different, then we should * check if the new term already exists in the cv and * fetch it. If it does not exist, we create one and * record it in the rationaliser result. */ Term newTerm = getTerm(newText, cvName); if(newTerm == null){ newTerm = addTerm(newText, cvName); result.added(newTerm); logger.info("Just added term into set in result: " + newTerm.getName()); result.setMessage(String.format("Created '%s' in cv. \n", newText)); } /* Then we get all the annotations within the scope of the * chosen organisms for this term. For each feature_id, * check if an annotation with the new correct term * already exists. If not, add a new feature_cvterm * with the details of the old one and delete the * old one. */ if(newTerm!=null){ //Another sanity check as something may have gone wrong with sql insert List<FeatureCvTerm> annotations = this.getAnnotations(selectedTaxons, old); for (FeatureCvTerm fcvt: annotations){ if(!existsFeatureCvterm(fcvt.getFeatureId(), newTerm.getId())){ addAnnotation(new FeatureCvTerm(fcvt.getFeatureId(), /*feature_id*/ newTerm.getId(), /*cvterm_id*/ fcvt.getPubId(), /*pub_id*/ fcvt.getRank(), /*rank*/ fcvt.getIsNot())); /*is_not*/ } deleteAnnotation(fcvt); } /* After changing all the annotations, * this term is no longer relevant to * the selected organisms but is still a * term in the cv, and hence not deleted * from the general list. */ result.deletedSpecific(old); result.setMessage(String.format("Changed annotations within selected organisms from '%s' to '%s'.\n", old.getName(), newText)); logger.info(String.format("Changed relevant annotations from '%s' to '%s'",old.getName(), newText)); /* If scope of old term is within the user's chosen organisms, * we delete the old term and corresponding dbxref */ if(selectedTaxons.containsAll(getTermScope(old))){ jdbcTemplate.execute("delete from cvterm where cvterm_id=" + old.getId()); jdbcTemplate.update("delete from dbxref where accession=?", new Object[]{old.getName()} ); /* Now the term is gone from the cv. This can often * happen with terms that are, for instance, * misspelt. */ result.deletedGeneral(old); result.setMessage(String.format("Deleted term '%s'. \n", old.getName())); logger.info(String.format("Deleted old cvterm '%s' and dbxref.", old.getName())); } }else{ result.setMessage(String.format("Something went wrong with inserting a new cvterm called %s! Hence, skipped. \n", newText)); } } }else{ result.setMessage(String.format("Error: Oops! Both the old term and new term are the same (%s)! Hence, skipped. \n", newText)); } } return result; } /** * This method takes a term and a list of taxons that the user has selected, * and returns the list of systematic IDs for this term (within the scope * of the selected taxons). If selectedTaxons is null, then it returns all * the associated systematic IDs. */ public List<String> getSystematicIDs(Term term, List<TaxonNode> selectedTaxons){ String SQL_TO_GET_SYS_IDS; if(selectedTaxons!=null){ String namesInSQLFormat = getTaxonNamesInSQLFormat(selectedTaxons); SQL_TO_GET_SYS_IDS = " select distinct feature.uniquename from feature, feature_cvterm, organism" + " where feature_cvterm.cvterm_id=" + term.getId() + " and feature.feature_id=feature_cvterm.feature_id " + " and feature.organism_id=organism.organism_id" + " and organism.common_name IN (" + namesInSQLFormat +")"; }else{ SQL_TO_GET_SYS_IDS = " select distinct feature.uniquename from feature, feature_cvterm" + " where feature_cvterm.cvterm_id=" + term.getId() + " and feature.feature_id=feature_cvterm.feature_id " ; } logger.info(SQL_TO_GET_SYS_IDS); List<String> idList = jdbcTemplate.queryForList(SQL_TO_GET_SYS_IDS, String.class); return idList; } /** * This method takes a term and returns a list of evidence codes, if any. * The evidence code cvterm is gotten by querying the cvterm with name 'evidence' */ public List<String> getEvidenceCodes(Term term){ String SQL_TO_GET_EV_CODES = "select distinct feature_cvtermprop.value from " + "feature_cvtermprop, feature_cvterm, cvterm where " + "feature_cvtermprop.type_id=cvterm.cvterm_id and " + "cvterm.name='evidence' and " + "feature_cvtermprop.feature_cvterm_id=feature_cvterm.feature_cvterm_id and " + "feature_cvterm.cvterm_id =" + term.getId(); //logger.info(SQL_TO_GET_EV_CODES); List<String> evcList = jdbcTemplate.queryForList(SQL_TO_GET_EV_CODES, String.class); return evcList; } /** * Returns a term with the given name if it exists or * null if it doesn't */ public Term getTerm(String name, final String type){ RowMapper<Term> mapper = new RowMapper<Term>() { public Term mapRow(ResultSet rs, int rowNum) throws SQLException { Term term = new Term(rs.getInt("cvterm_id"), rs.getString("name"), type); return term; } }; int cv_id = getCvIdByCvType(type); Term term; try{ //Does this cvterm exist in this cv? term = (Term)jdbcTemplate.queryForObject( "select cvterm.cvterm_id," + "cvterm.name " + "from cvterm where name=? and cv_id=? ", new Object[]{name, cv_id}, mapper); logger.info(String.format("Term %s already exists in cv %s.", name, type)); return term; }catch(org.springframework.dao.EmptyResultDataAccessException dae){ //Grr! Wish queryforobject just returned null when no data was found! return null; } } /** PRIVATE HELPER METHODS **/ /** * Add a new cvterm to the specified cv and return * the corresponding Term object */ private Term addTerm(String name, final String type){ RowMapper<Term> mapper = new RowMapper<Term>() { public Term mapRow(ResultSet rs, int rowNum) throws SQLException { Term term = new Term(rs.getInt("cvterm_id"), rs.getString("name"), type); return term; } }; Term term; int cv_id = getCvIdByCvType(type); int dbxref_id = getDbxrefId(name, type); //Get the dbxref_id for this term or create one if there isn't one jdbcTemplate.update("insert into cvterm (cv_id, name, dbxref_id) values (?,?,?)", new Object[]{cv_id, name, dbxref_id}); logger.info(String.format("Created term %s in cv %s.", name, type)); try{ term = (Term)jdbcTemplate.queryForObject( "select cvterm.cvterm_id," + "cvterm.name " + "from cvterm where name=? and cv_id=? ", new Object[]{name, cv_id}, mapper); return term; }catch(org.springframework.dao.EmptyResultDataAccessException dae){ //Grr! Wish queryforobject just returned null when no data was found! return null; } } /** * Private helper method to get the taxon scope for a given term. * Returns the set of taxons that have features annotated using this * term. */ private Set<TaxonNode> getTermScope(Term term){ List<String> taxonNames = jdbcTemplate.queryForList("select distinct o.common_name " + "from feature_cvterm fcvt, organism o, feature f " + "where fcvt.cvterm_id = " + term.getId() + " " + "and fcvt.feature_id = f.feature_id " + "and f.organism_id=o.organism_id ", String.class); Set<TaxonNode> scope = new HashSet<TaxonNode>(); for(String name: taxonNames){ scope.add(taxonNodeManager.getTaxonNodeForLabel(name)); } return scope; } /** * Takes a list of taxons and returns their names in a comma-separated string. * This is different to the taxonNode.getAllChildrenNamesInSQLFormat() because the * taxons here can be in various places in the phylogeny tree. */ private String getTaxonNamesInSQLFormat(List<TaxonNode> taxons){ List<String> temp = new ArrayList<String>(); for(TaxonNode t: taxons){ temp.addAll(t.getAllChildrenNamesInSQLFormat()); } return StringUtils.collectionToCommaDelimitedString(temp); } /** * Takes the cv name like 'genedb_products' and returns the corresponding database id */ private int getDbIdByCvType(String type){ String SQL_TO_GET_DB_ID = "select db.db_id " + "from db, dbxref, cvterm, cv " + "where cv.name= '" + type + "' " + "and cvterm.cv_id=cv.cv_id " + "and cvterm.dbxref_id=dbxref.dbxref_id " + "and dbxref.db_id=db.db_id limit 1"; logger.info(SQL_TO_GET_DB_ID); int db_id = jdbcTemplate.queryForInt(SQL_TO_GET_DB_ID); return db_id; } /** * Takes the cv name like 'genedb_products' and returns the corresponding cv id */ private int getCvIdByCvType(String type){ logger.info("Trying to execute: select cv_id from cv where name="+type); int cv_id = jdbcTemplate.queryForInt("select cv_id from cv where name=?", new Object[]{ type }); //Get cv id return cv_id; } /** * Returns the dbxref_id given the accession and the cvtype * If it does not exist, a dbxref record is added and * the new dbxref_id returned */ private int getDbxrefId(String accession, String cvtype){ int db_id = getDbIdByCvType(cvtype); int dbxref_id; String SQL_TO_GET_DBXREF = "select dbxref_id " + "from dbxref where accession=? and db_id=?"; try{ dbxref_id = jdbcTemplate.queryForInt(SQL_TO_GET_DBXREF, new Object[]{accession, db_id}); }catch(org.springframework.dao.EmptyResultDataAccessException dae){ jdbcTemplate.update("insert into dbxref (db_id, accession) values (?,?)", new Object[]{db_id, accession}); dbxref_id = jdbcTemplate.queryForInt(SQL_TO_GET_DBXREF, new Object[]{accession, db_id}); } return dbxref_id; } /** * Given the scope of taxons and a term. returns all the relevant * annotations (i.e., list of featureCvTerm objects) * @param selectedTaxons * @param term * @return */ private List<FeatureCvTerm> getAnnotations(List<TaxonNode> selectedTaxons, Term term){ RowMapper<FeatureCvTerm> mapper = new RowMapper<FeatureCvTerm>() { public FeatureCvTerm mapRow(ResultSet rs, int rowNum) throws SQLException { FeatureCvTerm fcvterm = new FeatureCvTerm( rs.getInt("feature_cvterm_id"), rs.getInt("feature_id"), rs.getInt("cvterm_id"), rs.getInt("pub_id"), rs.getInt("rank"), rs.getBoolean("is_not")); return fcvterm; } }; List<FeatureCvTerm> annotations = new ArrayList<FeatureCvTerm>(); String commaSeparatedNames = this.getTaxonNamesInSQLFormat(selectedTaxons); String SQL_TO_GET_ANNOTATIONS = " select feature_cvterm.* " + " from feature, feature_cvterm, organism" + " where feature_cvterm.cvterm_id=" + term.getId() + " and feature.feature_id=feature_cvterm.feature_id " + " and feature.organism_id=organism.organism_id" + " and organism.common_name IN (" + commaSeparatedNames +")"; annotations = jdbcTemplate.query(SQL_TO_GET_ANNOTATIONS, mapper); return annotations; } /** * Does this feature_cvterm already exist? * (Given a feature_id and a cvterm_id) * Would it be more useful to return the * feature_cvterm_id at some stage? */ private boolean existsFeatureCvterm(int feature_id, int cvterm_id){ String SQL_TO_GET_FEATURE_CVTERM = "select feature_cvterm_id " + "from feature_cvterm " + "where feature_id=? " + "and cvterm_id=?"; try{ int fcid = jdbcTemplate.queryForInt(SQL_TO_GET_FEATURE_CVTERM, new Object[]{feature_id, cvterm_id}); return true; }catch(org.springframework.dao.EmptyResultDataAccessException dae){ return false; } } /** * Adds the given featurecvterm to the database * @param fcvt */ private void addAnnotation(FeatureCvTerm fcvt){ String SQL_TO_ADD_ANNOT = "insert into feature_cvterm" + "(feature_id, cvterm_id, pub_id, is_not, rank)" + "values " + "(?,?,?,?,?)"; jdbcTemplate.update(SQL_TO_ADD_ANNOT, new Object[]{ fcvt.getFeatureId(), fcvt.getCvtermId(), fcvt.getPubId(), fcvt.getIsNot(), fcvt.getRank()}); } /** * Delete a feature_cvterm * @param selectedTaxons * @param term */ private void deleteAnnotation(FeatureCvTerm fcvt){ String SQL_TO_DELETE_ANNOT =" delete from feature_cvterm " + " where feature_cvterm_id = " + fcvt.getFeatureCvtermId(); jdbcTemplate.execute(SQL_TO_DELETE_ANNOT); } /** INJECTED **/ public void setDataSource(DataSource dataSource) { this.dataSource = dataSource; this.jdbcTemplate = new JdbcTemplate(dataSource); } public void setTaxonNodeManager(TaxonNodeManager taxonNodeManager) { this.taxonNodeManager = taxonNodeManager; } }