package org.genedb.db.loading.auxiliary; import org.gmod.schema.utils.CvTermUtils; import org.apache.log4j.Logger; import org.hibernate.Session; import org.hibernate.connection.ConnectionProvider; import org.hibernate.engine.SessionFactoryImplementor; import org.springframework.context.ApplicationContext; import org.springframework.context.support.ClassPathXmlApplicationContext; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.nio.CharBuffer; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.SQLException; import javax.sql.DataSource; /** * First deletes duplicate GO terms, then redundant GO terms * * A GO annotation is a duplicate when it has the same : * <ul> * <li>GO accession * <li>evidence code (not case sensistive) * <li>and PMID (in the pub table) * </ul> * * A GO annotation is redundant when: * <ul> * <li> It is inferred from electronic annotation, * <li> There is also a more specific term present. * </ul> * These redundant terms add no new information, so should * be removed. This class removes them. It should be used * whenever new IEA terms have been added: for example, * after loading InterPro data. * * Duplicates must be removed before removing redundant terms * otherwise DeleteRedundantGOTermsSQL will remove them both * * @author rh11 */ public class DeleteRedundantGOTerms { private static final Logger logger = Logger.getLogger(DeleteRedundantGOTerms.class); public static void main(String[] args) throws SQLException, IOException { ApplicationContext ctx = new ClassPathXmlApplicationContext( new String[] {"Load.xml"}); DataSource dataSource = ctx.getBean("dataSource", DataSource.class); deleteRedundantGOTerms(dataSource); } public static void deleteRedundantGOTerms(DataSource dataSource) throws SQLException, IOException { deleteRedundantGOTerms(dataSource.getConnection()); } public static void deleteRedundantGOTerms(Session session) throws SQLException, IOException { SessionFactoryImplementor sessionFactoryImplementer = (SessionFactoryImplementor) session.getSessionFactory(); ConnectionProvider connectionProvider = sessionFactoryImplementer.getConnectionProvider(); deleteRedundantGOTerms(connectionProvider.getConnection()); } public static void deleteRedundantGOTerms(Connection conn) throws SQLException, IOException { new DeleteRedundantGOTerms(conn) .deleteRedundantGOTerms() .closeConnection(); } private Connection conn; public DeleteRedundantGOTerms (Connection conn) throws SQLException { this.conn = conn; conn.setAutoCommit(false); } private DeleteRedundantGOTerms deleteRedundantGOTerms() throws SQLException, IOException { CvTermUtils.checkCvTermPath(conn); PreparedStatement st = conn.prepareStatement(getDeleteDuplicateGOTermsSQL()); try { int numDeleted = st.executeUpdate(); logger.info(String.format("Deleted %d duplicate GO annotations", numDeleted)); } finally { try {st.close();} catch (SQLException e) {logger.error(e);} } PreparedStatement st2 = conn.prepareStatement(getDeleteRedundantGOTermsSQL()); try { int numDeleted = st2.executeUpdate(); logger.info(String.format("Deleted %d redundant GO annotations", numDeleted)); } finally { try {st2.close();} catch (SQLException e) {logger.error(e);} } return this; // for method chaining } /** * Size of buffer into which the SQL file is read. * Must be at least as large as the file (measured in characters). */ private static final int BUF_SIZE = 32768; private String getDeleteDuplicateGOTermsSQL() throws IOException { InputStream inputStream = getClass().getResourceAsStream("/delete_duplicate_GO_terms.sql"); if (inputStream == null) throw new RuntimeException("Could not find 'delete_duplicate_GO_terms.sql' on classpath"); Reader reader = new InputStreamReader(inputStream); CharBuffer sqlBuffer = CharBuffer.allocate(BUF_SIZE); int numCharsRead = reader.read(sqlBuffer); logger.debug(String.format("Read %d chars from delete_duplicate_GO_terms.sql", numCharsRead)); reader.close(); inputStream.close(); sqlBuffer.position(0); return sqlBuffer.subSequence(0, numCharsRead).toString(); } private String getDeleteRedundantGOTermsSQL() throws IOException { InputStream inputStream = getClass().getResourceAsStream("/delete_redundant_GO_terms.sql"); if (inputStream == null) throw new RuntimeException("Could not find 'delete_redundant_GO_terms.sql' on classpath"); Reader reader = new InputStreamReader(inputStream); CharBuffer sqlBuffer = CharBuffer.allocate(BUF_SIZE); int numCharsRead = reader.read(sqlBuffer); logger.debug(String.format("Read %d chars from delete_redundant_GO_terms.sql", numCharsRead)); reader.close(); inputStream.close(); sqlBuffer.position(0); return sqlBuffer.subSequence(0, numCharsRead).toString(); } private void closeConnection() throws SQLException { conn.commit(); conn.close(); } }