package org.genedb.web.mvc.model.simple; import org.genedb.web.mvc.model.load.TimerHelper; import org.apache.log4j.Logger; import org.springframework.context.ConfigurableApplicationContext; import org.springframework.context.support.ClassPathXmlApplicationContext; import org.springframework.dao.EmptyResultDataAccessException; import org.springframework.jdbc.core.simple.SimpleJdbcTemplate; import org.springframework.transaction.annotation.Transactional; import org.springframework.util.StringUtils; import java.util.Date; import java.util.List; /** * * @author lo2@sangerinstitute * */ @Transactional public class SimpleTranscriptLoader { public static void main(String args[]) throws Exception { setUpLogging(); ConfigurableApplicationContext ctx = new ClassPathXmlApplicationContext(new String[] { "classpath:SimpleTranscriptLoader-context.xml" }); SimpleTranscriptLoader transcriptLoader = ctx.getBean("simpleTranscriptLoader", SimpleTranscriptLoader.class); String[] orgs = { "Tbruceibrucei927", "Pfalciparum" }; for (String org : orgs) { transcriptLoader.load(org, Integer.MAX_VALUE); } } /** * Set up logging */ private static void setUpLogging() { // String log4jprops = "/log4j.TranscriptLoader.properties"; // URL url = TranscriptLoader.class.getResource(log4jprops); // System.out.printf("Configuring Log4J from '%s'\n", url); // PropertyConfigurator.configure(url); } /** * Load transcripts of all organisms * * @param limit */ // public int loadAll(int limit)throws Exception{ // int loadCount = 0; // List<OrganismMapper> organisms = template.query( // OrganismMapper.GET_ALL_ORGANISMS_SQL, new OrganismMapper()); // for(OrganismMapper organismMapper: organisms){ // logger.info("Loading Organism: " + organismMapper.getCommonName()); // loadCount = loadCount + load(organismMapper.getCommonName(), limit); // } // return loadCount; // } Logger logger = Logger.getLogger(SimpleTranscriptLoader.class); protected SimpleJdbcTemplate template; public SimpleJdbcTemplate getTemplate() { return template; } public void setTemplate(SimpleJdbcTemplate template) { this.template = template; } /** * Find the genes for the given organism * * @param organismMapper * @param offset * @param limit * @return */ private List<SimpleGene> findGenes(int organismId, int offset, int limit) { logger.info(String.format("Offset is %s and Limit is %s", offset, limit)); // Create the mapper and get the genes List<SimpleGene> genes = template.query(SimpleGeneMapper.GET_GENES_SQL_WITH_LIMIT_AND_OFFSET_PARAMS, new SimpleGeneMapper(), organismId, limit, offset); logger.info("Genes size: " + genes.size()); for (SimpleGene gene : genes) { String parent = template.queryForObject("select uniquename from feature where feature_id=" + gene.getSourceFeatureId(), String.class); gene.setTopLevelFeatureUniqueName(parent); } return genes; } /** * Find the transcript from the Gene * * @param geneMapper * @return */ private List<SimpleTranscript> findTranscripts(SimpleGene gene) { // get the transcripts Date transcriptGetStartTime = new Date(); List<SimpleTranscript> transcripts = template.query(SimpleTranscriptMapper.SQL_WITH_GENE_ID_PARAM, new SimpleTranscriptMapper(), gene .getFeatureId()); logger.info("Transcripts size: " + transcripts.size()); TimerHelper.printTimeLapse(logger, transcriptGetStartTime, "transcriptGetStartTime"); return transcripts; } private SimplePolypeptide initTranscriptProteinArguments(SimpleTranscript transcript) throws Exception { // Init the derived polypeptides details if ( !isProductiveTranscript(transcript)) { return null; } try { SimplePolypeptide ret = template.queryForObject(SimplePolypeptideMapper.SQL, new SimplePolypeptideMapper(), transcript .getFeatureId()); try { String product =template.queryForObject( "select c.name from cvterm c, feature_cvterm fc, cv cv where fc.feature_id=" + ret.getFeatureId() + " and fc.cvterm_id=c.cvterm_id and c.cv_id=cv.cv_id and cv.name='genedb_products' limit 1", String.class); ret.setProduct(product); } catch (EmptyResultDataAccessException exp) { System.err.println(String.format("Got no product for '%s' ('%d')", ret.getUniqueName(), ret.getFeatureId())); // No product - just carry on } return ret; } catch (Exception exp) { throw new RuntimeException("Failed to get result for '" + transcript.getFeatureId() + "'", exp); } } /** * Insert * * @param args * @return * @throws Exception */ private int insertDenormalisedTranscript(SimpleGene gene, SimpleTranscript transcript, SimplePolypeptide protein, String commonName) throws Exception { logger.debug("Enter insertDenormalisedTranscript"); Date startTime = new Date(); int update = 0; int proteinId = 0; String proteinName = null; String product = null; if (protein != null) { proteinId = protein.getFeatureId(); proteinName = quote(protein.getUniqueName()); product = protein.getProduct(); if (product != null) { if (product.length() > 120) { product = product.substring(0, 120) + "..."; } product = quote(product); } } String args = StringUtils.arrayToCommaDelimitedString(new Object[] { transcript.getFeatureId(), quote(transcript.getCvtName()), quote(transcript.getUniqueName()), product, gene.getFeatureId(), quote(gene.getUniqueName()), proteinId, proteinName, quote(gene.getTopLevelFeatureUniqueName()), gene.getFmin(), quote(commonName) }); // System.err.println(args); try { logger.info("Loading Transcript: " + transcript.getFeatureId()); update = template.update("insert into transcript_names(" + "transcript_id," + "transcript_cvterm_name," + "transcript_uniquename," + "product," + "gene_id," + "gene_uniquename," + "protein_id," + "protein_uniquename," + "top_level_feature_uniquename," + "fmin," + "organism_common_name" + ")" + " values(" + args + ")"); } catch (Exception exp) { logger.error(args, exp); throw exp; } TimerHelper.printTimeLapse(logger, startTime, "insertDenormalisedTranscript"); logger.debug("transcript loaded......"); logger.debug("\n"); return update; } private boolean isProductiveTranscript(SimpleTranscript transcript) { logger.debug("Enter isProductiveTranscript"); if ("mRNA".equals(transcript.getCvtName()) || "pseudogenic_transcript".equals(transcript.getCvtName())) { return true; } return false; } /** * Choose organism to load * * @param organismName * @param limit * @param offset * @return rows loaded */ private int load(String organismName, int limit) throws Exception { logger.debug(String.format("Enter load(%s)", organismName)); Date startTime = new Date(); // Get the organism int organismId = template.queryForInt("select organism_id from organism where common_name='" + organismName + "'"); int loadCount = 0; int offset = 1; List<SimpleGene> genes = null; try { do { // Get the genes for this organism genes = findGenes(organismId, offset, limit); for (SimpleGene gene : genes) { Date geneProcessingStartTime = new Date(); // get the transcripts List<SimpleTranscript> transcripts = findTranscripts(gene); // process transcript loadCount += processTranscripts(gene, transcripts, organismName); TimerHelper.printTimeLapse(logger, geneProcessingStartTime, "geneProcessingStartTime"); } // increase the offset offset = offset + limit; } while (genes != null && limit <= genes.size()); } catch (Exception e) { logger.info("Error: ", e); throw e; } finally { logger.info("Load Count: " + loadCount); } TimerHelper.printTimeLapse(logger, startTime, String.format("Exit load(%s)", organismName)); return loadCount; } /** * Process each transcript derived from the gene * * @param organismMapper * @param topLevelFeatureMapper * @param geneMapper * @param transcriptMappers * @param isUpdate * or insert (for this method call) * @return * @throws Exception */ private int processTranscripts(SimpleGene gene, List<SimpleTranscript> simpleTranscripts, String commonName) throws Exception { int loadCount = 0; for (SimpleTranscript transcript : simpleTranscripts) { Date transcriptProcessingStartTime = new Date(); logger.info("Adding..." + transcript.getFeatureId()); // Init the derived polypeptides details SimplePolypeptide polypeptide = initTranscriptProteinArguments(transcript); // Insert into the transcript_cache table loadCount += insertDenormalisedTranscript(gene, transcript, polypeptide, commonName); logger.info("Added..." + transcript.getFeatureId()); TimerHelper.printTimeLapse(logger, transcriptProcessingStartTime, "transcriptProcessingTime"); } return loadCount; } private String quote(String in) { String out = in.replace("'", "`"); return "'" + out + "'"; } }