package org.genedb.web.mvc.model.load;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import org.apache.log4j.Logger;
import org.genedb.db.audit.ChangeSet;
import org.genedb.db.audit.ChangeTracker;
import org.genedb.web.mvc.model.IndexUpdater;
import org.gmod.schema.feature.AbstractGene;
import org.gmod.schema.feature.Polypeptide;
import org.gmod.schema.feature.Transcript;
import org.springframework.context.ConfigurableApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;
import org.springframework.transaction.annotation.Transactional;
/**
*
* @author lo2@sangerinstitute
*
*/
@Transactional
public class TranscriptUpdater extends AbstractTranscriptLoader implements IndexUpdater{
Logger logger = Logger.getLogger(TranscriptUpdater.class);
private ChangeTracker changeTracker;
public static void main(String args[])throws Exception{
setUpLogging();
ConfigurableApplicationContext ctx = new ClassPathXmlApplicationContext(
new String[] {"classpath:TranscriptLoader-context.xml"});
TranscriptUpdater transcriptUpdater = ctx.getBean("transcriptUpdater", TranscriptUpdater.class);
transcriptUpdater.executeAll(TranscriptUpdater.class.getName());
}
public void executeAll(String clientName)throws Exception{
ChangeSet changeSet = changeTracker.changes(clientName);
updateAllCaches(changeSet);
}
@Override
public int updateTranscriptCache(ChangeSet changeSet) throws Exception{
int updateCount = 0;
//Processed transcript Ids filter to avoid multiple insert or update of same transcript
Set<Integer> processedTranscriptsIds = new HashSet<Integer>();
//Process new genes
Collection<Integer> featureIds = changeSet.newFeatureIds(AbstractGene.class);
updateCount = updateCount + batchRequest(featureIds, 50, new GeneChangesProcessor(false, processedTranscriptsIds));
//Process changed genes
featureIds = changeSet.changedFeatureIds(AbstractGene.class);
updateCount = updateCount + batchRequest(featureIds, 50, new GeneChangesProcessor(true, processedTranscriptsIds));
//Process deleted genes
featureIds = changeSet.deletedFeatureIds(AbstractGene.class);
updateCount = updateCount + batchRequest(featureIds, 50, new GenesRemover());
//Process new Transcript
featureIds = changeSet.newFeatureIds(Transcript.class);
updateCount = updateCount + batchRequest(featureIds, 50, new TranscriptChangesProcessor(false, processedTranscriptsIds));
//Process changed Transcript
featureIds = changeSet.changedFeatureIds(Transcript.class);
updateCount = updateCount + batchRequest(featureIds, 50, new TranscriptChangesProcessor(true, processedTranscriptsIds));
//Process deleted Transcript
featureIds = changeSet.deletedFeatureIds(Transcript.class);
updateCount = updateCount + batchRequest(featureIds, 50, new TranscriptsRemover());
//Process new Polypeptide
featureIds = changeSet.newFeatureIds(Polypeptide.class);
updateCount = updateCount + batchRequest(featureIds, 50, new PolypeptideChangesProcessor(false, processedTranscriptsIds));
//Process changed Polypeptide
featureIds = changeSet.changedFeatureIds(Polypeptide.class);
updateCount = updateCount + batchRequest(featureIds, 50, new PolypeptideChangesProcessor(true, processedTranscriptsIds));
//Process deleted Polypeptide
featureIds = changeSet.deletedFeatureIds(Polypeptide.class);
updateCount = updateCount + batchRequest(featureIds, 50, new PolypeptidesRemover());
return updateCount;
}
@Override
public boolean updateAllCaches(ChangeSet changeSet){
try{
updateTranscriptCache(changeSet);
}catch(Exception e){
throw new RuntimeException(e);
}
return true;
}
/**
* Batch requests to avoid out of memory issues
* @param featureIds
* @param batchSize
* @param request
* @throws Exception
*/
private int batchRequest(Collection<Integer> featureIds, int batchSize, RequestProcessor request)throws Exception{
int updateCount = 0;
int currentBatchIndex = 0;
List<Integer> subset = new ArrayList<Integer>();
List<Integer> ids = new ArrayList<Integer>(featureIds);
for(ListIterator<Integer> iter = ids.listIterator(); iter.hasNext();){
if (currentBatchIndex < batchSize){
subset.add(iter.next());
++currentBatchIndex;
if(iter.hasNext()){
continue;
}
}
updateCount = updateCount + request.execute(subset);
subset.clear();
currentBatchIndex = 0;
}
return updateCount;
}
/**
* Class to insert or update the transcripts of a collection of genes
* @author lo2@sangerinstitute
*
*/
private class GeneChangesProcessor implements RequestProcessor{
private boolean isUpdate;
private Set<Integer> processedTranscriptsIds = new HashSet<Integer>();
public GeneChangesProcessor(boolean isUpdate, Set<Integer> processedTranscriptsIds){
this.isUpdate = isUpdate;
this.processedTranscriptsIds = processedTranscriptsIds;
}
public int execute(Collection<Integer> featureIds)throws Exception{
Date startTime = new Date();
int updateCount = 0;
try{
//Get the genes
List<FeatureMapper> genes = findGenes(featureIds);
for(FeatureMapper geneMapper: genes){
Date geneProcessingStartTime = new Date();
//get the transcripts
List<FeatureMapper>transcriptMappers = findTranscripts(geneMapper);
//Filter out the processed transcript
filterOutProcessed(processedTranscriptsIds, transcriptMappers);
//If any unprocessed transcript
if(transcriptMappers.size() > 0){
//Get the organism
OrganismMapper organismMapper = template.queryForObject(
OrganismMapper.SQL_WITH_GENE_ID_PARAM, new OrganismMapper(), geneMapper.getFeatureId());
//Init the toplevelfeature arguments of this transcript
FeatureMapper topLevelFeatureMapper = findTopLevelFeature(geneMapper);
//process transcript
updateCount = updateCount + processTranscripts(
organismMapper, topLevelFeatureMapper, geneMapper, transcriptMappers, isUpdate);
//store ids of processed transcripts
storeProcessedIds(processedTranscriptsIds, transcriptMappers);
}
TimerHelper.printTimeLapse(logger, geneProcessingStartTime, "geneProcessingStartTime");
}
}catch(Exception e){
logger.info("Error: ", e);
throw e;
}finally{
logger.info("Update Count: " + updateCount);
}
TimerHelper.printTimeLapse(logger, startTime, "Exit updateGenes");
return updateCount;
}
}
/**
* Class to insert or update a set of transcripts
* @author lo2@sangerinstitute
*
*/
private class TranscriptChangesProcessor implements RequestProcessor{
private boolean isUpdate;
private Set<Integer> processedTranscriptsIds = new HashSet<Integer>();
public TranscriptChangesProcessor(boolean isUpdate, Set<Integer> processedTranscriptsIds){
this.isUpdate = isUpdate;
this.processedTranscriptsIds = processedTranscriptsIds;
}
public int execute(Collection<Integer> featureIds)throws Exception{
Date startTime = new Date();
int updateCount = 0;
try{
//Get the transcript from transcript ids
List<FeatureMapper>transcriptMappers = findTranscriptsFromTranscriptIds(featureIds);
//Process transcripts loaded from the featureIds
updateCount = processTranscript(transcriptMappers, processedTranscriptsIds, isUpdate);
}catch(Exception e){
logger.info("Error: ", e);
throw e;
}finally{
logger.info("Update Count: " + updateCount);
}
TimerHelper.printTimeLapse(logger, startTime, "Exit updateGenes");
return updateCount;
}
}
/**
* Class to insert or update a set of transcripts
* @author lo2@sangerinstitute
*
*/
private class PolypeptideChangesProcessor implements RequestProcessor{
private boolean isUpdate;
private Set<Integer> processedTranscriptsIds = new HashSet<Integer>();
public PolypeptideChangesProcessor(boolean isUpdate, Set<Integer> processedTranscriptsIds){
this.isUpdate = isUpdate;
this.processedTranscriptsIds = processedTranscriptsIds;
}
public int execute(Collection<Integer> featureIds)throws Exception{
Date startTime = new Date();
int updateCount = 0;
try{
//Get the transcripts from peps ids
List<FeatureMapper>transcriptMappers = findTranscriptsFromPolypeptideIds(featureIds);
//Process transcripts loaded from the featureIds
updateCount = processTranscript(transcriptMappers, processedTranscriptsIds, isUpdate);
}catch(Exception e){
logger.info("Error: ", e);
throw e;
}finally{
logger.info("Update Count: " + updateCount);
}
TimerHelper.printTimeLapse(logger, startTime, "Exit updateGenes");
return updateCount;
}
}
/**
* Process a transcript
* @param transcriptMappers
* @param processedTranscriptsIds
* @param isUpdate
* @return
* @throws Exception
*/
private int processTranscript(List<FeatureMapper>transcriptMappers, Set<Integer> processedTranscriptsIds, boolean isUpdate)
throws Exception{
int updateCount = 0;
//Filter out the processed transcript
filterOutProcessed(processedTranscriptsIds, transcriptMappers);
for(FeatureMapper transcriptMapper: transcriptMappers){
Date geneProcessingStartTime = new Date();
FeatureMapper geneMapper = template.queryForObject(
GeneMapper.SQL_WITH_TRANSCRIPT_ID_PARAM, new GeneMapper(), transcriptMapper.getFeatureId());
//Get the organism
OrganismMapper organismMapper = template.queryForObject(
OrganismMapper.SQL_WITH_GENE_ID_PARAM, new OrganismMapper(), geneMapper.getFeatureId());
//Init the toplevelfeature arguments of this transcript
FeatureMapper topLevelFeatureMapper = findTopLevelFeature(geneMapper);
//process transcript
List<FeatureMapper> singleItemList = new ArrayList<FeatureMapper>();
singleItemList.add(transcriptMapper);
updateCount = updateCount + processTranscripts(
organismMapper, topLevelFeatureMapper, geneMapper, singleItemList, isUpdate);
TimerHelper.printTimeLapse(logger, geneProcessingStartTime, "geneProcessingStartTime");
}
//store ids of processed transcripts
storeProcessedIds(processedTranscriptsIds, transcriptMappers);
return updateCount;
}
/**
* Remove a collection of transcripts of a collection of genes
* @author lo2@sangerinstitute
*
*/
private class GenesRemover implements RequestProcessor{
public int execute(Collection<Integer> featureIds)throws Exception{
String sql = "delete from transcript where gene_id in (placeholders)";
sql = sql.replace("placeholders", formatPlaceholders(featureIds.size()));
return template.update(sql, featureIds.toArray((Object[])new Integer[0]));
}
}
/**
* Remove a collection of transcripts
* @author lo2@sangerinstitute
*
*/
private class TranscriptsRemover implements RequestProcessor{
public int execute(Collection<Integer> featureIds)throws Exception{
String sql = "delete from transcript where transcript_id in (:placeholders)";
sql = sql.replace(":placeholders", formatPlaceholders(featureIds.size()));
return template.update(sql, featureIds.toArray((Object[])new Integer[0]));
}
}
/**
* Remove a collection of transcripts
* @author lo2@sangerinstitute
*
*/
private class PolypeptidesRemover implements RequestProcessor{
public int execute(Collection<Integer> featureIds)throws Exception{
int deletes = 0;
String placeholders = formatPlaceholders(featureIds.size());
//Delete the transcript featurecvterm where related polypeptides are found
String sql = "delete transcript_featurecvterm where polypeptide_id in (:placeholders)";
sql = sql.replace(":placeholders", placeholders);
deletes = template.update(sql, featureIds);
//Delete the transcript featureprop where related polypeptides are found
sql = "delete transcript_featureprop where polypeptide_id in (:placeholders)";
sql = sql.replace(":placeholders", placeholders);
deletes = deletes + template.update(sql, featureIds);
return deletes;
}
}
private List<FeatureMapper> findGenes(Collection<Integer> featureIds){
String sql = GeneMapper.SQL_WITH_GENE_ID_PARAMS;
sql = sql.replace(":placeholders", formatPlaceholders(featureIds.size()));
//Create the mapper and get the genes
List<FeatureMapper> genes = template.query(
sql, new GeneMapper(), featureIds.toArray((Object[])new Integer[0]));
logger.info("Genes size: " + genes.size());
return genes;
}
private List<FeatureMapper> findTranscriptsFromTranscriptIds(Collection<Integer> featureIds){
String sql = TranscriptMapper.SQL_WITH_TRANSCRIPT_ID_PARAMS;
sql = sql.replace(":placeholders", formatPlaceholders(featureIds.size()));
//Create the mapper and get the genes
List<FeatureMapper> transcripts = template.query(
sql, new TranscriptMapper(), featureIds.toArray((Object[])new Integer[featureIds.size()]));
logger.info("Transcript size: " + transcripts.size());
return transcripts;
}
private List<FeatureMapper> findTranscriptsFromPolypeptideIds(Collection<Integer> featureIds){
String sql = TranscriptMapper.SQL_WITH_POLYPEPTIDE_ID_PARAMS;
sql = sql.replace(":placeholders", formatPlaceholders(featureIds.size()));
//Create the mapper and get the genes
List<FeatureMapper> transcripts = template.query(
sql, new TranscriptMapper(), featureIds.toArray((Object[])new Integer[0]));
logger.info("Transcript size: " + transcripts.size());
return transcripts;
}
private interface RequestProcessor{
public int execute(Collection<Integer> featureIds)throws Exception;
}
private String formatPlaceholders(int count){
StringBuffer sb = new StringBuffer();
for(int i=0; i<count; ++i){
sb.append("?");
if(i+1<count){
sb.append(",");
}
}
return sb.toString();
}
/**
* Filter out the processed transcript to avoid re-processing of alreaddy processed transcripts
* @param transcriptMappers
* @return
*/
private void filterOutProcessed(Set<Integer> processedIds, List<FeatureMapper>transcriptMappers){
for(ListIterator<FeatureMapper> iter = transcriptMappers.listIterator(); iter.hasNext(); ){
if(processedIds.contains(iter.next().getFeatureId())){
iter.remove();
}
}
}
/**
* Add the ids of processed transcript to help prevent re-processing of already processed transcripts
* @param processedIds
* @param transcriptMappers
*/
private void storeProcessedIds(Set<Integer> processedIds, List<FeatureMapper>transcriptMappers){
for(ListIterator<FeatureMapper> iter = transcriptMappers.listIterator(); iter.hasNext(); ){
processedIds.add(iter.next().getFeatureId());
}
}
public SimpleJdbcTemplate getTemplate() {
return template;
}
public void setTemplate(SimpleJdbcTemplate template) {
this.template = template;
}
public ChangeTracker getChangeTracker() {
return changeTracker;
}
public void setChangeTracker(ChangeTracker changeTracker) {
this.changeTracker = changeTracker;
}
}