package org.genedb.web.mvc.model.load;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.genedb.web.mvc.model.types.DBXRefType;
import org.genedb.web.mvc.model.types.DtoObjectArrayField;
import org.genedb.web.mvc.model.types.DtoStringArrayField;
import org.genedb.web.mvc.model.types.SynonymType;
import org.genedb.web.mvc.model.types.TranscriptRegionType;
import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;
import org.springframework.util.CollectionUtils;
public abstract class AbstractTranscriptLoader {
private Logger logger = Logger.getLogger(AbstractTranscriptLoader.class);
/**
* Set up logging
*/
protected static void setUpLogging() {
String log4jprops = "/log4j.TranscriptLoader.properties";
URL url = TranscriptLoader.class.getResource(log4jprops);
System.out.printf("Configuring Log4J from '%s'\n", url);
PropertyConfigurator.configure(url);
}
protected SimpleJdbcTemplate template;
protected int saveOrUpdate(HashMap<String, Object> args, boolean isUpdate) throws Exception {
if (!isUpdate) {
return insertDenormalisedTranscript(args);
}
return updateDenormalisedTranscript(args);
}
/**
* Insert
* @param args
* @return
* @throws Exception
*/
public int insertDenormalisedTranscript(HashMap<String, Object> args) throws Exception {
logger.debug("Enter insertDenormalisedTranscript");
Date startTime = new Date();
logger.debug(String.format("Field args size: %s", args.size()));
int update = 0;
try {
logger.info("Loading Transcript: " + args.get("transcript_id"));
update = template.update("insert into transcript(" +
"transcript_id," +
"transcript_name," +
"transcript_cvterm_name," +
"transcript_cv_name," +
"transcript_uniquename," +
"transcript_time_last_modified," +
"gene_id," +
"gene_name," +
"gene_time_last_modified," +
"gene_fmax," +
"gene_fmin," +
"gene_strand," +
"gene_cvterm_name," +
"gene_cv_name," +
"organism_id," +
"organism_common_name," +
"top_level_feature_name," +
"top_level_feature_seqlen," +
"top_level_feature_uniquename," +
//"top_level_feature_type," +
"top_level_feature_type" +
//"cluster_ids," +
//"orthologue_names," +
//"publications," +
//"synonyms," +
//"exons," +
//"dbx_refs" +
")" +
" values(" +
":transcript_id," +
":transcript_name," +
":transcript_cvterm_name," +
":transcript_cv_name," +
":transcript_uniquename," +
":transcript_time_last_modified," +
":gene_id," +
":gene_name," +
":gene_time_last_modified," +
":gene_fmax," +
":gene_fmin," +
":gene_strand," +
":gene_cvterm_name," +
":gene_cv_name," +
":organism_id," +
":organism_common_name," +
":top_level_feature_name," +
":top_level_feature_seqlen," +
":top_level_feature_uniquename," +
//":top_level_feature_type," +
":top_level_feature_type" +
//":cluster_ids," +
//":orthologue_names," +
//":publications," +
//":synonyms," +
//":transcript_regions," +
//":dbx_refs" +
") ", args);
} catch(Exception e) {
String message = null;
for (String key : args.keySet()) {
message = message + String.format("%s: %s\n", key, args.get(key));
}
logger.error(message, e);
throw e;
}
TimerHelper.printTimeLapse(logger, startTime, "insertDenormalisedTranscript");
logger.debug("trans loaded......");
logger.debug("\n");
return update;
}
/**
* Update
* @param args
* @return
* @throws Exception
*/
public int updateDenormalisedTranscript(HashMap<String, Object> args) throws Exception {
logger.debug("Enter updateDenormalisedTranscript");
Date startTime = new Date();
logger.debug(String.format("Field args size: %s", args.size()));
int update = 0;
try {
update = template.update("update transcript values set" +
" transcript_name = :transcript_name," +
" transcript_cvterm_name = :transcript_cvterm_name," +
" transcript_cv_name = :transcript_cv_name," +
" transcript_uniquename = :transcript_uniquename," +
" transcript_time_last_modified = :transcript_time_last_modified," +
" gene_id = :gene_id," +
" gene_name= :gene_name," +
" gene_time_last_modified = :gene_time_last_modified," +
" gene_fmax = :gene_fmax," +
" gene_fmin = :gene_fmin," +
" gene_strand = :gene_strand," +
" gene_cvterm_name = :gene_cvterm_name," +
" gene_cv_name = :gene_cv_name," +
" organism_id = :organism_id," +
" organism_common_name = :organism_common_name," +
" top_level_feature_name = :top_level_feature_name," +
" top_level_feature_seqlen = :top_level_feature_seqlen," +
" top_level_feature_uniquename = :top_level_feature_uniquename," +
" top_level_feature_type = :top_level_feature_type," +
" cluster_ids = :cluster_ids," +
" orthologue_names = :orthologue_names," +
" publications = :publications," +
" synonyms = :synonyms," +
" transcript_regions = :transcript_regions," +
" dbx_refs = :dbx_refs" +
" where transcript_id = :transcript_id ", args);
} catch(Exception e) {
String message = null;
for (String key : args.keySet()) {
message = message + String.format("%s: %s\n", key, args.get(key));
}
logger.error(message, e);
throw e;
}
TimerHelper.printTimeLapse(logger, startTime, "updateDenormalisedTranscript");
logger.debug("trans loaded......");
logger.debug("\n");
return update;
}
/**
* Process each transcript derived from the gene
* @param organismMapper
* @param topLevelFeatureMapper
* @param geneMapper
* @param transcriptMappers
* @param isUpdate or insert (for this method call)
* @return
* @throws Exception
*/
protected int processTranscripts(
OrganismMapper organismMapper, FeatureMapper topLevelFeatureMapper,
FeatureMapper geneMapper, List<FeatureMapper> transcriptMappers, boolean isUpdate) throws Exception{
int loadCount = 0;
for (FeatureMapper transcriptMapper: transcriptMappers) {
Date transcriptProcessingStartTime = new Date();
logger.info("Adding..." + transcriptMapper.getFeatureId());
HashMap<String, Object> args = new HashMap<String, Object>();
//Init the Organism arguments of this transcript
initOrganismArguments(args, organismMapper);
//Init the toplevelfeature details
initTopLevelArguments(args, topLevelFeatureMapper);
//Init the gene arguments of this transcript
initGeneArguments(args, geneMapper);
//Init the transcript arguments
initTranscriptArguments(args, transcriptMapper);
//Init the synonyms
initSynonymTypeArguments(args, transcriptMapper);
//Init the transcript region
initTranscriptRegionTypeArguments(args, transcriptMapper);
//Init the derived polypeptides details
FeatureMapper polypeptideMapper = initTranscriptProteinArguments(
args, transcriptMapper);
//Insert into the transcript_cache table
loadCount = loadCount + saveOrUpdate(args, isUpdate);
if(isUpdate){
//delete transcript child records in transcript_featurecvterm and transcript_prop
deleteTranscriptChildren(transcriptMapper.getFeatureId());
}
if(polypeptideMapper!= null){
//Insert into the transcript_featurecvterm table
TranscriptFeatureCVTermLoader.load(
transcriptMapper.getFeatureId(), polypeptideMapper, template);
//Insert into the transcript_featureprop table
TranscriptFeaturePropLoader.load(
transcriptMapper.getFeatureId(), polypeptideMapper, template);
}
logger.info("Added..." + transcriptMapper.getFeatureId());
TimerHelper.printTimeLapse(logger, transcriptProcessingStartTime, "transcriptProcessingTime");
}
return loadCount;
}
/**
* Find the Top Level Feature from the Gene
*/
protected FeatureMapper findTopLevelFeature(FeatureMapper geneMapper){
//Init the toplevelfeature arguments of this transcript
Date topLevelFeatureGetStartTime = new Date();
FeatureMapper topLevelFeatureMapper = template.queryForObject(
TopLevelFeatureMapper.SQL,
new TopLevelFeatureMapper(), geneMapper.getSourceFeatureId());
TimerHelper.printTimeLapse(logger, topLevelFeatureGetStartTime, "topLevelFeatureGetStartTime");
return topLevelFeatureMapper;
}
/**
* Find the transcript from the Gene
* @param geneMapper
* @return
*/
protected List<FeatureMapper> findTranscripts(FeatureMapper geneMapper){
//get the transcripts
Date transcriptGetStartTime = new Date();
List<FeatureMapper>transcriptMappers = template.query(
TranscriptMapper.SQL_WITH_GENE_ID_PARAM, new TranscriptMapper(), geneMapper.getFeatureId());
logger.info("Transcripts size: " + transcriptMappers.size());
TimerHelper.printTimeLapse(logger, transcriptGetStartTime, "transcriptGetStartTime");
return transcriptMappers;
}
protected void initSynonymTypeArguments(HashMap<String, Object> args, FeatureMapper transcriptMapper){
logger.debug("Enter initSynonymTypeArguments");
List<SynonymType> synonyms = template.query(
SynonymTypeMapper.SQL, new SynonymTypeMapper(), transcriptMapper.getFeatureId());
DtoObjectArrayField objectField = new DtoObjectArrayField("synonymtype", synonyms);
args.put("synonyms", objectField);
logger.debug("Exit initSynonymTypeArguments");
}
protected void initTranscriptRegionTypeArguments(HashMap<String, Object> args, FeatureMapper transcriptMapper){
logger.debug("Enter initSynonymTypeArguments");
List<TranscriptRegionType> transcriptRegions = template.query(
TranscriptRegionMapper.SQL, new TranscriptRegionMapper(), transcriptMapper.getFeatureId());
DtoObjectArrayField objectField = new DtoObjectArrayField("transcriptregiontype", transcriptRegions);
args.put("transcript_regions", objectField);
logger.debug("Exit initTranscriptRegionTypeArguments");
}
private void initOrganismArguments(HashMap<String, Object> args, OrganismMapper organismMapper){
logger.debug("Enter initOrganismArguments");
args.put("organism_common_name", organismMapper.getCommonName());
args.put("organism_id", organismMapper.getOrganismId());
logger.debug("Exit initOrganismArguments");
}
private void initGeneArguments(HashMap<String, Object> args, FeatureMapper geneMapper){
logger.debug("Enter initGeneArguments");
args.put("gene_name", geneMapper.getName());
args.put("gene_id", geneMapper.getFeatureId());
args.put("gene_time_last_modified", geneMapper.getTimeLastModified());
args.put("gene_fmax", geneMapper.getFmax());
args.put("gene_fmin", geneMapper.getFmin());
args.put("gene_strand", geneMapper.getStrand());
args.put("gene_cvterm_name", geneMapper.getCvName());
args.put("gene_cv_name", geneMapper.getCvtName());
logger.debug("Exit initGeneArguments");
}
private void initTopLevelArguments(HashMap<String, Object> args, FeatureMapper topLevelFeature){
logger.debug("Enter initTopLevelArguments");
args.put("top_level_feature_name", topLevelFeature.getName());
args.put("top_level_feature_seqlen", topLevelFeature.getSeqLen());
args.put("top_level_feature_type", topLevelFeature.getCvtName());
args.put("top_level_feature_uniquename", topLevelFeature.getUniqueName());
logger.debug("Exit initTopLevelArguments");
}
private void initTranscriptArguments(HashMap<String, Object> args, FeatureMapper transcriptMapper){
logger.debug("Enter initTranscriptArguments");
args.put("transcript_id", transcriptMapper.getFeatureId());
args.put("transcript_time_last_modified", transcriptMapper.getTimeLastModified());
args.put("transcript_uniquename", transcriptMapper.getUniqueName());
args.put("transcript_name", transcriptMapper.getName());
args.put("transcript_cvterm_name", transcriptMapper.getCvtName());
args.put("transcript_cv_name", transcriptMapper.getCvName());
args.put("organism_id", transcriptMapper.getOrganismId());
logger.debug("Exit initTranscriptArguments");
}
private boolean isProductiveTranscript(FeatureMapper transcriptMapper){
logger.debug("Enter isProductiveTranscript");
if("sequence".equals(transcriptMapper.getCvName())){
if ("mRNA".equals(transcriptMapper.getCvtName())
|| "pseudogenic_transcript".equals(transcriptMapper.getCvtName())){
return true;
}
}
return false;
}
private FeatureMapper initTranscriptProteinArguments(HashMap<String, Object> args, FeatureMapper transcriptMapper)
throws Exception{
//Init the derived polypeptides details
FeatureMapper polypeptideMapper = null;
if (isProductiveTranscript(transcriptMapper)){
polypeptideMapper = template.queryForObject(
PolypeptideMapper.SQL, new PolypeptideMapper(), transcriptMapper.getFeatureId());
initPolypeptideArguments(args, polypeptideMapper);
}else{
args.put("dbx_refs", new DtoObjectArrayField("dbxreftype", new ArrayList<DBXRefType>()));
DtoStringArrayField emptyArr = new DtoStringArrayField(null);
args.put("publications", emptyArr);
args.put("cluster_ids", emptyArr);
args.put("orthologue_names", emptyArr);
}
return polypeptideMapper;
}
/**
*
* @param args
* @param polypeptideMapper
* @throws Exception
*/
private void initPolypeptideArguments(HashMap<String, Object> args, FeatureMapper polypeptideMapper)
throws Exception{
Date startTime = new Date();
logger.debug("Enter initPolypeptideArguments");
if(polypeptideMapper!= null){
args.put("polypeptide_time_last_modified", polypeptideMapper.getTimeLastModified());
//Get the dbxref details
List<DBXRefType> dbxrefs = template.query(
DbxRefMapper.SQL, new DbxRefMapper(), polypeptideMapper.getFeatureId());
DtoObjectArrayField objectField = new DtoObjectArrayField("dbxreftype", dbxrefs);
args.put("dbx_refs", objectField);
if(dbxrefs.size()>0){
logger.info("DbxRef: " + objectField);
}
//Get publications
List<String> pubNames = template.query(
PubNameMapper.SQL, new PubNameMapper(), polypeptideMapper.getFeatureId());
args.put("publications", new DtoStringArrayField(pubNames));
//Get polypeptide properties
// PeptideProperties properties = PolypeptidePropertiesHelper.calculateStats(polypeptideMapper);
// if (properties!= null){
// logger.info("Polypep properties: Amino "
// + properties.getAminoAcids() +", "
// + properties.getCharge() + ", Charge"
// + properties.getIsoelectricPoint()+ ", IsoElec"
// + properties.getMass() + ", Mass In Daltons "
// + properties.getMassInDaltons() );
// args.put("polypeptide_properties", new PeptidePropertiesType(properties));
// }else{
// args.put("polypeptide_properties", null);
// logger.error("Peptide Properties for (featureid: "+polypeptideMapper.getFeatureId() +")is null");
// }
//Get the clusertIds and orthologueNames
initPepClusterIdsAndOrthologueNames(args, polypeptideMapper);
}
logger.debug("Exit initPolypeptideArguments");
TimerHelper.printTimeLapse(logger, startTime, "initPolypeptideArguments");
}
/**
*
* @param args
* @param polypeptideMapper
*/
private void initPepClusterIdsAndOrthologueNames(HashMap<String, Object> args, FeatureMapper polypeptideMapper){
Date startTime = new Date();
List<String> cluserIds = new ArrayList<String>();
List<String> orthorloguesNames = new ArrayList<String>();
List<ClusterIdAndOrthologueNamesMapper> clusterIdAndOrthologueNamesMappers = template.query(
ClusterIdAndOrthologueNamesMapper.SQL,
new ClusterIdAndOrthologueNamesMapper(), polypeptideMapper.getFeatureId());
for(ClusterIdAndOrthologueNamesMapper mapper: clusterIdAndOrthologueNamesMappers){
if (mapper.getCvtName().equals("protein_match")){
cluserIds.add(mapper.getUniqueName());
}else if(mapper.getCvtName().equals("polypeptide")){
orthorloguesNames.add(mapper.getUniqueName());
}
}
//initialise the fields
args.put("cluster_ids", new DtoStringArrayField(cluserIds));
args.put("orthologue_names", new DtoStringArrayField(orthorloguesNames));
TimerHelper.printTimeLapse(logger, startTime, "initPepClusterIdsAndOrthologueNames");
}
public int deleteTranscriptChildren(Integer transcriptId)throws Exception{
String sql = "delete from transcript_featurecvterm where transcript_id = ?";
int count = template.update(sql, transcriptId);
sql = "delete from transcript_featureprop where transcript_id = ?";
count = count + template.update(sql, transcriptId);
return count;
}
public SimpleJdbcTemplate getTemplate() {
return template;
}
public void setTemplate(SimpleJdbcTemplate template) {
this.template = template;
}
}