/*
* Copyright (c) 2006 Genome Research Limited.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as published
* by the Free Software Foundation; either version 2 of the License or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this program; see the file COPYING.LIB. If not, write to
* the Free Software Foundation Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307 USA
*/
package org.genedb.db.loading;
import org.genedb.db.dao.OrganismDao;
import org.gmod.schema.feature.Contig;
import org.gmod.schema.feature.Supercontig;
import org.gmod.schema.feature.TopLevelFeature;
import org.gmod.schema.mapped.Feature;
import org.gmod.schema.mapped.Organism;
import org.apache.log4j.Logger;
import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.hibernate.criterion.Restrictions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Configurable;
import org.springframework.orm.hibernate3.SessionFactoryUtils;
import org.springframework.transaction.annotation.Transactional;
/**
* Load a FASTA file into the database as a concatenated sequence of contigs.
*
*/
@Transactional(rollbackFor=DataError.class) // Will also rollback for runtime exceptions, by default
@Configurable
public class FastaLoader {
private static final Logger logger = Logger.getLogger(FastaLoader.class);
@Autowired
private SessionFactory sessionFactory;
@Autowired
private OrganismDao organismDao;
// Configurable parameters
private Organism organism;
private Class<? extends TopLevelFeature> topLevelFeatureClass = Supercontig.class;
private Class<? extends TopLevelFeature> entryClass = Contig.class;
public enum OverwriteExisting {YES, NO}
private OverwriteExisting overwriteExisting = OverwriteExisting.NO;
/**
* Set the organism into which to load data.
*
* @param organismCommonName the common name of the organism
*/
public void setOrganismCommonName(String organismCommonName) {
this.organism = organismDao.getOrganismByCommonName(organismCommonName);
if (organism == null) {
throw new IllegalArgumentException(String.format("Organism '%s' not found", organismCommonName));
}
}
/**
* Set the class of top-level feature that this FASTA file represents.
* The default, if this method is not called, is <code>Supercontig</code>.
*
* @param topLevelFeatureClass
*/
public void setTopLevelFeatureClass(Class<? extends TopLevelFeature> topLevelFeatureClass) {
this.topLevelFeatureClass = topLevelFeatureClass;
}
/**
* Set the class of feature that each entry in this FASTA file represents.
* The default, if this method is not called, is <code>Contig</code>.
*
* @param entryClass
*/
public void setEntryClass(Class<? extends TopLevelFeature> entryClass) {
this.entryClass = entryClass;
}
/**
* Whether we should overwrite an existing top-level feature if it has
* the same name as the one specified in this file. The default, if this
* method is not called, is <code>NO</code>.
*
* If overwriteExisting is <code>NO</code>, the file will be skipped on the
* grounds that it's already loaded. If it's <code>YES</code>, the previously
* existing top-level feature, and features located on it, will
* be deleted first.
*
* @param overwriteExisting <code>YES</code> if we should overwrite an
* existing top-level feature, or <code>NO</code> if not.
*/
public void setOverwriteExisting(OverwriteExisting overwriteExisting) {
this.overwriteExisting = overwriteExisting;
}
/**
* This method is called once for each FASTA file.
*
* @param fileId the identifier of the file
* @param records the records the file contains
*/
public void load(String fileId, Iterable<FastaRecord> records) {
logger.debug(String.format("beginFastaFile(%s)", fileId));
Session session = SessionFactoryUtils.doGetSession(sessionFactory, false);
StringBuilder concatenatedSequences = new StringBuilder();
Feature existingTopLevelFeature = (Feature) session.createCriteria(Feature.class)
.add(Restrictions.eq("organism", organism))
.add(Restrictions.eq("uniqueName", fileId))
.uniqueResult();
if (existingTopLevelFeature != null) {
switch (overwriteExisting) {
case YES:
existingTopLevelFeature.delete();
break;
case NO:
logger.error(String.format("The organism '%s' already has feature '%s'",
organism.getCommonName(), fileId));
return;
}
}
TopLevelFeature topLevelFeature = null;
if (topLevelFeatureClass != null) {
topLevelFeature = TopLevelFeature.make(topLevelFeatureClass, fileId, organism);
topLevelFeature.markAsTopLevelFeature();
session.persist(topLevelFeature);
}
int start = 0;
for (FastaRecord record: records) {
String id = record.getId();
String sequence = record.getSequence();
if (topLevelFeature != null) {
concatenatedSequences.append(sequence);
}
int end = start + sequence.length();
TopLevelFeature entry = TopLevelFeature.make(entryClass, id, organism);
entry.setResidues(sequence);
if (topLevelFeature == null) {
entry.markAsTopLevelFeature();
} else {
topLevelFeature.addLocatedChild(entry, start, end);
}
session.persist(entry);
start = end;
}
if (topLevelFeature != null) {
topLevelFeature.setResidues(concatenatedSequences.toString());
}
}
}