package org.genedb.db.dao;
import org.gmod.schema.feature.AbstractGene;
import org.gmod.schema.feature.CytoplasmicRegion;
import org.gmod.schema.feature.GPIAnchorCleavageSite;
import org.gmod.schema.feature.HelixTurnHelix;
import org.gmod.schema.feature.MembraneStructure;
import org.gmod.schema.feature.MembraneStructureComponent;
import org.gmod.schema.feature.NonCytoplasmicRegion;
import org.gmod.schema.feature.Polypeptide;
import org.gmod.schema.feature.PolypeptideDomain;
import org.gmod.schema.feature.PolypeptideRegion;
import org.gmod.schema.feature.SignalPeptide;
import org.gmod.schema.feature.Transcript;
import org.gmod.schema.feature.TransmembraneRegion;
import org.gmod.schema.mapped.Analysis;
import org.gmod.schema.mapped.CvTerm;
import org.gmod.schema.mapped.DbXRef;
import org.gmod.schema.mapped.Feature;
import org.gmod.schema.mapped.FeatureCvTerm;
import org.gmod.schema.mapped.FeatureDbXRef;
import org.gmod.schema.mapped.FeatureLoc;
import org.gmod.schema.mapped.FeatureProp;
import org.gmod.schema.mapped.FeatureRelationship;
import org.gmod.schema.mapped.FeatureSynonym;
import org.gmod.schema.mapped.Organism;
import org.gmod.schema.mapped.Synonym;
import org.gmod.schema.utils.CountedName;
import org.gmod.schema.utils.GeneNameOrganism;
import org.apache.log4j.Logger;
import org.hibernate.Query;
import org.springframework.transaction.annotation.Transactional;
import java.util.ArrayList;
import java.util.List;
@Transactional
public class SequenceDao extends BaseDao {
/** Log4j logger used by the methods of this DAO. */
private static final Logger logger = Logger.getLogger(SequenceDao.class);
/**
 * DAO used by this class to look up CvTerms by name and CV name.
 * NOTE(review): no assignment is visible in this part of the file;
 * presumably it is injected elsewhere -- confirm.
 */
private CvDao cvDao;
/**
 * Return the feature corresponding to this feature_id.
 *
 * <p>Uses <code>Session.get</code> rather than <code>Session.load</code>.
 * <code>load</code> assumes the row exists and may hand back an
 * uninitialised proxy that only fails when first accessed, so it can
 * never honour the "or null" contract documented here.
 *
 * @param id the database <code>feature_id</code> (not the systematic id)
 * @return the Feature, or <code>null</code> if no feature has this id
 */
public Feature getFeatureById(int id) {
    return (Feature) getSession().get(Feature.class, id);
}
/**
 * Look up a feature by its unique name and the name of its type.
 * If several features match, the first one returned by the query is used.
 *
 * @param uniqueName the uniquename of the feature
 * @param featureType the name of the feature's type, e.g. "gene".
 *          <b>NB</b> this is a String, not a type argument
 * @return the first matching feature, or <code>null</code> if there is none
 * @deprecated Use {@link #getFeatureByUniqueName(String, Class)} instead.
 */
@Deprecated
public Feature getFeatureByUniqueName(String uniqueName, String featureType) {
    Query query = getSession().createQuery(
        "from Feature where uniqueName=:uniqueName and type.name=:featureType");
    query.setString("uniqueName", uniqueName);
    query.setString("featureType", featureType);

    @SuppressWarnings("unchecked")
    List<Feature> matches = query.list();
    return matches.isEmpty() ? null : matches.get(0);
}
/**
 * Fetch the single feature of the given class that has the given unique name.
 * When nothing matches, a message is logged at level <code>INFO</code>
 * and <code>null</code> is returned.
 *
 * @param <T> the feature class to query for
 * @param uniqueName the unique name of the feature
 * @param featureClass the type of feature, e.g. <code>Polypeptide.class</code>;
 *          its fully-qualified name is used as the HQL entity name
 * @return the feature, or <code>null</code> if there isn't such a feature
 * @throws RuntimeException if there is more than one feature with the
 *          specified unique name and type
 */
public <T extends Feature> T getFeatureByUniqueName(String uniqueName, Class<T> featureClass) {
    String hql = "from "+featureClass.getName()+" where uniqueName=:uniqueName";

    @SuppressWarnings("unchecked")
    List<T> matches = getSession().createQuery(hql)
        .setString("uniqueName", uniqueName)
        .list();

    switch (matches.size()) {
    case 0:
        logger.info(String.format("Hibernate found no feature of type '%s' with uniqueName '%s'",
            featureClass.getSimpleName(), uniqueName));
        return null;
    case 1:
        return matches.get(0);
    default:
        throw new RuntimeException(String.format("Found more than one feature of type '%s' with uniqueName '%s'",
            featureClass.getSimpleName(), uniqueName));
    }
}
/**
 * Fetch the single feature, of any type, that has the given unique name.
 * When nothing matches, a message is logged at level <code>INFO</code>
 * and <code>null</code> is returned.
 *
 * @param uniqueName the unique name of the feature
 * @return the feature, or <code>null</code> if there isn't such a feature
 * @throws RuntimeException if more than one feature has this unique name
 */
public Feature getFeatureByUniqueName(String uniqueName) {
    Query query = getSession().createQuery("from Feature where uniqueName=:uniqueName");
    query.setString("uniqueName", uniqueName);

    @SuppressWarnings("unchecked")
    List<Feature> matches = query.list();

    if (matches.size() == 1) {
        return matches.get(0);
    }
    if (matches.isEmpty()) {
        logger.info(String.format("Hibernate found no feature with uniqueName '%s'",
            uniqueName));
        return null;
    }
    throw new RuntimeException(String.format("Found more than one feature with uniqueName '%s'",
        uniqueName));
}
/**
 * Find the gene that a feature belongs to.
 *
 * <p>If the feature is itself an <code>AbstractGene</code> it is returned.
 * Otherwise the relationships in which the feature is the subject are
 * followed recursively, depth first, and the first gene found is returned.
 * Only <code>part_of</code> relationships (CV "relationship") and
 * <code>derives_from</code> relationships (CV "sequence") are followed.
 *
 * @param f the feature to start from; may be <code>null</code>
 * @return the gene, or <code>null</code> if <code>f</code> is
 *          <code>null</code> or no gene is reachable
 */
public AbstractGene getGene(Feature f) {
    // Same guard as getTranscript: a null start point, or a relationship
    // with no object feature, yields null instead of a NullPointerException.
    if (f == null) {
        return null;
    }
    logger.info("getGene("+ f.getUniqueName() +")");
    if (f instanceof AbstractGene) {
        logger.info(" FOUND!");
        return (AbstractGene)f;
    }
    for (FeatureRelationship fr : f.getFeatureRelationshipsForSubjectId()) {
        if ((fr.getType().getName().equals("part_of") && fr.getType().getCv().getName().equals("relationship"))
            || (fr.getType().getName().equals("derives_from") && fr.getType().getCv().getName().equals("sequence"))) {
            AbstractGene gene = getGene(fr.getObjectFeature());
            if (gene != null) {
                return gene;
            }
        }
    }
    return null;
}
/**
 * Find the transcript that a feature belongs to.
 *
 * <p>A feature that is itself a <code>Transcript</code> is returned as is.
 * Otherwise the relationships in which the feature is the subject are
 * followed recursively, depth first; only <code>part_of</code> (CV
 * "relationship") and <code>derives_from</code> (CV "sequence")
 * relationships are considered.
 *
 * @param f the feature to start from; may be <code>null</code>
 * @return the first transcript found, or <code>null</code> if
 *          <code>f</code> is <code>null</code> or none is reachable
 */
public Transcript getTranscript(Feature f) {
    if (f == null) {
        return null;
    }

    logger.info("getTranscript("+ f.getUniqueName() +")");

    if (f instanceof Transcript) {
        logger.info(" FOUND!");
        return (Transcript) f;
    }

    for (FeatureRelationship relationship : f.getFeatureRelationshipsForSubjectId()) {
        String typeName = relationship.getType().getName();
        // The CV name is only read once the term name has matched.
        boolean isPartOf = typeName.equals("part_of")
            && relationship.getType().getCv().getName().equals("relationship");
        boolean followable = isPartOf
            || (typeName.equals("derives_from")
                && relationship.getType().getCv().getName().equals("sequence"));
        if (!followable) {
            continue;
        }

        Transcript found = getTranscript(relationship.getObjectFeature());
        if (found != null) {
            return found;
        }
    }
    return null;
}
/**
 * Fetch the single feature of the given class that has the given unique
 * name and belongs to the organism with the given common name.
 * When nothing matches, a message is logged at level <code>INFO</code>
 * and <code>null</code> is returned.
 *
 * @param <T> the feature class to query for
 * @param uniqueName the unique name of the feature
 * @param organismCommonName the common name of the organism
 * @param featureClass the type of feature, e.g. <code>Polypeptide.class</code>
 * @return the feature, or <code>null</code> if there isn't such a feature
 * @throws RuntimeException if there is more than one feature with the
 *          specified unique name and type in that organism
 */
public <T extends Feature> T getFeatureByUniqueNameAndOrganismCommonName(String uniqueName, String organismCommonName, Class<T> featureClass) {
    Query query = getSession().createQuery(
        "from "+featureClass.getName()+" where uniqueName=:uniqueName and organism.commonName = :organism");
    query.setString("uniqueName", uniqueName);
    query.setString("organism", organismCommonName);

    @SuppressWarnings("unchecked")
    List<T> matches = query.list();

    if (matches.isEmpty()) {
        logger.info(String.format("Hibernate found no feature of type '%s' with uniqueName '%s' in organism '%s'",
            featureClass.getSimpleName(), uniqueName, organismCommonName));
        return null;
    }
    if (matches.size() == 1) {
        return matches.get(0);
    }
    throw new RuntimeException(String.format("Found more than one feature of type '%s' with uniqueName '%s' in organism '%s'",
        featureClass.getSimpleName(), uniqueName, organismCommonName));
}
/**
 * Get the feature whose unique name matches the specified pattern, of the
 * specified type, from the specified organism.
 * If there is no such feature, logs a message at level <code>INFO</code>
 * and returns <code>null</code>.
 *
 * @param <T> the feature class to query for
 * @param uniqueNamePattern an HQL/SQL <code>LIKE</code> pattern for the unique name
 * @param organismCommonName the common name of the organism
 * @param featureClass the type of feature, e.g. <code>Polypeptide.class</code>
 * @return the feature, or <code>null</code> if there isn't such a feature
 * @throws RuntimeException if more than one feature of the specified type
 *          matches the pattern in that organism
 */
public <T extends Feature> T getFeatureByUniqueNamePatternAndOrganismCommonName(String uniqueNamePattern, String organismCommonName, Class<T> featureClass) {
    @SuppressWarnings("unchecked")
    List<T> features = getSession().createQuery(
        "from "+featureClass.getName()+" where uniqueName like :uniqueNamePattern" +
        " and organism.commonName = :organism")
        .setString("uniqueNamePattern", uniqueNamePattern)
        .setString("organism", organismCommonName)
        .list();
    if (features.isEmpty()) {
        logger.info(String.format("Hibernate found no feature of type '%s' with uniqueName pattern '%s' in organism '%s'",
            featureClass.getSimpleName(), uniqueNamePattern, organismCommonName));
        return null;
    }
    if (features.size() > 1) {
        // The value reported is a LIKE pattern, not a literal unique name;
        // the message now says so, matching the "not found" log line above.
        throw new RuntimeException(String.format("Found more than one feature of type '%s' with uniqueName pattern '%s' in organism '%s'",
            featureClass.getSimpleName(), uniqueNamePattern, organismCommonName));
    }
    return features.get(0);
}
/**
 * Return a list of features whose uniqueName matches the given pattern.
 *
 * @param namePattern an SQL/HQL <code>LIKE</code> pattern
 * @return the list of matching features, as returned by the query
 */
public List<Feature> getFeaturesByUniqueNamePattern(String namePattern) {
    // Typed local instead of a raw List; the unchecked suppression is
    // narrowed to the one statement that needs it.
    @SuppressWarnings("unchecked")
    List<Feature> features = getSession().createQuery(
        "from Feature where uniqueName like :name")
        .setString("name", namePattern).list();
    return features;
}
/**
 * Return the features that have a current (i.e. non-obsolete) synonym
 * with the given name.
 *
 * @param name the synonym name to look up
 * @return the features linked to a current synonym of this name
 */
public List<Feature> getFeaturesByAnyCurrentName(String name) {
    Query query = getSession().createQuery(
        "select fs.feature from FeatureSynonym fs where fs.current=true and fs.synonym.name=:name");
    query.setString("name", name);

    @SuppressWarnings("unchecked")
    List<Feature> matches = query.list();
    return matches;
}
/**
 * Return the features of a given type located on a source feature, on a
 * given strand, whose location spans the given range.
 *
 * <p>NOTE(review): the query selects locations with
 * <code>fmin &lt;= min</code> and <code>fmax &gt;= max</code>, i.e.
 * features that cover the whole range. Earlier documentation described
 * the result as features "completely contained within" the range;
 * confirm which is intended.
 *
 * @param min the minimum (interbase) coordinate
 * @param max the maximum (interbase) coordinate
 * @param strand the strand value the feature location must have
 * @param feat the source feature on which the features are located
 * @param type the name of the type of feature to return
 * @return the list of matching features
 */
public List<Feature> getFeaturesByRange(int min, int max, int strand, Feature feat, String type) {
    int sourceId = feat.getFeatureId();
    // strand and the source feature id are ints written into the HQL text.
    String hql = "select f from Feature f, FeatureLoc loc where "
        + "f = loc.feature and f.type.name=:type"
        + " and loc.strand=" + strand
        + " and loc.sourceFeature=" + sourceId
        + " and ( loc.fmin<=:min and loc.fmax>=:max)";

    Query query = getSession().createQuery(hql);
    query.setString("type", type);
    query.setInteger("min", min);
    query.setInteger("max", max);

    @SuppressWarnings("unchecked")
    List<Feature> located = query.list();
    return located;
}
/**
 * Return the FeatureCvTerms that link a given Feature and CvTerm and
 * have the given value of the 'not' flag.
 *
 * @param feature the Feature to test the link for
 * @param cvTerm the CvTerm to test the link for
 * @param not the required value of the not flag in the FeatureCvTerm
 * @return the list of matching FeatureCvTerms
 */
@SuppressWarnings("unchecked")
public List<FeatureCvTerm> getFeatureCvTermsByFeatureAndCvTermAndNot(Feature feature,
        CvTerm cvTerm, boolean not) {
    Query query = getSession().createQuery(
        "from FeatureCvTerm fct where fct.feature=:feature and fct.cvTerm=:cvTerm and fct.not=:not");
    query.setParameter("feature", feature);
    query.setParameter("cvTerm", cvTerm);
    query.setBoolean("not", not);
    return query.list();
}
/**
 * Return the FeatureCvTerms of a feature whose term belongs to a given CV
 * and which have the given value of the 'not' flag.
 *
 * <p>Previously the query bound a <code>not</code> parameter that the HQL
 * did not declare, and bound the <code>CvTerm</code> object itself against
 * the String property <code>cv.name</code>. The query now filters on
 * <code>fct.not</code> and compares against a CV name.
 *
 * <p>NOTE(review): the CV name is taken from the CV that
 * <code>cvName</code> belongs to (<code>cvName.getCv().getName()</code>).
 * This is an interpretation of the original intent -- confirm.
 *
 * @param feature the feature whose FeatureCvTerms are wanted
 * @param cvName a term whose CV identifies the controlled vocabulary
 * @param not the required value of the not flag in the FeatureCvTerm
 * @return the list of matching FeatureCvTerms
 */
@SuppressWarnings("unchecked")
public List<FeatureCvTerm> getFeatureCvTermsByFeatureAndCvName(Feature feature, CvTerm cvName, boolean not) {
    List<FeatureCvTerm> list = getSession().createQuery(
        "from FeatureCvTerm fct where fct.feature=:feature and fct.cvTerm.cv.name=:cvName and fct.not=:not")
        .setParameter("feature", feature)
        .setString("cvName", cvName.getCv().getName())
        .setBoolean("not", not)
        .list();
    return list;
}
/**
 * Return the features, in the given organisms, that are annotated with
 * the named term from the named CV.
 *
 * @param cvName the name of the CV
 * @param cvTermName the name of the term
 * @param orgs text placed verbatim inside an HQL <code>in (...)</code>
 *          clause on the organism common name. It is not escaped here, so
 *          it must already be a valid list of quoted literals
 * @return the list of matching features
 */
public List<Feature> getFeaturesByCvNameAndCvTermNameAndOrganisms(String cvName,
        String cvTermName, String orgs) {
    logger.info(String.format("Querying with cvName='%s', cvTermName='%s', orgs in (%s)",
        cvName, cvTermName, orgs));

    String hql = "select feature"
        + " from FeatureCvTerm fct"
        + " where fct.feature.organism.commonName in (" + orgs + ")"
        + " and fct.cvTerm.cv.name=:cvName and fct.cvTerm.name=:cvTermName";

    Query query = getSession().createQuery(hql);
    query.setString("cvName", cvName);
    query.setString("cvTermName", cvTermName);

    @SuppressWarnings("unchecked")
    List<Feature> annotated = query.list();
    return annotated;
}
/**
 * Return the features, in the given organisms, that are annotated with
 * the named term from any CV whose name matches a pattern.
 *
 * @param cvNamePattern an HQL <code>LIKE</code> pattern for the CV name
 * @param cvTermName the name of the term
 * @param orgs text placed verbatim inside an HQL <code>in (...)</code>
 *          clause on the organism common name. It is not escaped here, so
 *          it must already be a valid list of quoted literals
 * @return the list of matching features
 */
public List<Feature> getFeaturesByCvNamePatternAndCvTermNameAndOrganisms(String cvNamePattern,
        String cvTermName, String orgs) {
    logger.info(String.format("Querying with cvName like '%s', cvTermName='%s', orgs in (%s)",
        cvNamePattern, cvTermName, orgs));

    String hql = "select feature"
        + " from FeatureCvTerm fct"
        + " where fct.feature.organism.commonName in (" + orgs + ")"
        + " and fct.cvTerm.cv.name like :cvNamePattern and fct.cvTerm.name=:cvTermName";

    Query query = getSession().createQuery(hql);
    query.setString("cvNamePattern", cvNamePattern);
    query.setString("cvTermName", cvTermName);

    @SuppressWarnings("unchecked")
    List<Feature> annotated = query.list();
    return annotated;
}
/**
 * Look up a synonym by its name and type.
 *
 * @param name the name to look up
 * @param type the type of the Synonym
 * @return whatever <code>firstFromList</code> selects from the matching
 *          synonyms (presumably the first, or null if none -- see BaseDao)
 */
public Synonym getSynonymByNameAndCvTerm(String name, CvTerm type) {
    Query query = getSession().createQuery(
        "from Synonym s where s.name=:name and s.type=:type");
    query.setString("name", name);
    query.setParameter("type", type);

    @SuppressWarnings("unchecked")
    List<Synonym> synonyms = query.list();
    return firstFromList(synonyms, "name", name, "type", type);
}
/**
 * Return the FeatureSynonyms that link a given Feature to a given Synonym.
 *
 * @param feature the feature side of the link
 * @param synonym the synonym side of the link
 * @return the list of feature synonyms returned by the query
 */
public List<FeatureSynonym> getFeatureSynonymsByFeatureAndSynonym(Feature feature,
        Synonym synonym) {
    String hql = "from FeatureSynonym fs where fs.feature=:feature and fs.synonym=:synonym";
    String[] paramNames = { "feature", "synonym" };
    Object[] paramValues = { feature, synonym };
    return performQuery(FeatureSynonym.class, hql, paramNames, paramValues);
}
/**
 * Collect three lists of data related to a GO accession.
 *
 * <p>The accession number is the part of <code>go</code> between the first
 * and second colon (e.g. "GO:0005515" gives "0005515"). A string with no
 * colon is used as the accession number as is. The returned list holds,
 * in this order:
 * <ol>
 * <li>for each feature annotated with a term whose dbxref accession equals
 *     the accession number, the first feature found two relationship steps
 *     above it (presumably the gene of a polypeptide -- the relationship
 *     types are not checked);</li>
 * <li>every feature whose type is named "chromosome";</li>
 * <li>the CvTerms whose dbxref accession equals the accession number.</li>
 * </ol>
 *
 * @param go a GO identifier such as "GO:0005515", or a bare accession number
 * @return a list of the three lists described above
 * @throws IllegalArgumentException if <code>go</code> contains a colon
 *          but no accession number can be extracted from it
 */
public List<List<?>> getFeatureByGO(final String go) {
    final String number;
    if (go.indexOf(':') < 0) {
        // No database prefix: treat the whole string as the accession.
        number = go;
    } else {
        String[] parts = go.split(":");
        if (parts.length < 2) {
            // e.g. "GO:" -- indexing parts[1] used to fail with an
            // ArrayIndexOutOfBoundsException here.
            throw new IllegalArgumentException(
                String.format("No accession number found in GO identifier '%s'", go));
        }
        number = parts[1];
    }

    List<Feature> polypeptides = performQuery(Feature.class,
        "select f " + "from Feature f, CvTerm c, FeatureCvTerm fc where "
                + "c.dbXRef.accession=:number and fc.cvTerm = c "
                + "and fc.feature=f", new String[] { "number" },
        new Object[] { number });

    List<Feature> features = new ArrayList<Feature>();
    for (Feature polypep : polypeptides) {
        logger.info(polypep.getUniqueName());
        // Two hops up the relationship graph from the annotated feature.
        List<Feature> genes = performQuery(Feature.class,
            "select f "
                    + "from Feature f,FeatureRelationship f1,FeatureRelationship f2 where "
                    + "f2.subjectFeature=:polypep and f2.objectFeature=f1.subjectFeature "
                    + "and f1.objectFeature=f",
            new String[] { "polypep" },
            new Object[] { polypep });
        if (!genes.isEmpty()) {
            features.add(genes.get(0));
        }
    }

    List<CvTerm> goName = performQuery(CvTerm.class,
        "select cv " + "from CvTerm cv where cv.dbXRef.accession=:number",
        new String[] { "number" }, new Object[] { number });

    String name = "chromosome";
    List<Feature> flocs = performQuery(Feature.class,
        "select f from Feature f " + "where f.type.name=:name", new String[] { "name" },
        new Object[] { name });

    List<List<?>> data = new ArrayList<List<?>>();
    data.add(features);
    data.add(flocs);
    data.add(goName);
    return data;
}
/**
 * Look up the FeatureDbXRef that links a given Feature to a given DbXRef.
 *
 * @param feature the feature side of the link
 * @param dbXRef the database reference side of the link
 * @return whatever <code>firstFromList</code> selects from the matching
 *          links (presumably the first, or null if none -- see BaseDao)
 */
public FeatureDbXRef getFeatureDbXRefByFeatureAndDbXRef(Feature feature, DbXRef dbXRef) {
    String hql = "from FeatureDbXRef fdxr where fdxr.feature=:feature and fdxr.dbXRef=:dbXRef";
    String[] paramNames = { "feature", "dbXRef" };
    Object[] paramValues = { feature, dbXRef };

    List<FeatureDbXRef> links = performQuery(FeatureDbXRef.class, hql, paramNames, paramValues);
    return firstFromList(links, feature, dbXRef);
}
/**
 * Return the features of a given type, in the given organisms, whose
 * unique name matches a wildcard expression.
 *
 * @param nl the unique name to look for; each <code>*</code> is turned
 *          into the HQL wildcard <code>%</code>
 * @param orgNames text placed verbatim inside an HQL <code>in ( ... )</code>
 *          clause on the organism common name
 * @param featureType the name of the type of features to return, e.g. gene
 * @return the list of matching features
 */
public List<Feature> getFeaturesByAnyNameAndOrganism(String nl, String orgNames,
        String featureType) {
    String lookup = nl.replace('*', '%');
    logger.info("Lookup='" + lookup + "' featureType='" + featureType + "' orgs='" + orgNames
            + "'");

    // The list of orgs is being included literally as it didn't seem to
    // work as a parameter
    String hql = "select f from Feature f where"
        + " f.uniqueName like :lookup and f.type.name=:featureType and f.organism.commonName in ( "
        + orgNames + " )";
    String[] paramNames = { "lookup", "featureType" };
    Object[] paramValues = { lookup, featureType };

    return performQuery(Feature.class, hql, paramNames, paramValues);
}
/**
 * List every term name in the CV with id 15, together with the number
 * of feature annotations that use it.
 *
 * @return a list of <code>CountedName</code> objects, one per term name
 */
// FIXME - Remove hard coded value - make more general?
public List<CountedName> getAllProductsWithCount() {
    String hql = "select new CountedName(cvt.name, count(fct.feature.uniqueName))"
        + " from CvTerm cvt, FeatureCvTerm fct"
        + " where cvt=fct.cvTerm and cvt.cv=15 group by cvt.name";
    List<CountedName> counts = performQuery(CountedName.class, hql);
    return counts;
}
/**
 * Count the feature annotations, within one organism, that use a term
 * with the given name.
 *
 * @param name the name of the CvTerm
 * @param o the organism the annotated features must belong to
 * @return the count, as produced by the query's unique result
 */
public Long getFeatureCvTermCountInOrganism(String name, Organism o) {
    String hql = "select count(f) from FeatureCvTerm fct, Feature f"
        + " where fct.feature = f and f.organism=:organism and fct.cvTerm.name = :name ";
    String[] paramNames = {"name", "organism"};
    Object[] paramValues = {name, o};

    Object count = createQuery(hql, paramNames, paramValues).uniqueResult();
    return (Long) count;
}
/**
 * Return a list of features that are annotated with a cvterm whose name
 * matches the given pattern.
 *
 * <p>The query navigates <code>fct.cvTerm.name</code>, the property used
 * by every other FeatureCvTerm query in this class. It previously used
 * <code>fct.type.name</code>.
 *
 * @param cvTermName the CvTerm name, used as an HQL <code>LIKE</code> pattern
 * @return the list of annotated features
 */
public List<Feature> getFeaturesByCvTermName(String cvTermName) {
    List<Feature> features = performQuery(Feature.class,
        "select fct.feature from FeatureCvTerm fct where fct.cvTerm.name like :cvTermName",
        "cvTermName", cvTermName);
    return features;
}
// FIXME - Use top level properties instead
/**
 * Return the features whose type name contains "chromosome".
 *
 * <p>The query navigates <code>type.name</code>, the property used by the
 * other Feature queries in this class. It previously used
 * <code>cvTerm.name</code>.
 *
 * @return the list of features treated as top-level
 */
public List<Feature> getTopLevelFeatures() {
    String name = "%chromosome%";
    List<Feature> topLevels = performQuery(Feature.class,
        "from Feature where type.name like :name", "name", name);
    return topLevels;
}
/**
 * Return the features of an organism that carry a property of type
 * <code>top_level_seq</code> (CV <code>genedb_misc</code>), ordered by
 * unique name.
 *
 * @param organism the organism whose top-level features are wanted
 * @return the list of top-level features
 */
public List<Feature> getTopLevelFeaturesInOrganism(Organism organism) {
    CvTerm topLevelTerm = cvDao.getCvTermByNameAndCvName("top_level_seq", "genedb_misc", true);

    String hql = "select f from Feature f, FeatureProp fp "
        + " where fp.feature = f "
        + " and f.organism = :organism "
        + " and fp.cvTerm = :cvterm "
        + " order by f.uniqueName ";
    String[] paramNames = {"organism", "cvterm"};
    Object[] paramValues = {organism, topLevelTerm};

    return performQuery(Feature.class, hql, paramNames, paramValues);
}
/**
 * Return the features annotated with a term whose name and CV name both
 * match the given patterns.
 *
 * @param cvTermName the CvTerm name, used as an HQL <code>LIKE</code> pattern
 * @param cvName the name of the CV to which the term belongs, also used
 *          as a <code>LIKE</code> pattern
 * @return the list of matching features
 */
public List<Feature> getFeaturesByCvTermNameAndCvName(String cvTermName, String cvName) {
    String hql = "select f.feature from FeatureCvTerm f where f.cvTerm.name like :cvTermName"
        + " and f.cvTerm.cv.name like :cvName";
    String[] paramNames = { "cvTermName", "cvName" };
    Object[] paramValues = { cvTermName, cvName };
    return performQuery(Feature.class, hql, paramNames, paramValues);
}
/**
 * Given the specification of a CvTerm, find the features two relationship
 * steps above the features annotated with that term, where the lower step
 * is a <code>derives_from</code> relationship. In practice, this is used
 * to get a list of genes whose transcripts have polypeptides with a
 * particular Gene Ontology annotation, for example.
 *
 * @param cvTermName the CvTerm name
 * @param cvName the Cv name
 * @param organism text placed verbatim inside an HQL <code>in (...)</code>
 *          clause on the organism common name. May be <code>null</code>,
 *          in which case the search spans all organisms
 * @return the list of matches, ordered by organism abbreviation
 */
public List<GeneNameOrganism> getGeneNameOrganismsByCvTermNameAndCvName(String cvTermName, String cvName,
        String organism) {
    StringBuilder hql = new StringBuilder();
    hql.append("select new org.gmod.schema.utils.GeneNameOrganism( ");
    hql.append("transcript_gene.objectFeature.uniqueName, transcript_gene.objectFeature.organism.abbreviation) ");
    hql.append("from ");
    hql.append("FeatureRelationship transcript_gene, FeatureRelationship polypeptide_transcript ");
    hql.append("where transcript_gene.subjectFeature=polypeptide_transcript.objectFeature and ");
    hql.append("polypeptide_transcript.type.name='derives_from' and ");
    if (organism != null) {
        // Only this clause differs between the restricted and unrestricted query.
        hql.append(" transcript_gene.objectFeature.organism.commonName in (").append(organism).append(") and ");
    }
    hql.append("polypeptide_transcript.subjectFeature in ( ");
    hql.append("select fct.feature from FeatureCvTerm fct where ");
    hql.append("fct.cvTerm.name=:cvTermName and fct.cvTerm.cv.name=:cvName) ");
    hql.append("order by transcript_gene.objectFeature.organism.abbreviation");

    String[] paramNames = { "cvTermName", "cvName" };
    Object[] paramValues = { cvTermName, cvName };
    return performQuery(GeneNameOrganism.class, hql.toString(), paramNames, paramValues);
}
/**
 * Find the features two relationship steps above the features annotated
 * with a named term from any CV whose name matches a pattern, where the
 * lower step is a <code>derives_from</code> relationship.
 *
 * @param cvTermName the CvTerm name
 * @param cvNamePattern an HQL <code>LIKE</code> pattern for the Cv name
 * @param organism text placed verbatim inside an HQL <code>in (...)</code>
 *          clause on the organism common name. May be <code>null</code>,
 *          in which case the search spans all organisms
 * @return the list of matches, ordered by organism abbreviation
 */
public List<GeneNameOrganism> getGeneNameOrganismsByCvTermNameAndCvNamePattern(String cvTermName, String cvNamePattern,
        String organism) {
    StringBuilder hql = new StringBuilder();
    hql.append("select new org.gmod.schema.utils.GeneNameOrganism( ");
    hql.append("transcript_gene.objectFeature.uniqueName, transcript_gene.objectFeature.organism.abbreviation) ");
    hql.append("from ");
    hql.append("FeatureRelationship transcript_gene, FeatureRelationship polypeptide_transcript ");
    hql.append("where transcript_gene.subjectFeature=polypeptide_transcript.objectFeature and ");
    hql.append("polypeptide_transcript.type.name='derives_from' and ");
    if (organism != null) {
        // Only this clause differs between the restricted and unrestricted query.
        hql.append(" transcript_gene.objectFeature.organism.commonName in (").append(organism).append(") and ");
    }
    hql.append("polypeptide_transcript.subjectFeature in ( ");
    hql.append("select fct.feature from FeatureCvTerm fct where ");
    hql.append("fct.cvTerm.name=:cvTermName and fct.cvTerm.cv.name like :cvNamePattern) ");
    hql.append("order by transcript_gene.objectFeature.organism.abbreviation");

    String[] paramNames = { "cvTermName", "cvNamePattern" };
    Object[] paramValues = { cvTermName, cvNamePattern };
    return performQuery(GeneNameOrganism.class, hql.toString(), paramNames, paramValues);
}
/**
 * Return the unique names of features of a given type whose unique name
 * contains the given text, ignoring case. Intended for auto-completion.
 *
 * @param name the text the unique name must contain
 * @param cvTerm the type the features must have
 * @param limit the maximum number of results to return
 * @return the list of matching unique names
 */
public List<String> getPossibleMatches(String name, CvTerm cvTerm, int limit) {
    String hql =
        "select f.uniqueName from Feature f where lower(f.uniqueName) like lower(:name) and f.type = :cvTerm";
    String[] paramNames = { "name", "cvTerm" };
    Object[] paramValues = { "%" + name + "%", cvTerm };

    Query query = createQuery(hql, paramNames, paramValues);
    query.setMaxResults(limit);

    @SuppressWarnings("unchecked")
    List<String> uniqueNames = query.list();
    return uniqueNames;
}
/**
 * Return all the features that belong to an organism.
 *
 * @param org the Organism
 * @return the list of features of that organism
 */
public List<Feature> getFeaturesByOrganism(Organism org) {
    String hql = "from Feature f where f.organism=:org";
    return performQuery(Feature.class, hql, "org", org);
}
/**
 * Return the features corresponding to the uniquenames in the list.
 *
 * <p>The names are bound as a query parameter list rather than pasted
 * into the HQL text, so names containing an apostrophe are handled
 * correctly and cannot alter the query.
 *
 * @param names the list of uniquenames; may be <code>null</code> or empty
 * @return the list of matching features; empty (never <code>null</code>)
 *          if <code>names</code> is <code>null</code> or empty
 */
public List<Feature> getFeaturesByUniqueNames(List<String> names) {
    // An empty "in ()" clause is not valid, so there is nothing to query.
    if (names == null || names.isEmpty()) {
        return new ArrayList<Feature>();
    }
    @SuppressWarnings("unchecked")
    List<Feature> features = getSession().createQuery(
        "from Feature f where f.uniqueName in (:names)")
        .setParameterList("names", names)
        .list();
    return features;
}
/**
 * Return a list of features located within a given range.
 *
 * <p>The source feature is navigated as <code>fl.sourceFeature</code>,
 * the property name used by <code>getFeaturesByRange</code> in this
 * class. It was previously <code>fl.featureBySrcFeatureId</code>.
 *
 * @param min the minimum (interbase) coordinate
 * @param max the maximum (interbase) coordinate
 * @param type the name of the feature type (gene, protein, mRNA etc)
 * @param organism the common name of the organism the features belong to
 * @param parent the source feature (chromosome or contig) on which the
 *          features are located
 * @return a list of features completely contained within this range
 */
public List<Feature> getFeaturesByLocation(int min, int max, String type, String organism,
        Feature parent) {
    List<Feature> features = performQuery(Feature.class,
        "select f from Feature f , FeatureLoc fl " + "where fl.fmin>=:min "
                + "and fl.fmax<=:max and fl.feature=f.featureId "
                + "and fl.sourceFeature=:parent and f.type.name=:type "
                + "and f.organism.commonName=:organism",
        new String[] { "min", "max", "type", "organism", "parent" },
        new Object[] { min, max, type, organism, parent });
    return features;
}
/**
 * Look up the FeatureRelationship with a particular subject, object and
 * relationship type. If several exist, the first one returned by the
 * query is used.
 *
 * @param subject the subject Feature
 * @param object the object Feature
 * @param relation the cvterm corresponding to the relation
 * @return the FeatureRelationship, or <code>null</code> if there is none
 */
public FeatureRelationship getFeatureRelationshipBySubjectObjectAndRelation(Feature subject,
        Feature object, CvTerm relation) {
    String hql = "from FeatureRelationship fr "
        + "where fr.subjectFeature=:subject and fr.objectFeature=:object "
        + "and fr.type=:relation";
    String[] paramNames = { "subject", "object", "relation" };
    Object[] paramValues = { subject, object, relation };

    List<FeatureRelationship> relationships =
        performQuery(FeatureRelationship.class, hql, paramNames, paramValues);
    return relationships.isEmpty() ? null : relationships.get(0);
}
/**
 * Test whether any feature has the given unique name.
 *
 * @param uniqueName the unique name to look for
 * @return <code>true</code> if the query finds at least one such feature
 */
private boolean featureExists(String uniqueName) {
    String hql = "select uniqueName from Feature where uniqueName = :uniqueName";
    boolean noneFound = performQuery(String.class, hql, "uniqueName", uniqueName).isEmpty();
    return !noneFound;
}
/**
 * Given a candidate uniqueName for a feature, return a derived
 * name that does not exist in the database. If the given name
 * does not exist, it is guaranteed to be returned unchanged;
 * otherwise it will have the string <code>:n</code> appended,
 * where <code>n</code> is the least positive integer such that
 * the name does not exist.
 *
 * @param uniqueName the proposed uniqueName
 * @return a derived uniqueName that does not exist in the database
 */
public String makeNameUnique(String uniqueName) {
    /*
     * Any features which have been persisted but not flushed
     * will fail to be found here, unless we flush them first.
     */
    flush();
    if (!featureExists(uniqueName)) {
        logger.debug(String.format("Feature named '%s' does not already exist", uniqueName));
        return uniqueName;
    }

    int n = 1;
    String nameToUse = String.format("%s:%d", uniqueName, n);
    while (featureExists(nameToUse)) {
        n++;
        nameToUse = String.format("%s:%d", uniqueName, n);
    }
    // Logged once, after the search. Previously this statement was the
    // body of a brace-less for loop, so it reported every rejected
    // candidate and never the name actually chosen.
    logger.debug(String.format("Feature '%s' will be named '%s'", uniqueName, nameToUse));
    return nameToUse;
}
/*
* The object-creation methods below are experimental. The thought is that
* it's redundant to have to specify the CvTerm when creating a feature,
* because the feature class determines it. I'm not sure whether it's
* possible to have the constructors do this: certainly they can't get
* access to the Hibernate session in any straightforward fashion. One
* possibility is to get the session factory from JNDI, where Hibernate
* should bind it. (In fact I don't think that is currently working: we get
* an error saying it can't be bound. This is presumably easy to fix.)
*
* If we can write constructors that get the CvTerm themselves, that would
* perhaps be a better solution to the basic problem. It's obviously
* inconvenient to have a lot of class-specific methods here that should
* really be associated with the classes themselves. On the other hand,
* methods here can do more sophisticated construction involving several
* objects, factory-style, as {@link #createPolypeptideDomain} shows.
*
* The message (to my future self and to anyone else who works on this) is
* to keep an open mind about whether this is a good idea or not. Perhaps we
* should ALSO have constructors that work out the CvTerm for themselves,
* and just have non-trivial factory methods defined here.
*
* There is also some overlap of intent with
* org.genedb.db.loading.FeatureUtils, which should be resolved, perhaps by
* migrating the factory methods of FeatureUtils into here.
*
* -rh11
*/
/**
 * Cached CvTerm "polypeptide_domain" from the "sequence" CV; looked up
 * on first use by createPolypeptideDomain.
 */
private CvTerm polypeptideDomainType;
/**
 * Cached CvTerm "description" from the "feature_property" CV; looked up
 * on first use by createPolypeptideDomain.
 */
private CvTerm descriptionType;
/**
 * Create and persist a new polypeptide domain feature that has no
 * E-value and is not attached to an analysis. Delegates to the
 * nine-argument overload.
 *
 * @param domainUniqueName the unique name to give the domain feature
 * @param polypeptide the polypeptide to which this domain feature should be attached
 * @param score an indication, from the algorithm that predicted this domain,
 *          of the confidence of the prediction. Usually a number.
 * @param description description of the domain
 * @param start the start of the domain, relative to the polypeptide, in interbase coordinates
 * @param end the end of the domain, relative to the polypeptide, in interbase coordinates
 * @param dbxref a database reference for this domain, if applicable. Can be null.
 * @return the newly-created polypeptide domain
 */
public PolypeptideDomain createPolypeptideDomain(String domainUniqueName, Polypeptide polypeptide,
        String score, String description, int start, int end, DbXRef dbxref) {
    final String noEvalue = null;
    final Analysis noAnalysis = null;
    return createPolypeptideDomain(domainUniqueName, polypeptide, score, description,
        start, end, dbxref, noEvalue, noAnalysis);
}
/**
 * Create and persist a new polypeptide domain feature. The domain is
 * located on the polypeptide, given a description property and a dbxref,
 * and optionally attached to an analysis.
 *
 * @param domainUniqueName the unique name to give the domain feature
 * @param polypeptide the polypeptide to which this domain feature should be
 *          attached; the domain is created in the polypeptide's organism
 * @param score an indication, from the algorithm that predicted this domain,
 *          of the confidence of the prediction. Usually a number.
 *          Only used when <code>analysis</code> is not null.
 * @param description description of the domain
 * @param start the start of the domain, relative to the polypeptide, in interbase coordinates
 * @param end the end of the domain, relative to the polypeptide, in interbase coordinates
 * @param dbxref a database reference for this domain, if applicable. Can be null.
 * @param evalue the E-value assigned to this domain by the prediction algorithm. Can be null.
 *          Only used when <code>analysis</code> is not null.
 * @param analysis the analysis object to which the polypeptide domain should
 *          be attached via an analysisfeature. Can be null, in which case
 *          no analysisfeature is created.
 * @return the newly-created polypeptide domain
 */
// TODO Add interproDbxref as additional parameter?
public PolypeptideDomain createPolypeptideDomain(String domainUniqueName, Polypeptide polypeptide,
        String score, String description, int start, int end, DbXRef dbxref, String evalue,
        Analysis analysis) {
    // The two CvTerms are looked up once and then kept in fields.
    if (polypeptideDomainType == null) {
        polypeptideDomainType = cvDao.getCvTermByNameAndCvName("polypeptide_domain", "sequence");
    }
    if (descriptionType == null) {
        descriptionType = cvDao.getCvTermByNameAndCvName("description", "feature_property");
    }

    PolypeptideDomain newDomain = new PolypeptideDomain(
        polypeptide.getOrganism(), polypeptideDomainType, domainUniqueName);

    final short strand = (short) 0;
    newDomain.addFeatureLoc(
        new FeatureLoc(polypeptide, newDomain, start, false, end, false, strand, null, 0, 0));
    newDomain.addFeatureProp(new FeatureProp(newDomain, descriptionType, description, 0));
    newDomain.setDbXRef(dbxref);

    if (analysis != null) {
        newDomain.createAnalysisFeature(analysis, score, evalue);
    }

    persist(newDomain);
    return newDomain;
}
/**
 * Create a membrane structure region that runs from position 0 of the
 * polypeptide to <code>polypeptide.getSeqLen()</code>.
 * This method does not itself persist the new region.
 *
 * @param polypeptide the polypeptide on which the membrane structure is located
 * @return the newly-created membrane structure
 */
public MembraneStructure createMembraneStructure(Polypeptide polypeptide) {
    final int sequenceLength = polypeptide.getSeqLen();
    return createPolypeptideRegion(MembraneStructure.class, polypeptide, 0, sequenceLength);
}
/**
 * Create a transmembrane region as a component of the given membrane structure.
 * The region is located on the membrane structure's polypeptide.
 *
 * @param membraneStructure the membrane structure that contains the new region
 * @param start the start of the region on the polypeptide
 * @param end the end of the region on the polypeptide
 * @return the newly-created transmembrane region
 */
public TransmembraneRegion createTransmembraneRegion(MembraneStructure membraneStructure, int start, int end) {
    final TransmembraneRegion transmembrane =
            createMembraneStructureComponent(TransmembraneRegion.class, start, end, membraneStructure);
    return transmembrane;
}
/**
 * Create a cytoplasmic region as a component of the given membrane structure.
 * The region is located on the membrane structure's polypeptide.
 *
 * @param membraneStructure the membrane structure that contains the new region
 * @param start the start of the region on the polypeptide
 * @param end the end of the region on the polypeptide
 * @return the newly-created cytoplasmic region
 */
public CytoplasmicRegion createCytoplasmicRegion(MembraneStructure membraneStructure, int start, int end) {
    final CytoplasmicRegion cytoplasmic =
            createMembraneStructureComponent(CytoplasmicRegion.class, start, end, membraneStructure);
    return cytoplasmic;
}
/**
 * Create a non-cytoplasmic region as a component of the given membrane structure.
 * The region is located on the membrane structure's polypeptide.
 *
 * @param membraneStructure the membrane structure that contains the new region
 * @param start the start of the region on the polypeptide
 * @param end the end of the region on the polypeptide
 * @return the newly-created non-cytoplasmic region
 */
public NonCytoplasmicRegion createNonCytoplasmicRegion(MembraneStructure membraneStructure, int start, int end) {
    final NonCytoplasmicRegion nonCytoplasmic =
            createMembraneStructureComponent(NonCytoplasmicRegion.class, start, end, membraneStructure);
    return nonCytoplasmic;
}
/**
 * Create a component of a membrane structure. The component is located on the
 * polypeptide returned by <code>membraneStructure.getPolypeptide()</code>, and
 * the membrane structure is used as its containing region.
 *
 * @param componentClass the class of component to create
 * @param start the start of the component on the polypeptide
 * @param end the end of the component on the polypeptide
 * @param membraneStructure the membrane structure that contains the component
 * @return the newly-created component
 */
private <T extends MembraneStructureComponent> T createMembraneStructureComponent(Class<T> componentClass,
        int start, int end, MembraneStructure membraneStructure) {
    final Polypeptide owningPolypeptide = membraneStructure.getPolypeptide();
    return createPolypeptideRegion(componentClass, owningPolypeptide, start, end, membraneStructure);
}
/**
 * Create a polypeptide region and register it as a part of another region.
 *
 * @param regionClass the class of region to create
 * @param polypeptide the polypeptide on which the region is located
 * @param start the start of the region on the polypeptide
 * @param end the end of the region on the polypeptide
 * @param containingRegion the region of which the new region is a part
 * @return the newly-created region
 */
private <T extends PolypeptideRegion> T createPolypeptideRegion(Class<T> regionClass,
        Polypeptide polypeptide, int start, int end, PolypeptideRegion containingRegion) {
    final T newRegion = createPolypeptideRegion(regionClass, polypeptide, start, end);

    // Link the new region to its container with a part_of relationship
    addPart(containingRegion, newRegion);

    return newRegion;
}
/** The 'part_of' term from the 'relationship' CV; fetched on first use by {@link #addPart}. */
private CvTerm partOfType;

/**
 * Relate two features with a 'part_of' feature relationship, and add the
 * relationship to both features.
 *
 * @param whole the containing feature (the object of the relationship)
 * @param part the contained feature (the subject of the relationship)
 * @return the newly-created feature relationship
 */
private FeatureRelationship addPart(Feature whole, Feature part) {
    if (partOfType == null) {
        partOfType = cvDao.getCvTermByNameAndCvName("part_of", "relationship");
    }

    final FeatureRelationship partOf = new FeatureRelationship(part, whole, partOfType, 0);

    // Register the relationship on both ends
    part.addFeatureRelationshipsForSubjectId(partOf);
    whole.addFeatureRelationshipsForObjectId(partOf);

    return partOf;
}
/**
 * Create a polypeptide region of the given class, located on the polypeptide.
 * The region's unique name has the form <code>polypeptideUniqueName:start-end</code>.
 * The region class is instantiated reflectively, using its
 * <code>(Organism, CvTerm, String)</code> constructor.
 * This method does not itself persist the new region.
 *
 * @param regionClass the class of region to create
 * @param polypeptide the polypeptide on which the region is located
 * @param start the start of the region on the polypeptide
 * @param end the end of the region on the polypeptide
 * @return the newly-created region
 * @throws RuntimeException if the region class cannot be instantiated
 */
private <T extends PolypeptideRegion> T createPolypeptideRegion(Class<T> regionClass,
        Polypeptide polypeptide, int start, int end) {
    final CvTerm typeOfRegion = cvDao.getCvTermForAnnotatedClass(regionClass);
    final String uniqueName = String.format("%s:%d-%d", polypeptide.getUniqueName(), start, end);

    final T newRegion;
    try {
        newRegion = regionClass
                .getConstructor(Organism.class, CvTerm.class, String.class)
                .newInstance(polypeptide.getOrganism(), typeOfRegion, uniqueName);
    }
    catch (Exception e) {
        throw new RuntimeException(String.format("Failed to instantiate %s", regionClass), e);
    }

    // Location of the region on the polypeptide
    newRegion.addFeatureLoc(
            new FeatureLoc(polypeptide, newRegion, start, false, end, false, (short)0/*strand*/, null, 0, 0));

    return newRegion;
}
/** The signal peptide feature type; fetched on first use. */
private CvTerm signalPeptideType;
/** The 'cleavage_site_probability' term from the 'genedb_misc' CV; fetched on first use. */
private CvTerm cleavageSiteProbabilityType;

/**
 * Create a signal peptide feature that is not attached to any analysis.
 * Equivalent to calling
 * {@link #createSignalPeptide(Polypeptide, int, String, Analysis)} with a
 * null analysis.
 *
 * @param polypeptide the polypeptide on which the signal peptide is located
 * @param loc the end of the signal peptide on the polypeptide (it starts at 0)
 * @param probability the cleavage site probability, stored as a feature property
 * @return the newly-created signal peptide
 */
public SignalPeptide createSignalPeptide(Polypeptide polypeptide, int loc, String probability) {
    return createSignalPeptide(polypeptide, loc, probability, null);
}

/**
 * Create a signal peptide feature located on the polypeptide from 0 to <code>loc</code>.
 * The unique name has the form <code>polypeptideUniqueName:sigpLOC</code>.
 * This method does not itself persist the new feature.
 *
 * @param polypeptide the polypeptide on which the signal peptide is located
 * @param loc the end of the signal peptide on the polypeptide (it starts at 0)
 * @param probability the cleavage site probability, stored as a feature property
 * @param analysis the analysis to which the signal peptide should be attached
 *          via an analysisfeature. If null, no analysisfeature is created.
 * @return the newly-created signal peptide
 */
public SignalPeptide createSignalPeptide(Polypeptide polypeptide, int loc, String probability, Analysis analysis) {
    if (signalPeptideType == null) {
        // NOTE(review): createHelixTurnHelix passes "SO" as the first argument of
        // getCvTermByDbAcc; confirm that "sequence" is the intended value here.
        signalPeptideType = cvDao.getCvTermByDbAcc("sequence", "0000418");
    }
    if (cleavageSiteProbabilityType == null) {
        cleavageSiteProbabilityType = cvDao.getCvTermByNameAndCvName("cleavage_site_probability", "genedb_misc");
    }

    String regionUniqueName = String.format("%s:sigp%d", polypeptide.getUniqueName(), loc);
    SignalPeptide signalPeptide = new SignalPeptide(polypeptide.getOrganism(), signalPeptideType, regionUniqueName);

    FeatureLoc signalPeptideLoc = new FeatureLoc(polypeptide, signalPeptide, 0, false, loc, false, (short)0/*strand*/, null, 0, 0);
    signalPeptide.addFeatureLoc(signalPeptideLoc);

    FeatureProp probabilityProp = new FeatureProp(signalPeptide, cleavageSiteProbabilityType, probability, 0);
    signalPeptide.addFeatureProp(probabilityProp);

    // Add the analysisfeature only when an analysis was supplied. A null analysis
    // used to raise a RuntimeException here, which meant that the three-argument
    // overload (which always passes null) could never succeed. A null analysis is
    // now treated the same way as in createPolypeptideDomain.
    if (analysis != null) {
        signalPeptide.createAnalysisFeature(analysis);
    }
    return signalPeptide;
}
//Helix-turn-helix 22.6.2009 NDS
/** The helix-turn-helix feature type; fetched on first use. */
private CvTerm helixTurnHelixType;
// The next two fields are not referenced by createHelixTurnHelix.
private CvTerm maxScoreAtCvTerm;
private CvTerm stdDeviationsCvTerm;

/**
 * Create a helix-turn-helix feature located on the polypeptide.
 * The unique name has the form <code>polypeptideUniqueName:start-end</code>.
 * This method does not itself persist the new feature.
 *
 * @param polypeptide the polypeptide on which the feature is located
 * @param start the start (fmin) of the feature on the polypeptide
 * @param end the end (fmax) of the feature on the polypeptide
 * @param score the score, recorded on the analysisfeature
 * @param maxScoreAt stored as the 'Maximum_score_at' property (genedb_misc)
 * @param stdDeviations stored as the 'Standard_deviations' property (genedb_misc)
 * @param analysis the analysis to which the feature is attached. Must not be null.
 * @return the newly-created helix-turn-helix feature
 * @throws RuntimeException if <code>analysis</code> is null
 */
public HelixTurnHelix createHelixTurnHelix(Polypeptide polypeptide, int start, int end, String score, int maxScoreAt, String stdDeviations, Analysis analysis) {
    if (helixTurnHelixType == null) {
        /* Looks for the cvterm where the dxref_id corresponds to a dbxref record
         * whose accession is 0001081 and the database is 'SO' */
        helixTurnHelixType = cvDao.getCvTermByDbAcc("SO", "0001081");
        helixTurnHelixType.getCvTermId();
    }

    final String hthUniqueName = String.format("%s:%d-%d", polypeptide.getUniqueName(), start, end);
    final HelixTurnHelix hth = new HelixTurnHelix(polypeptide.getOrganism(), helixTurnHelixType, hthUniqueName, true /*analysis*/, false /*obsolete*/);

    /* Featureloc */
    hth.addFeatureLoc(
            new FeatureLoc(polypeptide /*sourcefeature*/, hth, start /*fmin*/, end /*fmax*/, 0 /*strand*/, null /*phase*/, 0 /*rank*/));

    /* Feature properties */
    hth.addFeatureProp(Integer.toString(maxScoreAt), "genedb_misc", "Maximum_score_at", 0 /*rank*/);
    hth.addFeatureProp(stdDeviations, "genedb_misc", "Standard_deviations", 0 /*rank*/);

    /* Analysisfeature: an analysis is mandatory here */
    if (analysis == null) {
        throw new RuntimeException("Could not create analysisfeature because analysis object is null");
    }
    hth.createAnalysisFeature(analysis,score,null);

    return hth;
}
/** The 'GPI_anchored' term from the 'genedb_misc' CV; fetched on first use. */
private CvTerm gpiAnchoredType;
/** The 'GPI_anchor_cleavage_site' term; fetched on first use by createGPIAnchorCleavageSite. */
private CvTerm gpiAnchorCleavageSiteType;
/** The 'GPI_cleavage_site_score' term; fetched on first use by createGPIAnchorCleavageSite. */
private CvTerm gpiCleavageSiteScoreType;

/**
 * Mark a polypeptide as GPI-anchored by adding a 'GPI_anchored' property
 * with the value "true".
 *
 * @param polypeptide the polypeptide to mark
 * @return the newly-created feature property
 */
public FeatureProp createGPIAnchoredProperty(Polypeptide polypeptide) {
    if (gpiAnchoredType == null) {
        gpiAnchoredType = cvDao.getCvTermByNameAndCvName("GPI_anchored", "genedb_misc");
    }

    final FeatureProp anchoredProp = new FeatureProp(polypeptide, gpiAnchoredType, "true", 0);
    polypeptide.addFeatureProp(anchoredProp);
    return anchoredProp;
}
/**
 * Create a GPI anchor cleavage site feature on the polypeptide. The feature's
 * location starts and ends at <code>anchorLocation</code>, and its unique
 * name has the form <code>polypeptideUniqueName:gpi</code>.
 * This method does not itself persist the new feature.
 *
 * @param polypeptide the polypeptide on which the cleavage site is located
 * @param anchorLocation the location of the cleavage site on the polypeptide
 * @param score stored as the 'GPI_cleavage_site_score' property
 * @return the newly-created cleavage site feature
 */
public GPIAnchorCleavageSite createGPIAnchorCleavageSite(Polypeptide polypeptide, int anchorLocation, String score) {
    if (gpiAnchorCleavageSiteType == null) {
        gpiAnchorCleavageSiteType = cvDao.getCvTermByNameAndCvName("GPI_anchor_cleavage_site", "genedb_feature_type");
    }
    if (gpiCleavageSiteScoreType == null) {
        gpiCleavageSiteScoreType = cvDao.getCvTermByNameAndCvName("GPI_cleavage_site_score", "genedb_misc");
    }

    final String siteUniqueName = String.format("%s:gpi", polypeptide.getUniqueName());
    final GPIAnchorCleavageSite site =
            new GPIAnchorCleavageSite(polypeptide.getOrganism(), gpiAnchorCleavageSiteType, siteUniqueName);

    // The location starts and ends at the anchor location
    site.addFeatureLoc(
            new FeatureLoc(polypeptide, site, anchorLocation, false, anchorLocation, false, (short)0/*strand*/, null, 0, 0));

    site.addFeatureProp(new FeatureProp(site, gpiCleavageSiteScoreType, score, 0));

    return site;
}
/** The 'PlasmoAP_score' term from the 'genedb_misc' CV; fetched on first use. */
private CvTerm plasmoAPScoreType;

/**
 * Add a 'PlasmoAP_score' property to a polypeptide.
 *
 * @param polypeptide the polypeptide to which the property is added
 * @param score the value of the property
 * @return the newly-created feature property
 */
public FeatureProp createPlasmoAPScore(Polypeptide polypeptide, String score) {
    if (plasmoAPScoreType == null) {
        plasmoAPScoreType = cvDao.getCvTermByNameAndCvName("PlasmoAP_score", "genedb_misc");
    }

    final FeatureProp scoreProp = new FeatureProp(polypeptide, plasmoAPScoreType, score, 0);
    polypeptide.addFeatureProp(scoreProp);
    return scoreProp;
}
/**
 * Delete all the features that are located on the specified source feature.
 * Also deletes features that are located on features located on the specified
 * source feature, e.g. ProteinMatch features located on a Polypeptide.
 * <p>
 * Two bulk deletes are executed, in this order: a native SQL delete for the
 * similarity features, then an HQL delete for the features located directly
 * on the source feature.
 *
 * @param sourceFeature the feature whose located features should be deleted
 */
public void deleteFeaturesLocatedOn(Feature sourceFeature) {
    final String sourceUniqueName = sourceFeature.getUniqueName();

    logger.trace(String.format("Deleting features located on '%s' (ID=%d)",
        sourceUniqueName, sourceFeature.getFeatureId()));

    // Similarity protein matches involve two features, a ProteinMatch
    // and a Region, that are not directly located on the top-level
    // feature. We need to deal with these separately.

    /*
     * An apparent bug in the HQL processing of Hibernate 3.3.1 GA
     * causes the corresponding HQL delete statements to be translated
     * to invalid SQL. For reference, the HQL might read as follows:
     *
     * delete Feature f where f in (
     *     select region
     *     from FeatureLoc matchOnPolypeptide
     *         left join matchOnPolypeptide.sourceFeature as polypeptide
     *             with polypeptide.class = Polypeptide
     *         left join matchOnPolypeptide.feature as match
     *     , FeatureLoc matchOnRegion
     *         left join matchOnRegion.sourceFeature as region
     *     , FeatureLoc polypeptideOnToplevel
     *     where matchOnPolypeptide.locGroup = 0
     *     and matchOnPolypeptide.rank = 0
     *     and matchOnRegion.feature = matchOnPolypeptide.feature
     *     and matchOnRegion.locGroup = 0
     *     and matchOnRegion.rank = 1
     *     and polypeptideOnToplevel.feature = polypeptide
     *     and polypeptideOnToplevel.sourceFeature.uniqueName = 'super1'
     * );
     *
     * delete Feature f where f in (
     *     select match
     *     from FeatureLoc matchOnPolypeptide
     *         left join matchOnPolypeptide.sourceFeature as polypeptide
     *             with polypeptide.class = Polypeptide
     *         left join matchOnPolypeptide.feature as match
     *     , FeatureLoc polypeptideOnToplevel
     *     where matchOnPolypeptide.locGroup = 0
     *     and matchOnPolypeptide.rank = 0
     *     and polypeptideOnToplevel.feature = polypeptide
     *     and polypeptideOnToplevel.sourceFeature.uniqueName = 'super1'
     * );
     *
     * I have reported this as HHH-3651
     * (http://opensource.atlassian.com/projects/hibernate/browse/HHH-3651)
     */

    // Native SQL, because of the Hibernate bug described above. The single
    // positional parameter is the feature_id of the source feature.
    final String deleteSimilarityFeaturesSql =
        " delete from feature where feature_id in (" +
        " select feature.feature_id" +
        " from feature" +
        " , featureloc match_on_polypeptide" +
        " join feature polypeptide" +
        " on match_on_polypeptide.srcfeature_id = polypeptide.feature_id" +
        " join featureloc match_on_region" +
        " on match_on_region.feature_id = match_on_polypeptide.feature_id" +
        " join featureloc polypeptide_on_toplevel" +
        " on match_on_polypeptide.srcfeature_id = polypeptide_on_toplevel.feature_id" +
        " where polypeptide.type_id in (" +
        " select cvterm.cvterm_id" +
        " from cvterm join cv on cv.cv_id = cvterm.cv_id" +
        " where cv.name = 'sequence'" +
        " and cvterm.name = 'polypeptide'" +
        " )" +
        " and match_on_polypeptide.locgroup = 0" +
        " and match_on_polypeptide.rank = 0" +
        " and match_on_region.locgroup = 0" +
        " and match_on_region.rank = 1" +
        " and polypeptide_on_toplevel.srcfeature_id = ?" +
        " and feature.feature_id in (" +
        " match_on_region.srcfeature_id" +
        " , match_on_region.feature_id" +
        " )" +
        " );";

    final int similarityFeaturesDeleted = getSession()
        .createSQLQuery(deleteSimilarityFeaturesSql)
        .setInteger(0, sourceFeature.getFeatureId())
        .executeUpdate();

    logger.debug(String.format("Deleted %d similarity features from '%s'",
        similarityFeaturesDeleted, sourceUniqueName));

    // Features located directly on the source feature
    final String deleteFirstLevelFeaturesHql =
        "delete Feature f where f in (" +
        " select fl.feature from FeatureLoc fl where fl.sourceFeature = :sourceFeature" +
        ")";

    final int firstLevelFeaturesDeleted = getSession()
        .createQuery(deleteFirstLevelFeaturesHql)
        .setParameter("sourceFeature", sourceFeature)
        .executeUpdate();

    logger.debug(String.format("Deleted %d first-level features from '%s'",
        firstLevelFeaturesDeleted, sourceUniqueName));
}
/**
 * Delete all the featureLocs that point to this sourceFeature, i.e. every
 * featureloc row whose srcfeature_id is the ID of the source feature.
 * The deletion is a single native SQL statement.
 *
 * @param sourceFeature the feature whose incoming featureLocs should be deleted
 */
public void deleteFeatureLocsOn(Feature sourceFeature){
    final String sourceUniqueName = sourceFeature.getUniqueName();

    logger.info(String.format("Deleting all the feature locs pointing at '%s' (ID=%d)",
        sourceUniqueName, sourceFeature.getFeatureId()));

    final String deleteFeatureLocsSql = " delete from featureloc where srcfeature_id= ? ;";

    final int featureLocsDeleted = getSession()
        .createSQLQuery(deleteFeatureLocsSql)
        .setInteger(0, sourceFeature.getFeatureId())
        .executeUpdate();

    logger.info(String.format("Deleted %d featurelocs pointing to '%s'",
        featureLocsDeleted, sourceUniqueName));
}
/**
 * Set the {@link CvDao} that this DAO uses to look up CV terms.
 * Invoked by Spring.
 *
 * @param cvDao the CV DAO to use
 */
public void setCvDao(CvDao cvDao) {
this.cvDao = cvDao;
}
/**
 * Find the features that have a synonym of type 'previous_systematic_id'
 * with the given name.
 *
 * @param id the previous systematic ID to search for
 * @return the result of the query, as returned by <code>performQuery</code>
 */
public List<Feature> getFeaturesByPreviousSystematicId(String id) {
    final String hql =
        "select fs.feature from FeatureSynonym fs" +
        " where fs.synonym.type.name='previous_systematic_id'" +
        " and fs.synonym.name=:id";

    final String[] parameterNames = new String[] { "id" };
    final Object[] parameterValues = new Object[] { id };

    return performQuery(Feature.class, hql, parameterNames, parameterValues);
}
}