package org.genedb.db.dao;
import org.genedb.util.SynchronizedTwoKeyMap;
import org.genedb.util.TwoKeyMap;
import org.gmod.schema.cfg.FeatureType;
import org.gmod.schema.cfg.FeatureTypeUtils;
import org.gmod.schema.feature.Polypeptide;
import org.gmod.schema.mapped.Cv;
import org.gmod.schema.mapped.CvTerm;
import org.gmod.schema.mapped.Db;
import org.gmod.schema.mapped.DbXRef;
import org.gmod.schema.mapped.Feature;
import org.gmod.schema.utils.CountedName;
import org.apache.log4j.Logger;
import org.hibernate.Query;
import org.hibernate.Session;
import org.springframework.transaction.annotation.Transactional;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Transactional(readOnly = true)
public class CvDao extends BaseDao {
/** Logger used for the warnings, errors and trace messages emitted by this DAO. */
private static Logger logger = Logger.getLogger(CvDao.class);
/** Term names longer than this are truncated before being used as a CvTerm name. */
private static final int CVTERM_MAX_LENGTH = 1024;
/** Term names longer than this are truncated before being used as a DbXRef accession. */
private static final int DBXREF_ACCESSION_MAX_LENGTH = 255;
/** DAO used to look up Db objects by name; supplied through setGeneralDao. */
private GeneralDao generalDao;
/**
 * Load the Cv with the given identifier using the session's <code>load</code> method.
 *
 * @param id the identifier of the Cv
 * @return whatever <code>Session.load</code> returns for that identifier, cast to Cv
 */
public Cv getCvById(int id) {
    Session session = getSession();
    Object loaded = session.load(Cv.class, id);
    return (Cv) loaded;
}
/**
 * Find every Cv whose name matches a pattern.
 *
 * @param namePattern the pattern, compared with the HQL <code>like</code> operator
 * @return the list produced by the query
 */
public List<Cv> getCvsByNamePattern(String namePattern) {
    Session session = getSession();
    @SuppressWarnings("unchecked")
    List<Cv> matchingCvs = session
        .createQuery("from Cv cv where cv.name like :name")
        .setString("name", namePattern)
        .list();
    return matchingCvs;
}
/** Cvs that have already been looked up, keyed by the name they were requested under. */
private Map<String,Cv> cvByName = new HashMap<String,Cv>();
/**
 * Get the Cv with the given name, consulting the in-memory cache first.
 * A successful lookup is cached; a failed one is logged as a warning and
 * is not cached.
 *
 * @param name the name of the Cv (compared with the HQL <code>like</code> operator)
 * @return the first matching Cv, or <code>null</code> if the query finds none
 */
public synchronized Cv getCvByName(String name) {
    if (!cvByName.containsKey(name)) {
        @SuppressWarnings("unchecked")
        List<Cv> matches = getSession()
            .createQuery("from Cv cv where cv.name like :name")
            .setString("name", name)
            .list();
        if (matches.isEmpty()) {
            logger.warn(String.format("Failed to find CV with name '%s'", name));
            return null;
        }
        cvByName.put(name, matches.get(0));
    }
    return cvByName.get(name);
}
/**
 * Load the CvTerm with the given identifier using the session's <code>load</code>
 * method, logging the request at trace level first.
 *
 * @param id the identifier of the CvTerm
 * @return whatever <code>Session.load</code> returns for that identifier, cast to CvTerm
 */
public CvTerm getCvTermById(int id) {
    String traceMessage = String.format("Fetching CvTerm with id %d", id);
    logger.trace(traceMessage);
    Session session = getSession();
    return (CvTerm) session.load(CvTerm.class, id);
}
/**
 * Find the terms of a given Cv whose names match a pattern.
 *
 * @param cvTermNamePattern the pattern, compared with the HQL <code>like</code> operator
 * @param cv the Cv the terms must belong to
 * @return the matching terms, or <code>null</code> (after logging a warning) if there are none
 */
public List<CvTerm> getCvTermByNamePatternInCv(String cvTermNamePattern, Cv cv) {
    @SuppressWarnings("unchecked")
    List<CvTerm> matches = getSession()
        .createQuery("from CvTerm cvTerm where cvTerm.name like :cvTermNamePattern and cvTerm.cv = :cv")
        .setString("cvTermNamePattern", cvTermNamePattern)
        .setParameter("cv", cv)
        .list();
    if (matches != null && !matches.isEmpty()) {
        return matches;
    }
    logger.warn("No cvterms found matching '" + cvTermNamePattern + "' in '" + cv.getName() + "'");
    return null;
}
/** The Db named "GO", looked up through the GeneralDao the first time it is needed. */
private Db DB_GO;
/**
 * Get the GO term with the given accession.
 *
 * @param accession the accession of the term within the GO database
 * @return the result of <code>firstFromList</code> applied to the matching terms
 *         (presumably the first match, or <code>null</code> if there is none;
 *         see the inherited helper for the exact contract)
 */
public CvTerm getGoCvTermByAcc(String accession) {
    if (DB_GO == null) {
        DB_GO = generalDao.getDbByName("GO");
    }
    // HQL property paths are case-sensitive: the property is "dbXRef", as in
    // every other query of this class, not "dbxref".
    @SuppressWarnings("unchecked")
    List<CvTerm> terms = getSession().createQuery(
        "from CvTerm cvTerm where cvTerm.dbXRef.db.name='GO' and cvTerm.dbXRef.accession=:acc")
        .setString("acc", accession).list();
    return firstFromList(terms, "accession", accession);
}
/**
 * Build a map from accession to CvTerm id for every term whose DbXRef
 * belongs to the "GO" database.
 *
 * @return a new map keyed by accession, with the CvTerm id as value
 */
public Map<String, Integer> getGoTermIdsByAcc() {
    if (DB_GO == null) {
        DB_GO = generalDao.getDbByName("GO");
    }
    @SuppressWarnings("unchecked")
    Collection<Object[]> rows = getSession()
        .createQuery(
            "select cvTerm.dbXRef.accession, cvTerm.id "
            + "from CvTerm cvTerm "
            + "where cvTerm.dbXRef.db = :goDb")
        .setParameter("goDb", DB_GO)
        .list();
    Map<String, Integer> idsByAccession = new HashMap<String, Integer>();
    for (Object[] row : rows) {
        String accession = (String) row[0];
        Integer termId = (Integer) row[1];
        idsByAccession.put(accession, termId);
    }
    return idsByAccession;
}
/**
 * Supply the GeneralDao this DAO uses to look up Db objects by name.
 *
 * @param generalDao the GeneralDao to use
 */
public void setGeneralDao(GeneralDao generalDao) {
this.generalDao = generalDao;
}
/**
 * Test whether a Cv contains at least one term whose name matches the given
 * name. The name is passed to getCvTermByNamePatternInCv, so it is compared
 * with the HQL <code>like</code> operator.
 *
 * @param name the term name (or pattern) to look for
 * @param ontology the Cv to search
 * @return <code>true</code> if at least one matching term exists
 */
public boolean existsNameInOntology(String name, Cv ontology) {
    List<CvTerm> matches = this.getCvTermByNamePatternInCv(name, ontology);
    return matches != null && !matches.isEmpty();
}
/**
 * Fetch all CvTerms using an unrestricted criteria query.
 *
 * @return the list produced by the criteria query
 */
public List<CvTerm> getCvTerms() {
    Session session = getSession();
    @SuppressWarnings("unchecked")
    List<CvTerm> allTerms = session.createCriteria(CvTerm.class).list();
    return allTerms;
}
/** Cache of CvTerm ids, keyed by CV name and lower-cased term name. */
private TwoKeyMap<String,String,Integer> cvTermIdsByCvAndLcName = new SynchronizedTwoKeyMap<String,String,Integer>();
/**
 * Look up the CvTerm with the given name in the named CV, logging a warning
 * and returning <code>null</code> when no such term exists. Equivalent to
 * <code>getCvTermByNameAndCvName(cvTermName, cvName, true)</code>.
 *
 * @param cvTermName the term name, matched case-insensitively
 * @param cvName the name of the CV
 * @return the matching CvTerm, or <code>null</code> if there is none
 */
public CvTerm getCvTermByNameAndCvName(String cvTermName, String cvName) {
    final boolean complainIfNotFound = true;
    return getCvTermByNameAndCvName(cvTermName, cvName, complainIfNotFound);
}
/**
 * Look up the CvTerm with the given name in the named CV, returning
 * <code>null</code> when no such term exists. The id cache is consulted
 * first; on a miss the database is queried and the id of the first match
 * is cached. More than one match is logged as an error.
 *
 * @param cvTermName the term name, matched case-insensitively
 * @param cvName the name of the CV
 * @param complainIfNotFound whether to log a warning when the term is not
 *          found. Only pass <code>false</code> if you are genuinely agnostic
 *          about whether the term exists
 * @return the matching CvTerm, or <code>null</code> if there is none
 */
public CvTerm getCvTermByNameAndCvName(String cvTermName, String cvName, boolean complainIfNotFound) {
    Session session = getSession();
    String lcTermName = cvTermName.toLowerCase();

    synchronized (cvTermIdsByCvAndLcName) {
        if (cvTermIdsByCvAndLcName.containsKey(cvName, lcTermName)) {
            CvTerm cached = (CvTerm) session.get(CvTerm.class, cvTermIdsByCvAndLcName.get(cvName, lcTermName));
            /*
             * The null check matters: an ID can be in the cache while the CvTerm
             * does not exist in the database, even with a single thread, because
             * the CvTerm might have been added in a session that was later
             * rolled back. In that case we fall through to the query below.
             */
            if (cached != null) {
                return cached;
            }
        }
    }

    @SuppressWarnings("unchecked")
    List<CvTerm> found = session
        .createQuery("from CvTerm cvTerm where lower(cvTerm.name) = :lcTermName and cvTerm.cv.name = :cvName")
        .setString("lcTermName", lcTermName)
        .setString("cvName", cvName)
        .list();

    boolean nothingFound = (found == null || found.isEmpty());
    if (nothingFound) {
        if (complainIfNotFound) {
            logger.warn("No cvterms found for '" + cvTermName + "' in '" + cvName + "'");
        }
        return null;
    }

    int matchCount = found.size();
    if (matchCount > 1) {
        logger.error(String.format("Found %d CvTerms with cv '%s' and term name '%s'",
            matchCount, cvName, cvTermName));
    }

    CvTerm first = found.get(0);
    cvTermIdsByCvAndLcName.put(cvName, lcTermName, first.getCvTermId());
    return first;
}
/** Cache of CvTerm ids, keyed by CV name and DbXRef accession. */
private TwoKeyMap<String,String,Integer> cvTermIdsByAccessionAndCv = new SynchronizedTwoKeyMap<String,String,Integer>();
/**
 * Look up the CvTerm in the named CV whose DbXRef has the given accession.
 * The id cache is consulted first; on a miss the database is queried and the
 * id of the first match is cached. More than one match is logged as an error.
 *
 * @param accession the accession of the term's DbXRef
 * @param cvName the name of the CV
 * @return the matching CvTerm, or <code>null</code> (after logging a warning) if there is none
 */
public CvTerm getCvTermByAccessionAndCvName(String accession, String cvName) {
    Session session = getSession();

    if (cvTermIdsByAccessionAndCv.containsKey(cvName, accession)) {
        CvTerm cached = (CvTerm) session.get(CvTerm.class, cvTermIdsByAccessionAndCv.get(cvName, accession));
        /*
         * The null check matters: an ID can be in the cache while the CvTerm
         * does not exist in the database, even with a single thread, because
         * the CvTerm might have been added in a session that was later
         * rolled back. In that case we fall through to the query below.
         */
        if (cached != null) {
            return cached;
        }
    }

    @SuppressWarnings("unchecked")
    List<CvTerm> found = session
        .createQuery("from CvTerm cvTerm where cvTerm.dbXRef.accession = :accession and cvTerm.cv.name = :cvName")
        .setString("accession", accession)
        .setString("cvName", cvName)
        .list();

    boolean nothingFound = (found == null || found.isEmpty());
    if (nothingFound) {
        logger.warn("No cvterms found for accession '" + accession + "' in '" + cvName + "'");
        return null;
    }

    int matchCount = found.size();
    if (matchCount > 1) {
        logger.error(String.format("Found %d CvTerms with cv '%s' and accession ID '%s'",
            matchCount, cvName, accession));
    }

    CvTerm first = found.get(0);
    cvTermIdsByAccessionAndCv.put(cvName, accession, first.getCvTermId());
    return first;
}
/**
 * Look up a CvTerm that is expected to exist. No warning is logged when the
 * term is missing; an exception is thrown instead.
 *
 * @param termName the term name, matched case-insensitively
 * @param cvName the name of the CV
 * @return the CvTerm with the specified CV and name; never <code>null</code>
 * @throws RuntimeException if there is no such term
 */
public CvTerm getExistingCvTermByNameAndCvName(String termName, String cvName) {
    CvTerm existing = getCvTermByNameAndCvName(termName, cvName, false);
    if (existing != null) {
        return existing;
    }
    throw new RuntimeException(String.format("CV term '%s:%s' does not exist",
        cvName, termName));
}
/**
 * Look up the CvTerm with exactly the given name in any CV whose name
 * matches a pattern. More than one match is logged as an error and the
 * first is returned.
 *
 * @param cvTermName the exact name of the term
 * @param cvNamePattern the CV name pattern, compared with the HQL <code>like</code> operator
 * @return the first matching CvTerm, or <code>null</code> (after logging a warning) if there is none
 */
public CvTerm getCvTermByNameAndCvNamePattern(String cvTermName, String cvNamePattern) {
    Session session = getSession();
    @SuppressWarnings("unchecked")
    List<CvTerm> found = session
        .createQuery("from CvTerm cvTerm where cvTerm.name = :cvTermName and cvTerm.cv.name like :cvNamePattern")
        .setString("cvTermName", cvTermName)
        .setString("cvNamePattern", cvNamePattern)
        .list();

    boolean nothingFound = (found == null || found.isEmpty());
    if (nothingFound) {
        logger.warn("No cvterms found for '" + cvTermName + "' in CV matching '"
            + cvNamePattern + "'");
        return null;
    }

    int matchCount = found.size();
    if (matchCount > 1) {
        logger.error(String.format("Found %d CvTerms with cv matching '%s' and term name '%s'",
            matchCount, cvNamePattern, cvTermName));
    }
    return found.get(0);
}
/** The Db named "null", fetched through the GeneralDao on first use. */
private volatile Db nullDb = null;
/**
 * Return the Db named "null", fetching it through the GeneralDao the first
 * time it is needed. The field is re-checked while holding this object's
 * monitor so that the lookup is not repeated once a value has been stored.
 *
 * @return the Db named "null", as returned by the GeneralDao
 */
private Db nullDb() {
    Db current = nullDb;
    if (current == null) {
        synchronized (this) {
            current = nullDb;
            if (current == null) {
                current = generalDao.getDbByName("null");
                nullDb = current;
            }
        }
    }
    return current;
}
/**
 * Find the named term in the named CV, creating it if it does not exist.
 * A newly created term gets a DbXRef in the "null" database.
 *
 * @param cvTermName the term to find or create; matched case-insensitively,
 *          but the supplied case is used if the term is created
 * @param cvName the name of the CV, which must already exist
 * @return the created or looked-up CvTerm
 */
public CvTerm findOrCreateCvTermByNameAndCvName(String cvTermName, String cvName) {
    Db fallbackDb = nullDb();
    return findOrCreateCvTermByNameAndCvName(cvTermName, cvName, fallbackDb);
}
/**
 * Find the named term in the named CV, creating it if it does not exist.
 *
 * @param cvTermName the term to find or create; matched case-insensitively,
 *          but the supplied case is used if the term is created
 * @param cvName the name of the CV, which must already exist
 * @param dbName the name of the database to use for the associated DbXRef if a
 *          new term is created; <code>null</code> selects the "null" database
 * @return the created or looked-up CvTerm
 */
public CvTerm findOrCreateCvTermByNameAndCvName(String cvTermName, String cvName, String dbName) {
    Db db = (dbName == null) ? nullDb() : generalDao.getDbByName(dbName);
    return findOrCreateCvTermByNameAndCvName(cvTermName, cvName, db);
}
/**
 * Find the named term in the named CV, creating it if it does not exist.
 * <p>
 * Names longer than the cvterm.name limit are truncated before the lookup.
 * When a term is created, the name is also truncated to the dbxref.accession
 * limit for the new DbXRef; the full name is passed to both constructors as
 * well, so it is kept as the description/definition.
 *
 * @param cvTermName the term to find or create; matched case-insensitively,
 *          but the supplied case is used if the term is created
 * @param cvName the name of the CV, which must already exist
 * @param db the database to use for the associated DbXRef, if a new term is created
 * @return the created or looked-up CvTerm
 * @throws IllegalArgumentException if the term has to be created but the CV does not exist
 */
public CvTerm findOrCreateCvTermByNameAndCvName(String cvTermName, String cvName, Db db) {
    String nameForCvTerm = cvTermName;
    if (cvTermName.length() > CVTERM_MAX_LENGTH) {
        logger.warn(String.format("CV Term name is longer than %d characters: %s\n" +
            "Truncating to fit in cvterm.name.\n" +
            "(The full name will be kept in the CvTerm definition.)",
            CVTERM_MAX_LENGTH, cvTermName));
        nameForCvTerm = cvTermName.substring(0, CVTERM_MAX_LENGTH);
    }

    CvTerm existing = this.getCvTermByNameAndCvName(nameForCvTerm, cvName, false);
    if (existing != null) {
        return existing;
    }

    logger.trace(String.format("CV term '%s:%s' not found; creating with dbxref in DB '%s'",
        cvName, nameForCvTerm, db == null ? null : db.getName()));

    // Check the documented precondition before persisting anything: otherwise a
    // DbXRef would be persisted and a CvTerm built with a null Cv.
    Cv cv = this.getCvByName(cvName);
    if (cv == null) {
        throw new IllegalArgumentException(String.format(
            "Cannot create CV term '%s': the CV '%s' does not exist", nameForCvTerm, cvName));
    }

    String nameForDbXRef = cvTermName;
    if (cvTermName.length() > DBXREF_ACCESSION_MAX_LENGTH) {
        logger.warn(String.format("CV Term name is longer than %d characters: %s\n" +
            "Truncating to fit in dbxref.accession.\n"+
            "(The full name will be kept in the DbXRef description.)",
            DBXREF_ACCESSION_MAX_LENGTH, cvTermName));
        nameForDbXRef = cvTermName.substring(0, DBXREF_ACCESSION_MAX_LENGTH);
    }

    DbXRef dbXRef = new DbXRef(db, nameForDbXRef, cvTermName);
    persist(dbXRef);
    CvTerm created = new CvTerm(cv, dbXRef, nameForCvTerm, cvTermName);
    persist(created);
    return created;
}
/**
 * Find the CvTerm associated with a DbXRef.
 *
 * @param dbXRef the DbXRef of the term
 * @return the first CvTerm with that DbXRef, or <code>null</code> if there is none
 */
public CvTerm getCvTermByDbXRef(DbXRef dbXRef) {
    Session session = getSession();
    @SuppressWarnings("unchecked")
    List<CvTerm> found = session
        .createQuery("from CvTerm cvt where cvt.dbXRef = :dbXRef")
        .setParameter("dbXRef", dbXRef)
        .list();
    boolean nothingFound = (found == null || found.isEmpty());
    return nothingFound ? null : found.get(0);
}
/**
 * Find the CvTerm whose DbXRef has the given database name and accession.
 * More than one match is logged as an error and the first is returned.
 *
 * @param db the name of the database
 * @param acc the accession within that database
 * @return the first matching CvTerm, or <code>null</code> if there is none
 */
public CvTerm getCvTermByDbAcc(String db, String acc) {
    Session session = getSession();
    @SuppressWarnings("unchecked")
    List<CvTerm> found = session
        .createQuery("from CvTerm where dbXRef.db.name= :db and dbXRef.accession = :acc")
        .setParameter("db", db)
        .setParameter("acc", acc)
        .list();

    boolean nothingFound = (found == null || found.isEmpty());
    if (nothingFound) {
        return null;
    }

    int matchCount = found.size();
    if (matchCount > 1) {
        logger.error(String.format("Found %d CvTerms with db '%s' and accession '%s'",
            matchCount, db, acc));
    }
    return found.get(0);
}
/**
 * For each term name of the given Cv that has at least one FeatureCvTerm,
 * count the associated features.
 *
 * @param cv the Cv whose terms are counted
 * @return one CountedName per term name, as produced by the query
 */
public List<CountedName> getAllTermsInCvWithCount(Cv cv) {
    String hql =
        "select new org.gmod.schema.utils.CountedName(cvt.name, count(fct.feature))"
        + " from FeatureCvTerm fct"
        + " join fct.cvTerm cvt"
        + " where cvt.cv=:cv"
        + " group by cvt.name";
    Session session = getSession();
    @SuppressWarnings("unchecked")
    List<CountedName> counts = session.createQuery(hql).setParameter("cv", cv).list();
    return counts;
}
/**
 * For each term name of the named CV, count the features annotated with it
 * that belong to one of the given organisms. Results are ordered by term
 * name, case-insensitively first and then by the exact name.
 * <p>
 * The organism names are bound as a parameter list rather than being quoted
 * and concatenated into the query text.
 *
 * @param cvName the exact name of the CV
 * @param orgs the common names of the organisms to restrict to; must not be
 *          <code>null</code>
 * @return one CountedName per term name; an empty list if <code>orgs</code>
 *         is empty, since no feature can belong to an empty set of organisms
 */
public List<CountedName> getCountedNamesByCvNameAndOrganism(String cvName,
        Collection<String> orgs) {
    if (orgs.isEmpty()) {
        // "in ()" is not valid, and the answer is known without asking the database.
        return Collections.<CountedName>emptyList();
    }
    @SuppressWarnings("unchecked")
    List<CountedName> countedNames = getSession().createQuery(
        "select new org.gmod.schema.utils.CountedName(cvt.name, count(fct.feature.uniqueName))"
        + " from FeatureCvTerm fct join fct.cvTerm cvt"
        + " where fct.feature.organism.commonName in (:orgNames)"
        + " and cvt.cv.name=:cvName"
        + " group by cvt.name"
        + " order by lower(cvt.name), cvt.name")
        .setParameterList("orgNames", orgs)
        .setString("cvName", cvName)
        .list();
    return countedNames;
}
/**
 * For each term name in the CVs whose names match a pattern, count the
 * annotated features, optionally restricted to a set of organisms and/or to
 * polypeptide features. Results are ordered by term name.
 * <p>
 * The CV-name condition is always present and comes first; the optional
 * conditions are each appended with their own leading "and", so the where
 * clause is well-formed for every combination of arguments.
 *
 * @param cvNamePattern the CV name pattern, compared with the HQL <code>like</code> operator
 * @param orgs the common names of the organisms to restrict to; if
 *          <code>null</code> or empty, no organism restriction is applied
 * @param justPolypeptides whether to count only features of type 'polypeptide'
 * @return one CountedName per term name, as produced by the query
 */
public List<CountedName> getCountedNamesByCvNamePatternAndOrganism(String cvNamePattern,
        Collection<String> orgs, boolean justPolypeptides) {
    boolean restrictToOrganisms = (orgs != null && !orgs.isEmpty());

    StringBuilder hql = new StringBuilder(
        "select new org.gmod.schema.utils.CountedName(cvt.name, count(fct.feature.uniqueName))"
        + " from FeatureCvTerm fct join fct.cvTerm cvt"
        + " where cvt.cv.name like :cvNamePattern");
    if (restrictToOrganisms) {
        hql.append(" and fct.feature.organism.commonName in (:orgNames)");
    }
    if (justPolypeptides) {
        hql.append(" and fct.feature.type.name = 'polypeptide'");
    }
    hql.append(" group by cvt.name order by cvt.name");

    Query query = getSession().createQuery(hql.toString())
        .setString("cvNamePattern", cvNamePattern);
    if (restrictToOrganisms) {
        // Bound as a parameter list instead of hand-quoted literals in the query text.
        query.setParameterList("orgNames", orgs);
    }

    @SuppressWarnings("unchecked")
    List<CountedName> countedNames = query.list();
    return countedNames;
}
/**
 * Find the names of terms in a Cv that contain the given text.
 *
 * @param search the text the term name must contain (wrapped in '%' wildcards by the query)
 * @param cv the Cv to search
 * @param limit the maximum number of names to return
 * @return at most <code>limit</code> matching term names
 */
public List<String> getPossibleMatches(String search, Cv cv, int limit) {
    Session session = getSession();
    @SuppressWarnings("unchecked")
    List<String> matchingNames = session
        .createQuery("select name from CvTerm where name like '%'||:search||'%' and cv = :cv")
        .setString("search", search)
        .setParameter("cv", cv)
        .setMaxResults(limit)
        .list();
    return matchingNames;
}
/**
 * For every term of the named CV that is attached to the given polypeptide,
 * count all FeatureCvTerms that use that term. Results are ordered by
 * term name.
 *
 * @param cvName the exact name of the CV
 * @param polypeptide the polypeptide whose terms are of interest
 * @return one CountedName per term name, as produced by the query
 */
@SuppressWarnings("unchecked")
public List<CountedName> getCountedNamesByCvNameAndFeature(String cvName,
        Polypeptide polypeptide) {
    String hql =
        "select new org.gmod.schema.utils.CountedName( fct.cvTerm.name, count (fct))"
        + " from FeatureCvTerm fct"
        + " where fct.cvTerm.id in  ("
        + "select fct.cvTerm.id from FeatureCvTerm fct, Feature f"
        + " where f=:polypeptide and fct.cvTerm.cv.name=:cvName and fct.feature=f"
        + ")"
        + " group by fct.cvTerm.name"
        + " order by fct.cvTerm.name";
    Session session = getSession();
    List<CountedName> counts = session.createQuery(hql)
        .setParameter("polypeptide", polypeptide)
        .setString("cvName", cvName)
        .list();
    return counts;
}
/**
 * For every term of the named CV that is attached to the given polypeptide,
 * count the distinct features annotated with that term in the polypeptide's
 * own organism. Results are ordered by term name.
 *
 * @param cvName the exact name of the CV
 * @param polypeptide the polypeptide whose terms (and organism) are of interest
 * @return a (possibly empty) List<CountedName> of matches
 */
@SuppressWarnings("unchecked")
public List<CountedName> getCountedNamesByCvNameAndFeatureAndOrganism(String cvName,
        Polypeptide polypeptide) {
    /*
     * "count (distinct fct.feature)" makes a feature count only once even if
     * it has more than one FeatureCvTerm for a particular CvTerm.
     */
    String hql =
        "select new org.gmod.schema.utils.CountedName( fct.cvTerm.name, count (distinct fct.feature))"
        + " from FeatureCvTerm fct"
        + " where fct.feature.organism.commonName=:organism"
        + " and  fct.cvTerm.id in  ("
        + "select fct.cvTerm.id from FeatureCvTerm fct, Feature f"
        + " where f=:polypeptide and fct.cvTerm.cv.name=:cvName and fct.feature=f"
        + ")"
        + " group by fct.cvTerm.name"
        + " order by fct.cvTerm.name";
    Session session = getSession();
    List<CountedName> counts = session.createQuery(hql)
        .setParameter("polypeptide", polypeptide)
        .setString("cvName", cvName)
        .setString("organism", polypeptide.getOrganism().getCommonName())
        .list();
    return counts;
}
/**
 * For every term attached to the given polypeptide whose CV name matches a
 * pattern, count the distinct features annotated with that term in the
 * polypeptide's own organism. Results are ordered by term name.
 *
 * @param cvNamePattern a pattern (HQL/SQL syntax) to match against the CV
 *          name
 * @param polypeptide the polypeptide whose terms (and organism) are of interest
 * @return a (possibly empty) List<CountedName> of matches
 */
@SuppressWarnings("unchecked")
public List<CountedName> getCountedNamesByCvNamePatternAndFeatureAndOrganism(
        String cvNamePattern, Polypeptide polypeptide) {
    /*
     * "count (distinct fct.feature)" makes a feature count only once even if
     * it has more than one FeatureCvTerm for a particular CvTerm.
     */
    String hql =
        "select new org.gmod.schema.utils.CountedName( fct.cvTerm.name, count (distinct fct.feature))"
        + " from FeatureCvTerm fct"
        + " where fct.feature.organism.commonName=:organism"
        + " and  fct.cvTerm.id in  ("
        + "select fct.cvTerm.id from FeatureCvTerm fct, Feature f"
        + " where f=:polypeptide and fct.cvTerm.cv.name LIKE :cvNamePattern and fct.feature=f"
        + ")"
        + " group by fct.cvTerm.name"
        + " order by fct.cvTerm.name";
    Session session = getSession();
    List<CountedName> counts = session.createQuery(hql)
        .setParameter("polypeptide", polypeptide)
        .setString("cvNamePattern", cvNamePattern)
        .setString("organism", polypeptide.getOrganism().getCommonName())
        .list();
    return counts;
}
/** CvTerms already resolved for feature classes, keyed by the class. */
private Map<Class<? extends Feature>, CvTerm> cvTermsByClass = Collections.synchronizedMap(new HashMap<Class<? extends Feature>, CvTerm>());
/**
 * Get the CvTerm that represents the type of a particular feature class.
 * The result of the first lookup for a class is cached and reused.
 *
 * @param annotatedClass the feature class
 * @return the corresponding CV term
 * @throws IllegalArgumentException if no FeatureType can be found for the class
 */
public CvTerm getCvTermForAnnotatedClass(Class<? extends Feature> annotatedClass) {
    boolean alreadyResolved = cvTermsByClass.containsKey(annotatedClass);
    if (alreadyResolved) {
        return cvTermsByClass.get(annotatedClass);
    }

    FeatureType annotation = FeatureTypeUtils.getFeatureTypeForClass(annotatedClass);
    if (annotation != null) {
        CvTerm resolved = getCvTermForFeatureType(annotation);
        cvTermsByClass.put(annotatedClass, resolved);
        return resolved;
    }
    throw new IllegalArgumentException(String.format("The class '%s' has no @FeatureType annotation", annotatedClass.getName()));
}
/**
 * Resolve a FeatureType to its CvTerm: by accession when the FeatureType's
 * term is the empty string, and by term name otherwise.
 *
 * @param featureType the FeatureType describing the term
 * @return the CvTerm found by the corresponding lookup, which may be <code>null</code>
 */
private CvTerm getCvTermForFeatureType(FeatureType featureType) {
    if ("".equals(featureType.term())) {
        return this.getCvTermByAccessionAndCvName(featureType.accession(), featureType.cv());
    }
    return this.getCvTermByNameAndCvName(featureType.term(), featureType.cv());
}
}