package org.genedb.db.loading;
import org.genedb.db.dao.CvDao;
import org.genedb.db.dao.GeneralDao;
import org.genedb.db.dao.PubDao;
import org.genedb.db.dao.SequenceDao;
import org.gmod.schema.mapped.CvTerm;
import org.gmod.schema.mapped.Db;
import org.gmod.schema.mapped.DbXRef;
import org.gmod.schema.mapped.Feature;
import org.gmod.schema.mapped.FeatureCvTerm;
import org.gmod.schema.mapped.FeatureCvTermDbXRef;
import org.gmod.schema.mapped.FeatureCvTermProp;
import org.gmod.schema.mapped.Pub;
import org.gmod.schema.mapped.PubDbXRef;
import org.gmod.schema.utils.ObjectManager;
import org.gmod.schema.utils.Rankable;
import org.apache.log4j.Logger;
import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.orm.hibernate3.SessionFactoryUtils;
import org.springframework.transaction.annotation.Transactional;
import java.util.BitSet;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
@Transactional
public class FeatureUtils implements InitializingBean {
private static final Logger logger = Logger.getLogger(FeatureUtils.class);
private CvDao cvDao;
private PubDao pubDao;
private SequenceDao sequenceDao;
private GeneralDao generalDao;
private ObjectManager objectManager;
private SessionFactory sessionFactory;
private static final Pattern PUBMED_PATTERN = Pattern.compile("PMID:|PUBMED:", Pattern.CASE_INSENSITIVE);
private static final Pattern GO_REF_PATTERN = Pattern.compile("GO_REF:", Pattern.CASE_INSENSITIVE);
private CvTerm GO_KEY_EVIDENCE, GO_KEY_QUALIFIER, GO_KEY_ATTRIBUTION,
GO_KEY_RESIDUE, GO_KEY_DATE, GENEDB_AUTOCOMMENT;
private CvTerm PUB_TYPE_UNFETCHED;
private Db PMID_DB, GOREF_DB, INTERPRO_DB, PFAM_DB;
private int NULL_PUB_ID;
public void afterPropertiesSet() {
logger.trace("Initialising FeatureUtils");
objectManager.setDaos(generalDao, pubDao, cvDao);
PMID_DB = objectManager.getExistingDbByName("PMID");
GOREF_DB = objectManager.getExistingDbByName("GO_REF");
INTERPRO_DB = objectManager.getExistingDbByName("InterPro");
PFAM_DB = objectManager.getExistingDbByName("Pfam");
GO_KEY_EVIDENCE = cvDao.getExistingCvTermByNameAndCvName("evidence", "genedb_misc");
GO_KEY_ATTRIBUTION = cvDao.getExistingCvTermByNameAndCvName("attribution", "genedb_misc");
GO_KEY_RESIDUE = cvDao.getExistingCvTermByNameAndCvName("residue", "genedb_misc");
GO_KEY_DATE = cvDao.getExistingCvTermByNameAndCvName("date", "feature_property");
GO_KEY_QUALIFIER = cvDao.getExistingCvTermByNameAndCvName("qualifier", "genedb_misc");
GENEDB_AUTOCOMMENT = cvDao.getExistingCvTermByNameAndCvName("autocomment", "genedb_misc");
PUB_TYPE_UNFETCHED = cvDao.getExistingCvTermByNameAndCvName("unfetched", "genedb_literature");
Pub NULL_PUB = pubDao.getPubByUniqueName("null");
if (NULL_PUB == null) {
throw new RuntimeException("Could not find Pub with uniqueName 'null'");
}
NULL_PUB_ID = NULL_PUB.getPubId();
}
/**
* Create or lookup a Pub object from a PMID:acc style input, although the
* prefix is ignored.
*
* @param ref the reference
* @return the Pub object
*/
private Pub findOrCreatePubFromPMID(String ref) {
String accession = ref.substring(1 + ref.indexOf(':')); // Text after first colon, or whole string if no colon
logger.trace(String.format("Looking for Pub object with accession number '%s'", accession));
DbXRef dbXRef = generalDao.getDbXRefByDbAndAcc(PMID_DB, accession); //objectManager.getDbXRef("PUBMED", accession);
Pub pub;
if (dbXRef == null) {
logger.trace(String.format("Could not find DbXRef for PUBMED:%s; creating new DbXRef", accession));
dbXRef = new DbXRef(PMID_DB, accession);
generalDao.persist(dbXRef);
pub = pubDao.getPubByUniqueName("PMID:" + accession);
if (pub == null) {
logger.trace("Pub not found in database");
pub = new Pub("PMID:" + accession, PUB_TYPE_UNFETCHED);
pubDao.persist(pub);
} else {
logger.trace("Pub already in database (even though the DbXRef was not)");
}
PubDbXRef pubDbXRef = new PubDbXRef(pub, dbXRef, true);
generalDao.persist(pubDbXRef);
} else {
pub = pubDao.getPubByDbXRef(dbXRef);
logger.trace(String.format("Found dbxref '%s'; corresponding Pub is '%s'", dbXRef, pub));
}
return pub;
}
/**
* Create or lookup a GO_REF dbxref
*
*
* @param ref the reference
* @return the Dbxref object
*/
private DbXRef findOrCreateGoRefDbxref(String ref) {
String accession = ref.substring(1 + ref.indexOf(':')); // Text after first colon, or whole string if no colon
logger.trace(String.format("Looking for dbxref object with accession number '%s'", accession));
DbXRef dbXRef = generalDao.getDbXRefByDbAndAcc(GOREF_DB, accession); //objectManager.getDbXRef("PUBMED", accession);
if (dbXRef == null) {
logger.trace(String.format("Could not find DbXRef for GO_REF:%s; creating new DbXRef", accession));
dbXRef = new DbXRef(GOREF_DB, accession);
generalDao.persist(dbXRef);
}
return dbXRef;
}
/**
* Create or lookup a Interpro dbxref
*
*
* @param ref the reference
* @return the Dbxref object
*/
public DbXRef findOrCreateDbXRefForWithFrom(String ref) {
if(!ref.startsWith("InterPro:") && !ref.startsWith("Pfam:"))
return null;
Db db = (ref.startsWith("InterPro:") ? INTERPRO_DB : PFAM_DB);
String accession = ref.substring(1 + ref.indexOf(':')); // Text after first colon, or whole string if no colon
logger.trace(String.format("Looking for dbxref object with accession number '%s'", accession));
DbXRef dbXRef = generalDao.getDbXRefByDbAndAcc(db, accession);
if (dbXRef == null) {
logger.trace(String.format("Could not find DbXRef for GO_REF:%s; creating new DbXRef", accession));
dbXRef = new DbXRef(GOREF_DB, accession);
generalDao.persist(dbXRef);
}
return dbXRef;
}
/**
* Does a string look like a PubMed reference?
*
* @param xref The string to examine
* @return true if it looks like a PubMed reference
*/
private boolean looksLikePub(String xref) {
return PUBMED_PATTERN.matcher(xref).lookingAt();
}
/**
* Does a string look like a GO_REF reference?
*
* @param xref The string to examine
* @return true if it looks like a GO_REF reference
*/
private boolean looksLikeGoRef(String xref) {
return GO_REF_PATTERN.matcher(xref).lookingAt();
}
/*
* Pre-caching the name -> id mapping is a big win compared with
* doing a new query every time, when doing a data load. It uses a
* chunk of memory though, and perhaps a less aggressive strategy
* would give a better time/space tradeoff.
*/
private volatile Map<String, Integer> goTermIdsByAcc = null;
private CvTerm getGoTerm(String id) {
if (goTermIdsByAcc == null) {
// Double-checked locking of a volatile variable is
// JMM-compliant since JVM 1.5.
synchronized (this) {
if (goTermIdsByAcc == null) {
goTermIdsByAcc = cvDao.getGoTermIdsByAcc();
}
}
}
if (!goTermIdsByAcc.containsKey(id)) {
return null;
}
return cvDao.getCvTermById(goTermIdsByAcc.get(id));
}
public void createGoEntries(Feature polypeptide, GoInstance go, String comment,
DbXRef withFromDbXRef) throws DataError {
if (withFromDbXRef == null)
createGoEntries(polypeptide, go, comment, Collections.<DbXRef>emptyList());
else
createGoEntries(polypeptide, go, comment, Collections.singletonList(withFromDbXRef));
}
public void createGoEntries(Feature polypeptide, GoInstance go, String comment,
List<DbXRef> withFromDbXRefs) throws DataError {
Session session = SessionFactoryUtils.getSession(sessionFactory, false);
CvTerm cvTerm = getGoTerm(go.getId());
if (cvTerm == null) {
throw new DataError("Unable to find a CvTerm for the GO id of '" + go.getId() + "'.");
}
// Reference
String ref = go.getRef();
Pub refPub = (Pub) session.load(Pub.class, NULL_PUB_ID);
DbXRef dbxref = null;
if (ref != null && looksLikePub(ref)) {
// The reference is a pubmed id - usual case
refPub = findOrCreatePubFromPMID(ref);
} else if (ref != null && looksLikeGoRef(ref)){
/* The embl loader was only willing to accept PMID/PUBMED as valid
* dbxrefs for a GO term. However, GO_REF dbxrefs are also valid
* and this is why this extension was made to the code.
* nds, 26 Oct 2011
*/
dbxref = findOrCreateGoRefDbxref(ref);
} else {
logger.warn(String.format("Ignoring db_xref '%s' from GO entry", ref));
}
boolean not = go.getQualifierList().contains("not"); // FIXME - Working?
List<FeatureCvTerm> fcts = sequenceDao.getFeatureCvTermsByFeatureAndCvTermAndNot(
polypeptide, cvTerm, not);
int rank = 0;
if (fcts.size() != 0) {
rank = getNextRank(fcts);
}
logger.trace(String.format("Creating new FeatureCvTerm for GO entry (%s, %s, %s, %b, %d)",
cvTerm, polypeptide, refPub, not, rank));
FeatureCvTerm fct = new FeatureCvTerm(cvTerm, polypeptide, refPub, not, rank);
sequenceDao.persist(fct);
//GO_REF
if(dbxref!=null){
sequenceDao.persist(new FeatureCvTermDbXRef(fct, dbxref));
}
//autocomment
sequenceDao.persist(new FeatureCvTermProp(GENEDB_AUTOCOMMENT, fct, comment, 0));
// Evidence
FeatureCvTermProp evidenceProp = new FeatureCvTermProp(GO_KEY_EVIDENCE, fct,
go.getEvidence().getDescription(), 0);
sequenceDao.persist(evidenceProp);
// Date
FeatureCvTermProp dateProp = new FeatureCvTermProp(GO_KEY_DATE, fct,
go.getDate(), 0);
sequenceDao.persist(dateProp);
// Attribution
String attribution = go.getAttribution();
if (attribution != null) {
sequenceDao.persist(new FeatureCvTermProp(GO_KEY_ATTRIBUTION, fct, attribution, 0));
}
// Residue
String residue = go.getResidue();
if (residue != null) {
sequenceDao.persist(new FeatureCvTermProp(GO_KEY_RESIDUE, fct, residue, 0));
}
// Qualifiers
int qualifierRank = 0;
List<String> qualifiers = go.getQualifierList();
for (String qualifier : qualifiers) {
FeatureCvTermProp qualifierProp
= new FeatureCvTermProp(GO_KEY_QUALIFIER, fct, qualifier, qualifierRank);
qualifierRank++;
sequenceDao.persist(qualifierProp);
}
// With/From
for (DbXRef withFromDbXRef: withFromDbXRefs) {
sequenceDao.persist(new FeatureCvTermDbXRef(fct, withFromDbXRef));
}
}
<T extends Rankable> int getNextRank(List<T> list) {
BitSet bs = new BitSet(list.size() + 1);
for (Rankable r : list) {
bs.set(r.getRank());
}
return bs.nextClearBit(0);
}
public void setObjectManager(ObjectManager objectManager) {
this.objectManager = objectManager;
}
public void setPubDao(PubDao pubDao) {
this.pubDao = pubDao;
}
public void setCvDao(CvDao cvDao) {
this.cvDao = cvDao;
}
public void setSequenceDao(SequenceDao sequenceDao) {
this.sequenceDao = sequenceDao;
}
public void setGeneralDao(GeneralDao generalDao) {
this.generalDao = generalDao;
}
public SessionFactory getSessionFactory() {
return sessionFactory;
}
public void setSessionFactory(SessionFactory sessionFactory) {
this.sessionFactory = sessionFactory;
}
}