/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1574 $ by $Author: glycoslave $ on $Date:: 2009-07-24 #$
*/
package org.eurocarbdb.dataaccess.core.seq;
// stdlib imports
import java.util.Map;
import java.util.Set;
import java.util.List;
import java.util.HashMap;
import java.util.EnumSet;
import java.util.ArrayList;
import java.util.Collections;
// 3rd party imports
import org.apache.log4j.Logger;
import org.hibernate.Query;
import org.hibernate.Session;
import org.hibernate.Criteria;
// eurocarb imports
import org.eurocarbdb.util.graph.Graph;
import org.eurocarbdb.util.graph.Edge;
import org.eurocarbdb.util.graph.Vertex;
import org.eurocarbdb.util.graph.DepthFirstGraphVisitor;
import org.eurocarbdb.sugar.Sugar;
import org.eurocarbdb.sugar.Anomer;
import org.eurocarbdb.sugar.Linkage;
import org.eurocarbdb.sugar.Residue;
import org.eurocarbdb.sugar.SugarSequence;
import org.eurocarbdb.sugar.Substituent;
import org.eurocarbdb.sugar.Monosaccharide;
import org.eurocarbdb.sugar.GlycosidicLinkage;
import org.eurocarbdb.dataaccess.Eurocarb;
import org.eurocarbdb.dataaccess.EntityManager;
import org.eurocarbdb.dataaccess.HibernateEntityManager;
import org.eurocarbdb.dataaccess.core.GlycanSequence;
import org.eurocarbdb.dataaccess.core.seq.GlycanResidue;
// static imports
/**
* Implements carbohydrate sub-structure searching; specifically,
* performs the translation of a given {@link Sugar} (or
* {@link Graph} of {@link Linkage}s and {@link Residue}s) to
* a query string using a {@link SubstructureQueryGenerator} along with
* {@link SubstructureQuery.Option}s, runs that string through Hibernate
* as a native SQL query, and returns the resulting
* substructure search results as a {@link List} of
* {@link SubstructureQueryResult}s.
*
*<h2>Usage</h2>
*<pre>
* // construct a search Sugar structure (or equivalent search Graph).
* Sugar search_structure = ...;
*
* // create a substruct query from the search structure
* SubstructureQuery query = new SubstructureQuery( search_structure );
*
* // get the HQL query string for the structure (not needed for search
* // -- only if you want it for some reason).
* String query_string = query.getQueryString();
*
* // perform query
* query.execute();
*
* // play with results...
* List&lt;SubstructureQueryResult&gt; results = query.getResults();
*</pre>
* Note that by default, returned results can contain the same
* {@link GlycanSequence} multiple times, if the substructure is found
* multiple times within the same structure. Use the
* {@link SubstructureQuery.Option#Distinct} option if you want to suppress
* multiples.
*
* @see SubstructureQueryResult
* @see SubstructureQueryCriterion
* @see SubstructureQueryGenerator
* @author mjh
*/
public class SubstructureQuery
{
/** Logging handle shared between all SubstructureQuery*.java classes. */
static final Logger log = Logger.getLogger( SubstructureQuery.class );
/** Value of {@code log.isDebugEnabled()} captured once, when this field is
* initialised; it is not refreshed if the logger's level changes later. */
static boolean DEBUGGING = log.isDebugEnabled();
/** If true, shows the full string representation of structures that
* match in the {@link #log debug logs} for this class. It's really
* slow, so best left false unless bug hunting.
* NOTE(review): not read anywhere in the visible part of this class --
* presumably consulted by the sibling SubstructureQuery* classes; confirm. */
static final boolean VERBOSE_LOGGING = false;
/** This is the maximum number of residues allowed in a search
* structure for a substructure search before the query will be
* truncated. The rationale is that the final result set is almost
* always arrived at well before this limit is reached, so truncating
* the query makes no difference to the results but cuts the query time
* down a lot. */
public static final int MAX_SUBSTRUCTURE_RESIDUES = 25;
/** The search (sub-)structure as a graph; assigned once in the constructor. */
private Graph<Linkage,Residue> graph;
/** Set of essentially boolean options to modify characteristics of
* this {@link SubstructureQuery}. Initially empty (never null); presence
* of an option in the {@link Set} means that option is TRUE. */
EnumSet<Option> options = EnumSet.noneOf( Option.class );
/** Cached query string; null until generated by {@link #getQueryString()},
* and null again after {@link #reset()}. */
private String queryString = null;
/** Query search results; null until {@link #execute()} has run. */
private List<SubstructureQueryResult> results = null;
//~~~~~~~~~~~~~~~~~~~~~~ CONSTRUCTORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~
/**
 * Creates a new SubstructureQuery for the passed {@link Sugar}, using
 * the {@link Graph} returned by the sugar's {@code getGraph()} as the
 * search (sub-)structure.
 *
 * @param s the search structure; must not be null
 * @throws NullPointerException if {@code s} is null
 */
public SubstructureQuery( Sugar s )
{
    // An explicit check instead of 'assert': assertions are skipped unless
    // the JVM runs with -ea, so the argument of a public constructor must
    // not be validated with them. NullPointerException is what callers
    // already got (from s.getGraph()) when assertions were disabled.
    if ( s == null )
    {
        throw new NullPointerException( "search Sugar cannot be null" );
    }
    this.graph = s.getGraph();
}
/**
* Creates a new SubstructureQuery for the {@link Sugar} obtained from
* the passed {@link SugarSequence}; delegates to
* {@link #SubstructureQuery(Sugar)}.
*
* @param ss sequence whose {@code getSugar()} result becomes the
* search structure
*/
public SubstructureQuery( SugarSequence ss )
{
this( ss.getSugar() );
}
/**
* Creates a new SubstructureQuery for the {@link Sugar} obtained from
* the passed {@link GlycanSequence}; delegates to
* {@link #SubstructureQuery(Sugar)}.
*
* @param gs glycan sequence whose {@code getSugar()} result becomes the
* search structure
*/
public SubstructureQuery( GlycanSequence gs )
{
this( gs.getSugar() );
}
//~~~~~~~~~~~~~~~~~~~~~~~~~ METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/**
* Returns the {@link Graph} of {@link Linkage}s and {@link Residue}s
* that was taken from the {@link Sugar} used to construct this query.
* The graph held by this query is returned directly, not a copy.
*/
protected Graph<Linkage,Residue> getGraph()
{
return graph;
}
/**
 * Returns the query string for the search substructure
 * {@link Sugar}/{@link Graph} given at construction.
 * <p>
 * The string is generated on the first call and cached; later calls
 * return the cached value until {@link #reset()} is called.
 */
public String getQueryString()
{
    if ( queryString == null )
    {
        if ( DEBUGGING )
        {
            log.debug( "generating substructure query for graph:\n" + graph.toString() );
        }

        // Impl2 is currently the fastest of the generator implementations
        SubstructureQueryGenerator impl = new SubstructureQueryGeneratorImpl2( this );
        queryString = impl.getQueryString();

        // the visitor already logs the query string itself, hence trace only
        if ( log.isTraceEnabled() )
        {
            log.trace("generated query string:\n " + queryString );
        }
    }

    return queryString;
}
/**
* Returns this substructure query wrapped in a new
* {@link SubstructureQueryCriterion} (a Hibernate
* {@link org.hibernate.criterion.Criterion} subclass), allowing it to be
* used in arbitrary {@link Criteria} queries. A fresh criterion object
* is created on every call.
*
* @see SubstructureQueryCriterion
*/
public SubstructureQueryCriterion getQueryCriterion()
{
return new SubstructureQueryCriterion( this );
}
/**
 * Returns the {@link List} of results for this substructure search,
 * or an empty list if there were no results. If the query has not
 * been run yet, it is {@link #execute() executed} first.
 */
public List<SubstructureQueryResult> getResults()
{
    // run the search lazily, the first time results are asked for
    if ( this.results == null )
    {
        execute();
    }

    return this.results;
}
/**
 * Returns the Hibernate {@link Session} of the current
 * {@link EntityManager}.
 *
 * @throws UnsupportedOperationException if the current entity manager
 *   is not a {@link HibernateEntityManager}
 */
static final Session getHibernateSession()
{
    EntityManager manager = Eurocarb.getEntityManager();

    // substructure search is Hibernate-specific, so the manager must be
    // the Hibernate implementation before it can be cast
    if ( manager instanceof HibernateEntityManager )
    {
        return ((HibernateEntityManager) manager).getHibernateSession();
    }

    throw new UnsupportedOperationException(
        "Need a HibernateEntityManager to perform substructure query");
}
/**
 * Executes the query and stores the outcome, one
 * {@link SubstructureQueryResult} per returned {@link GlycanSequence};
 * results are then available from {@link #getResults()}.
 */
public void execute()
{
    //  mjh: the postgres genetic query optimiser (geqo) must either be
    //  turned off, or its threshold must be larger than the number of joins
    //  in the query. This is *critical* for good performance of larger
    //  (> 6-10 residue) substructure queries.
    //
    //  The setting is normally applied on application startup; if this
    //  class is run outside that context it has to be set by hand, with
    //  code similar to the following:
    //
    //-----
    //  Session hbs = getHibernateSession();
    //  if ( MAX_SUBSTRUCTURE_RESIDUES > 10 )
    //  {
    //      int i = MAX_SUBSTRUCTURE_RESIDUES + 1;
    //      log.debug("setting genetic query optimiser threshold to " + i + " (force geqo OFF)" );
    //      String force_geqo_off = "set geqo_threshold = " + i;
    //      hbs.createSQLQuery( force_geqo_off ).executeUpdate();
    //  }
    //-----

    //  build & run the main substructure query
    Query q = getQuery();

    //  alternative that goes through an HQL -> SQL translation:
    //  String sql = HibernateEntityManager.translateHql2Sql( queryString );
    //  Query q = hbs.createSQLQuery( sql ).addEntity( GlycanSequence.class);

    List<GlycanSequence> sequences = (List<GlycanSequence>) q.list();

    //  nothing came back at all: publish an empty list and stop
    if ( sequences == null )
    {
        log.info( "substructure search returned no results" );
        this.results = Collections.emptyList();
        return;
    }

    int count_results = sequences.size();
    log.info( "substructure search returned " + count_results + " result(s)" );

    //  wrap every matched sequence, preserving the order of the query
    this.results = new ArrayList<SubstructureQueryResult>( count_results );
    for ( GlycanSequence gs : sequences )
    {
        this.results.add( new SubstructureQueryResult( gs ) );
    }
}
/**
 * Builds the Hibernate {@link Query} for this search: the string from
 * {@link #getQueryString()} is created as a native SQL query on the
 * current {@link #getHibernateSession() session}, with result rows
 * mapped to {@link GlycanSequence} entities.
 */
private Query getQuery()
{
    // Use the value actually returned by getQueryString() instead of
    // reading the 'queryString' field: the field is only populated as a
    // side effect of that (public, overridable) method.
    String query = getQueryString();

    return getHibernateSession()
        .createSQLQuery( query )
        .addEntity( GlycanSequence.class );
}
/**
 * Resets the query so that it may be run again, using for
 * example, different {@link Option}s.
 * <p>
 * Both the cached query string and the cached results are discarded,
 * so the next call to {@link #getQueryString()} regenerates the query
 * and the next call to {@link #getResults()} re-executes it instead of
 * handing back the results of the previous run. The {@link Option}s
 * that have been set are left untouched.
 */
public void reset()
{
    queryString = null;
    // also drop stale results; otherwise getResults() would keep
    // returning the list produced before the reset
    results = null;
}
/* query options */
/**
* Returns true if the given search {@link Option} is currently set
* on this query.
*
* @param opt the option to test
*/
public boolean getOption( Option opt )
{
return options.contains( opt );
}
/**
 * Returns true if the search {@link Option} with the given name is set.
 *
 * @param option_name exact (case-sensitive) name of an {@link Option}
 *   constant
 * @throws IllegalArgumentException if option_name is not
 *   a valid {@link Option} name.
 */
public boolean getOption( String option_name )
{
    // resolve the name first; this is what rejects unknown names
    Option requested = Option.valueOf( option_name );

    return options.contains( requested );
}
/**
* Switches on the given search {@link Option}, modifying the
* characteristics of the search.
* <p>
* This only adds the option to the option set; a query string that has
* already been generated and cached is not cleared by this method.
* NOTE(review): presumably the generator reads the options while building
* the query string, in which case {@link #reset()} is needed for a newly
* set option to take effect on an already-generated query -- confirm.
*
* @param opt the option to switch on
* @return this query, so that calls can be chained
*/
public SubstructureQuery setOption( Option opt )
{
options.add( opt );
return this;
}
/* enum Option */
/**
 * Specifies various options and meta-data to modify the performance
 * and results of a {@link SubstructureQuery}. These are currently
 * all boolean options, with default value == false.
 * <p>
 * An option may override {@link #modifyQuery} to add its own predicates
 * to the query being generated; options that do not override it leave
 * the query untouched through this callback.
 *
 * @author mjh
 */
public enum Option
{
    /**
     * Specifies that multiple matches of the query substructure
     * should only return the matching {@link GlycanSequence} once,
     * where ordinarily, every distinct match instance would be
     * returned.
     */
    Distinct
    ,

    /**
     * Specifies that the reducing terminus (root) {@link Residue} of the
     * given search sub-structure must also be the reducing terminus (root)
     * of all matching structures.
     */
    Must_Include_Reducing_Terminus
    {
        /**
         * Adds a predicate that the root residue of the search substruct
         * must also be the root of all matching structures, i.e. that
         * the row aliased for the root has no parent.
         */
        @Override
        void modifyQuery( SubstructureQueryGenerator q, Set<Option> options )
        {
            Residue root = q.getSearchGraph().getRootValue();
            String alias = q.getTableAliasFor( root );

            // guard avoids building the message when debug is off
            if ( log.isDebugEnabled() )
            {
                log.debug("adding root residue predicate for " + root );
            }

            q.addPredicate( alias + ".parent_id is null" );
        }
    }
    ,

    /**
     * Specifies that all of the non-reducing terminal (leaf) {@link Residue}s
     * of the given search sub-structure must also be non-reducing
     * termini in all matching structures.
     */
    Must_Include_All_Non_Reducing_Terminii
    {
        /**
         * Adds a predicate that each leaf in the search substruct must also
         * be a leaf in matching structures, expressed as
         * {@code right_index - left_index = 1} on the row aliased for
         * that leaf.
         *
         * @throws IllegalStateException if the search graph reports no leaves
         */
        @Override
        void modifyQuery( SubstructureQueryGenerator q, Set<Option> options )
        {
            Set<Residue> leaves = q.getSearchGraph().getLeafValues();

            if ( leaves.isEmpty() )
            {
                throw new IllegalStateException("Leaves shouldn't ever be empty...");
            }

            for ( Residue leaf : leaves )
            {
                String alias = q.getTableAliasFor( leaf );

                // guard avoids building the message when debug is off
                if ( log.isDebugEnabled() )
                {
                    log.debug("adding leaf residue predicate for " + leaf );
                }

                q.addPredicate( alias + ".right_index - " + alias + ".left_index = 1" );
            }
        }
    }
    ,

    /**
     * Causes the query engine not to include constraints for
     * linkage elements (anomer, reducing and non-reducing terminal
     * positions).
     * <p>
     * NOTE(review): this constant has no active {@link #modifyQuery}
     * override (the former one is kept below, disabled), so presumably
     * the query generator checks for this option itself -- confirm.
     */
    Ignore_Linkages
    //  {
    //      /** Empties the linkage predicate list. */
    //      void modifyQuery( SubstructureQueryGenerator q, Set<Option> options )
    //      {
    //          if ( DEBUGGING )
    //              log.debug("Ignore_Linkages: clearing linkage predicates");
    //          if ( options.contains( Ignore_Residues ) )
    //              log.warn("Ignore_Linkages & Ignore_Residues are both set");
    //          q.linkagePredicates.clear();
    //      }
    //  }
    ,

    /**
     * Causes the query engine not to include constraints for
     * residue identity -- effectively making it so that only the linkages
     * in the query substructures matter.
     * <p>
     * NOTE(review): this constant has no active {@link #modifyQuery}
     * override (the former one is kept below, disabled), so presumably
     * the query generator checks for this option itself -- confirm.
     */
    Ignore_Residues
    //  {
    //      /** Empties the residue identity predicate list. */
    //      void modifyQuery( SubstructureQueryGenerator q, Set<Option> options )
    //      {
    //          if ( DEBUGGING )
    //              log.debug("Ignore_Residues: clearing residue identity predicates");
    //          if ( options.contains( Ignore_Linkages ) )
    //              log.warn("Ignore_Linkages & Ignore_Residues are both set");
    //          q.residuePredicates.clear();
    //      }
    //  }
    ,

    /**
     * Causes the query engine to disregard the residue name/identity
     * of {@link Monosaccharide}s and focus purely on their stereochemistry,
     * effectively disregarding substituents. This option has no
     * effect on {@link Residue}s that are {@link Substituent}s.
     */
    Ignore_Monosac_Substituents
    ; //--- end of enum constants ^^^

    /**
     * Callback for {@link Option} enum values to modify the
     * query being generated for the {@link SubstructureQuery} on which
     * they have been set. Does nothing unless a constant overrides it.
     *
     * @param q the generator whose query may be modified
     * @param options the options passed in by the caller of this callback
     */
    void modifyQuery( SubstructureQueryGenerator q, Set<Option> options )
    {
        /* do nothing by default */
    }
} // end enum Option
} // end class SubstructureQuery