/* * EuroCarbDB, a framework for carbohydrate bioinformatics * * Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as * indicated by the @author tags or express copyright attribution * statements applied by the authors. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * A copy of this license accompanies this distribution in the file LICENSE.txt. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * Last commit: $Rev: 1574 $ by $Author: glycoslave $ on $Date:: 2009-07-24 #$ */ package org.eurocarbdb.dataaccess.core.seq; import org.apache.log4j.Logger; import java.util.Map; import java.util.Set; import java.util.List; import java.util.EnumSet; import java.util.HashMap; import java.util.ArrayList; import org.eurocarbdb.util.graph.Graph; import org.eurocarbdb.util.graph.Edge; import org.eurocarbdb.util.graph.Vertex; import org.eurocarbdb.util.graph.DepthFirstGraphVisitor; import org.eurocarbdb.sugar.Sugar; import org.eurocarbdb.sugar.Anomer; import org.eurocarbdb.sugar.Linkage; import org.eurocarbdb.sugar.Residue; import org.eurocarbdb.sugar.Monosaccharide; import org.eurocarbdb.sugar.GlycosidicLinkage; import org.eurocarbdb.dataaccess.HibernateEntityManager; import org.eurocarbdb.dataaccess.core.GlycanSequence; import org.eurocarbdb.dataaccess.core.seq.GlycanResidue; import static org.eurocarbdb.util.StringUtils.join; import static org.eurocarbdb.dataaccess.core.seq.SubstructureQuery.Option.*; /** * Implementation of {@link SubstructureQueryGenerator} that favours * putting all predicates into the WHERE clause and ordering them for * optimal performance. * * Generates HQL internally, which is converted to SQL via * {@link HibernateEntityManager#translateHql2Sql}. * * @see SubstructureQuery * @see SubstructureQueryResult * @see SubstructureQuery.Option * @see <a href="http://docs.jboss.org/hibernate/stable/core/reference/en/html/">hibernate docs</a> * * @author mjh */ public class SubstructureQueryGeneratorImpl1 extends DepthFirstGraphVisitor<Linkage,Residue> implements SubstructureQueryGenerator { /** logging handle */ static final Logger log = SubstructureQuery.log; /** tuneable query optimisation parameter: this is the min number of * residues in the search graph at which we add an optimisation * predicate to exclude searching graphs smaller than this number. * currently, this does not appear to have a huge effect on performance * but since it is very cheap to generate & include at both the code & sql * level, the default is to always add it. */ private static final int MIN_DESCENDANTS_B4_OPTIMISATION_APPLIES = 0; //~~~~~~~~~~~~~~~~~~~~~~~ PROPERTIES ~~~~~~~~~~~~~~~~~~~~~~~~~~ /** counter for table aliases to track which residue is which, with * the first (root) residue in the depth-first traversal being 1, * and incrementing for each subsequent Residue encountered. */ private int counter = 1; /** List of String aliases to table names, 1 table per residue. */ private List<String> ids; /** list of table joins, declaring table aliases */ private List<String> joins; /** list of table join predicates */ List<String> joinPredicates; /** list of WHERE predicates for residues */ List<String> residuePredicates; /** list of WHERE predicates for linkages */ List<String> linkagePredicates; /** list of WHERE predicates for other stuff */ List<String> otherPredicates; /** the source of the substruct query; specifies various options * & meta-data that may add additional predicates to query */ private final SubstructureQuery query; /** the graph of the search substructure */ private final Graph<Linkage,Residue> graph; /** maps a residue in the search graph to its table alias */ private Map<Residue,String> tableAliasMap; //~~~~~~~~~~~~~~~~~~~~~~~ CONSTRUCTORS ~~~~~~~~~~~~~~~~~~~~~~~~~~ public SubstructureQueryGeneratorImpl1( SubstructureQuery q ) { this.query = q; this.graph = q.getGraph(); // SELECT clause: one id for each *vertex* in graph this.ids = new ArrayList<String>( graph.countVertices() ); this.tableAliasMap = new HashMap<Residue,String>( graph.countVertices() ); // FROM clause: one join for each *edge* in graph this.joins = new ArrayList<String>( graph.countEdges() ); // WHERE clause: any number of where predicates // where predicates are broken into different types because // order of predicate inclusion DOES have a large effect on // query performance. this.joinPredicates = new ArrayList<String>( joins.size() ); this.residuePredicates = new ArrayList<String>(); this.linkagePredicates = new ArrayList<String>(); this.otherPredicates = new ArrayList<String>(2); } //~~~~~~~~~~~~~~~~~~~~~~~~~ METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /** * Returns the substructure {@link Graph} for which we're * generating a query. */ public Graph<Linkage,Residue> getSearchGraph() { return graph; } /** Returns HQL query string representing given search sub-structure. */ public String getQueryString() { visit( graph ); processOptions(); String hql = buildQueryString(); if ( log.isDebugEnabled() ) log.debug("generated HQL substructure query:\n" + hql ); String sql = HibernateEntityManager.translateHql2Sql( hql ); return sql; } String buildQueryString() { // assemble all WHERE predicates // note: order DOES have a significant influence on query performance. int size = joinPredicates.size() + residuePredicates.size() + linkagePredicates.size() + otherPredicates.size(); List<String> predicates = new ArrayList<String>( size ); predicates.addAll( residuePredicates ); predicates.addAll( linkagePredicates ); predicates.addAll( otherPredicates ); predicates.addAll( joinPredicates ); // add join+predicate for GlycanSequence info // glycan_sequence join should always go LAST, as this // produces the most efficient query plan for postgres joins.add("GlycanSequence gs"); predicates.add( ids.get(0) + ".glycanSequence = gs"); // assemble final query String hql = ( query.getOption( Distinct ) ? "select distinct gs " : "select gs " ) + "\n" + "from " + join(", ", joins ) + "\n" + "where " + join("\nand ", predicates ) ; return hql; } /** * {@inheritDoc} * * Supports: Distinct * , Ignore_Linkages * , Must_Include_Reducing_Terminus * , Must_Include_All_Non_Reducing_Terminii */ public Set<SubstructureQuery.Option> getSupportedOptions() { return EnumSet.of( Distinct , Ignore_Linkages , Must_Include_Reducing_Terminus , Must_Include_All_Non_Reducing_Terminii ); } /** * Returns the String table alias for the table representing * the given Residue. Creates new aliases as needed. */ public String getTableAliasFor( Residue r ) { if ( tableAliasMap.containsKey( r ) ) return tableAliasMap.get( r ); // // assert name != null && name.length() > 0; // String name = r.getName().replace('-', '_'); // // for ease of debugging, aliases are named after the residue // String alias = "r" + name + '_' + counter; String alias = "r" + counter; counter++; ids.add( alias ); tableAliasMap.put( r, alias ); joins.add( "GlycanResidue " + alias ); return alias; } @Override public void accept( Graph<Linkage,Residue> g ) { // ensure no sub-graphs for the moment if ( this.graph != g ) throw new UnsupportedOperationException( "Substructure queries with sub-graphs are not yet supported"); Residue root = g.getRootVertex().getValue(); int min_size = g.countVertices() - 1; addSizePredicate( root, min_size ); super.accept( g ); } @Override public void accept( Edge<Linkage,Residue> edge ) { Linkage link = edge.getValue(); Residue parent = edge.getParent().getValue(); Residue child = edge.getChild().getValue(); addJoinPredicate( parent, child ); if ( ! query.getOption( Ignore_Linkages ) ) addLinkagePredicates( parent, child, link ); super.accept( edge ); } @Override public void accept( Vertex<Linkage,Residue> vertex ) { Residue r = vertex.getValue(); addResiduePredicate( r ); super.accept( vertex ); } /** Adds a {@link String} WHERE predicate */ public void addPredicate( String predicate ) { otherPredicates.add( predicate ); } /** * Add a table join predicate for the link between the given * parent and child {@link Residue}s. */ public void addJoinPredicate( Residue parent, Residue child ) { String join_predicate = getTableAliasFor( parent ) + " = " + getTableAliasFor( child ) + ".parent" ; if ( log.isDebugEnabled() ) log.debug("adding WHERE clause join predicate: " + join_predicate ); joinPredicates.add( join_predicate ); } /** * Add WHERE predicates for the given {@link Linkage} between the * given parent/child {@link Residue}s. * * @see Option#Ignore_Linkages */ public void addLinkagePredicates( Residue parent, Residue child, Linkage link ) { addLinkagePredicate( child, "linkage.parentTerminus", link.getParentTerminus() ); addLinkagePredicate( child, "linkage.childTerminus", link.getChildTerminus() ); // if ( link instanceof GlycosidicLinkage ) // { // Anomer a = ((GlycosidicLinkage) link).getAnomer(); // if ( a == null && (child instanceof Monosaccharide) ) // a = ((Monosaccharide) child).getAnomer(); // // addAnomerPredicate( child, "linkage.anomer", a ); // addAnomerPredicate( child, "anomer", a ); // } } /** Add predicate(s) that specify the identity of the given {@link Residue}. */ public void addResiduePredicate( Residue r ) { String predicate = getTableAliasFor( r ) + ".residueName = '" // + r.getResidue().getName(); + r.getName() + "'"; if ( log.isDebugEnabled() ) log.debug("adding residue identity predicate: " + predicate ); residuePredicates.add( predicate ); // add anomer predicate, if applicable. if ( r instanceof Monosaccharide ) { Anomer a = ((Monosaccharide) r).getAnomer(); if ( a != null ) addAnomerPredicate( r, "anomer", a ); } addSiblingDisambiguationPredicateIfNeeded( r ); } /** * if given residue has 2 or more children, then there needs to * be a predicate added to ensure that we don't match the * same child residue twice. this will be a factorial-style * expansion of negated equality of sibling child residues of * the given residue. */ private void addSiblingDisambiguationPredicateIfNeeded( Residue r ) { List<Edge<Linkage,Residue>> children = graph.getVertex( r ).getOutgoingEdges(); if ( children.size() > 1 ) { for ( int i = 0; i < children.size(); i++ ) { for ( int j = i + 1; j < children.size(); j++ ) { Residue r1 = children.get( i ).getChild().getValue(); Residue r2 = children.get( j ).getChild().getValue(); assert r1 != r2; String disambiguation = getTableAliasFor( r1 ) + " != " + getTableAliasFor( r2 ); log.debug( "adding predicate to disambiguate sibling residues: " + disambiguation ); residuePredicates.add( disambiguation ); } } } } /** * Adds a query optimisation predicate which ensures that only * candidate structures with at least the given number of descendants * descending from the given {@link Residue} are searched. */ protected void addSizePredicate( Residue r, int min_children ) { if ( min_children <= MIN_DESCENDANTS_B4_OPTIMISATION_APPLIES ) return; String table = getTableAliasFor( r ); // makes use of nested set left/right values -- // number of descendants of any node in a tree is // equal to ((right - left - 1) / 2), so (right - left) // should be equal to or greater than 2x descendants + 1 int limit = (min_children * 2) + 1; String size_predicate = table + ".rightIndex" + " - " + table + ".leftIndex" + " >= " + limit ; if ( log.isDebugEnabled() ) { log.debug( "adding min descendants predicate: " + size_predicate ); } addPredicate( size_predicate ); } /** * Add a predicate for the given {@link Residue}, using the given linkage * column, to match the given linkage position (ie: r.column == position). */ protected void addLinkagePredicate( Residue r, String column, int position ) { if ( position == 1 ) { log.warn("*** NOTE: glycanbuilder gives default position == 1" + " so NOT adding a predicate for it ***"); return; } if ( position > 0 ) { String predicate = getTableAliasFor( r ) + '.' + column + " = " + position ; if ( log.isDebugEnabled() ) log.debug("adding WHERE clause linkage predicate: " + predicate ); linkagePredicates.add( predicate ); } } /** * Add a predicate for the given {@link Anomer} of the given {@link Residue} * using the given column (ie: r.column == a). */ protected void addAnomerPredicate( Residue r, String column, Anomer a ) { // log.warn("discarding anomer predicate cause DB column empty for the moment..."); // if ( false /* fixme */ && a != null && a.isDefinite() ) if ( a != null && a.isDefinite() ) { String predicate = getTableAliasFor( r ) + '.' + column + " = '" + a.toChar() + "'" ; if ( log.isDebugEnabled() ) log.debug("adding WHERE clause anomer predicate: " + predicate ); linkagePredicates.add( predicate ); } } /** * Modifies the query being generated by the {@link Set} of {@link Option}s * contained in the current {@link SubstructureQuery}. */ protected void processOptions() { if ( query.options.size() == 0 ) return; for ( SubstructureQuery.Option option : query.options ) { log.debug("processing option " + option ); option.modifyQuery( this, query.options ); } } } // end class