/* * Copyright 2009-2012 Collaborative Research Centre SFB 632 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package annis.ql.parser; import annis.model.Join; import annis.model.QueryNode; import annis.model.QueryNode.Range; import annis.sqlgen.model.Precedence; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; /** * Extends precedence relations to other nodes that are only transitivly connected. * * The algorithm calculates the reachability graph for each node of the query * (as defined by the precedence operator) and inherits and extends the precedence * property to the nodes connected with this node. The Goal is to preserve as * much restrictive information as possible. * * Breadth-first search is used in order to find the shortest precedence * relation between nodes . This is just an approximation since beeing near in * the reachability graph does not necessary mean the relation is more * restrictive than a relation with more relations. Still it is assumed that * "normal" AQL queries will satisfiy this condition. And in the end, even * a "is after this token somewhere in the text" condition is a huge improvement. * * @author Thomas Krause <krauseto@hu-berlin.de> */ public class TransitivePrecedenceOptimizer implements QueryDataTransformer { public TransitivePrecedenceOptimizer() { } @Override public QueryData transform(QueryData data) { // initialize helper variables HashSet<Long> visitedNodes = new HashSet<>(); for(List<QueryNode> alternative : data.getAlternatives()) { Map<Long, Set<Precedence>> outJoins = createInitialJoinMap(alternative); for(QueryNode node : alternative) { Set<String> segmentations = getAllSegmentations(node); visitedNodes.clear(); // we apply the algorithm node by node // tok == null segmentation propagateNodePrecedence(node, node, visitedNodes, outJoins, null, null); for(String s : segmentations) { propagateNodePrecedence(node, node, visitedNodes, outJoins, null, s); } } } return data; } private Set<String> getAllSegmentations(QueryNode node) { Set<String> result = new TreeSet<>(); for(Join j : node.getOutgoingJoins()) { if(j instanceof Precedence) { Precedence p = (Precedence) j; if(p.getSegmentationName() != null) { result.add(p.getSegmentationName()); } } } return result; } private Map<Long, Set<Precedence>> createInitialJoinMap(List<QueryNode> alternative) { Map<Long, Set<Precedence>> result = new HashMap<>(); for(QueryNode node : alternative) { Set<Precedence> joinList = new HashSet<>(); for(Join j : node.getOutgoingJoins()) { if(j instanceof Precedence) { joinList.add((Precedence) j); } } result.put(node.getId(), joinList); } return result; } private void propagateNodePrecedence(QueryNode initialNode, QueryNode currentNode, Set<Long> visitedNodes, Map<Long, Set<Precedence>> outJoins, Range range, String segmentation) { visitedNodes.add(currentNode.getId()); Map<QueryNode, Range> nextNodes = new HashMap<>(); // iterator over all outgoing precedence joins List<Join> originalJoins = new LinkedList<>(currentNode.getOutgoingJoins()); for(Join join : originalJoins) { if(join instanceof Precedence) { Precedence p = (Precedence) join; if((segmentation == null && p.getSegmentationName() == null) || (segmentation != null && segmentation.equals(p.getSegmentationName())) ) { Range newRange; if(range == null) { // create a new range at initial node newRange = new Range(p.getMinDistance(), p.getMaxDistance()); } else { // calculate the new range depending on old one if( currentNode.isToken() == false || (range.getMin() == 0 && range.getMax() == 0) || (p.getMinDistance() == 0 && p.getMaxDistance() == 0)) { // use unlimited range since // a) the node could also be a // span covering more than one token, // b) the original constraint is an unlimited range newRange = new Range(0, 0); } else { // add the new precendence values to the old one newRange = new Range(range.getMin() + p.getMinDistance(), range.getMax() + p.getMaxDistance()); } } // put the target node in the list of nodes to check if not visited yet if(!visitedNodes.contains(p.getTarget().getId())) { nextNodes.put(p.getTarget(), newRange); Precedence newJoin = new Precedence(p.getTarget(), newRange.getMin(), newRange.getMax()); Set<Precedence> existingJoins = outJoins.get(initialNode.getId()); // only add if this join is not already included // (which is always true for the initial node) // and the join is more restrictive than any previous one boolean moreRestrictive = true; for (Precedence oldJoin : existingJoins) { if(oldJoin.getTarget() == newJoin.getTarget()) { if (!joinMoreRestrictive(oldJoin, newJoin)) { moreRestrictive = false; break; } } } if (moreRestrictive) { // add newly created discovered transitive precedence initialNode.addOutgoingJoin(newJoin); existingJoins.add(newJoin); } } // end if not visited yet } // end if segmentation matches } // end if is precedence join } // end for each join for(Map.Entry<QueryNode, Range> e : nextNodes.entrySet()) { // call us recursivly but remember the range propagateNodePrecedence(initialNode, e.getKey(), visitedNodes, outJoins, e.getValue(), segmentation); } } private boolean joinMoreRestrictive(Precedence joinOld, Precedence joinNew) { // the new one is an unlimited indirect join which can never be better than // the original one if(joinNew.getMinDistance() == 0 && joinNew.getMaxDistance() == 0) { return false; } // both values are worse than the old one if(joinNew.getMaxDistance() >= joinOld.getMaxDistance() && joinNew.getMinDistance() <= joinOld.getMinDistance()) { return false; } // difference is less than the old one if((joinOld.getMaxDistance() - joinOld.getMinDistance()) < (joinNew.getMaxDistance() - joinNew.getMinDistance()) ) { return false; } return true; } }