/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.plan.optimizer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import org.apache.pig.PigException;
import org.apache.pig.impl.plan.Operator;
import org.apache.pig.impl.plan.OperatorPlan;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.plan.optimizer.RuleOperator.NodeType;
import org.apache.pig.impl.util.Pair;
/**
* RuleMatcher contains the logic to determine whether a given rule matches.
* This alone does not mean the rule will be applied. Transformer.check()
* still has to pass before Transfomer.transform() is called.
*
*/
public class RuleMatcher<O extends Operator, P extends OperatorPlan<O>> {
private Rule<O, P> mRule;
private List<Pair<O, RuleOperator.NodeType>> mMatch;
private List<List<Pair<O, RuleOperator.NodeType>>> mPrelimMatches = new ArrayList<List<Pair<O, RuleOperator.NodeType>>>();
private List<List<O>> mMatches = new ArrayList<List<O>>();
private P mPlan; // for convenience.
private int mNumCommonNodes = 0;
/**
* Test a rule to see if it matches the current plan. Save all matched nodes using BFS
* @param rule Rule to test for a match.
* @return true if the plan matches.
*/
public boolean match(Rule<O, P> rule) throws OptimizerException {
mRule = rule;
CommonNodeFinder commonNodeFinder = new CommonNodeFinder(mRule.getPlan());
try {
commonNodeFinder.visit();
mNumCommonNodes = commonNodeFinder.getCount();
} catch (VisitorException ve) {
int errCode = 2125;
String msg = "Internal error. Problem in computing common nodes in the Rule Plan.";
throw new OptimizerException(msg, errCode, PigException.BUG, ve);
}
mPlan = mRule.getTransformer().getPlan();
mMatches.clear();
mPrelimMatches.clear();
if (mRule.getWalkerAlgo() == Rule.WalkerAlgo.DependencyOrderWalker)
DependencyOrderWalker();
else if (mRule.getWalkerAlgo() == Rule.WalkerAlgo.DepthFirstWalker)
DepthFirstWalker();
else if (mRule.getWalkerAlgo() == Rule.WalkerAlgo.ReverseDependencyOrderWalker)
ReverseDependencyOrderWalker();
return (mMatches.size()!=0);
}
private void ReverseDependencyOrderWalker()
{
List<O> fifo = new ArrayList<O>();
Set<O> seen = new HashSet<O>();
List<O> roots = mPlan.getRoots();
if (roots == null) return;
for (O op : roots) {
RDODoAllSuccessors(op, seen, fifo);
}
for (O op: fifo) {
if (beginMatch(op))
mPrelimMatches.add(mMatch);
}
if(mPrelimMatches.size() > 0) {
processPreliminaryMatches();
}
}
private void RDODoAllSuccessors(O node, Set<O> seen, Collection<O> fifo)
{
if (!seen.contains(node)) {
// We haven't seen this one before.
Collection<O> succs = mPlan.getSuccessors(node);
if (succs != null && succs.size() > 0) {
// Do all our successors before ourself
for (O op : succs) {
RDODoAllSuccessors(op, seen, fifo);
}
}
// Now do ourself
seen.add(node);
fifo.add(node);
}
}
private void DependencyOrderWalker()
{
List<O> fifo = new ArrayList<O>();
Set<O> seen = new HashSet<O>();
List<O> leaves = mPlan.getLeaves();
if (leaves == null) return;
for (O op : leaves) {
BFSDoAllPredecessors(op, seen, fifo);
}
for (O op: fifo) {
if (beginMatch(op))
{
mPrelimMatches.add(mMatch);
}
}
if(mPrelimMatches.size() > 0) {
processPreliminaryMatches();
}
}
/**
* A method to compute the final matches
*/
private void processPreliminaryMatches() {
//The preliminary matches contain paths that match
//the specification in the RulePlan. However, if there
//are twigs and DAGs, then a further computation is required
//to extract the nodes in the mPlan that correspond to the
//roots of the RulePlan
//compute the number of common nodes in each preliminary match
List<List<O>> commonNodesPerMatch = new ArrayList<List<O>>();
for(int i = 0; i < mPrelimMatches.size(); ++i) {
commonNodesPerMatch.add(getCommonNodesFromMatch(mPrelimMatches.get(i)));
}
if(mNumCommonNodes == 0) {
//the rule plan had simple paths
//verification step
//if any of the preliminary matches had common nodes
//then its an anomaly
for(int i = 0; i < commonNodesPerMatch.size(); ++i) {
if(commonNodesPerMatch.get(i) != null) {
//we have found common nodes when there should be none
//just return as mMatches will be empty
return;
}
}
//pick the first node of each match and put them into individual lists
//put the lists inside the list of lists mMatches
for(int i = 0; i < mPrelimMatches.size(); ++i) {
List<O> match = new ArrayList<O>();
match.add(mPrelimMatches.get(i).get(0).first);
mMatches.add(match);
}
//all the matches have been computed for the simple path
return;
} else {
for(int i = 0; i < commonNodesPerMatch.size(); ++i) {
int commonNodes = (commonNodesPerMatch.get(i) == null? 0 : commonNodesPerMatch.get(i).size());
if(commonNodes != mNumCommonNodes) {
//if there are is a mismatch in the common nodes then we have a problem
//the rule plan states that we have mNumCommonNodes but we have commonNodes
//in the match. Just return
return;
}
}
}
//keep track of the matches that have been processed
List<Boolean> processedMatches = new ArrayList<Boolean>();
for(int i = 0; i < mPrelimMatches.size(); ++i) {
processedMatches.add(false);
}
//a do while loop to handle single matches
int outerIndex = 0;
do {
if(processedMatches.get(outerIndex)) {
++outerIndex;
continue;
}
List<Pair<O, RuleOperator.NodeType>> outerMatch = mPrelimMatches.get(outerIndex);
List<O> outerCommonNodes = commonNodesPerMatch.get(outerIndex);
Set<O> outerSetCommonNodes = new HashSet<O>(outerCommonNodes);
Set<O> finalIntersection = new HashSet<O>(outerCommonNodes);
Set<O> cumulativeIntersection = new HashSet<O>(outerCommonNodes);
List<O> patternMatchingRoots = new ArrayList<O>();
Set<O> unionOfRoots = new HashSet<O>();
boolean innerMatchProcessed = false;
unionOfRoots.add(outerMatch.get(0).first);
for(int innerIndex = outerIndex + 1;
(innerIndex < mPrelimMatches.size()) && (!processedMatches.get(innerIndex));
++innerIndex) {
List<Pair<O, RuleOperator.NodeType>> innerMatch = mPrelimMatches.get(innerIndex);
List<O> innerCommonNodes = commonNodesPerMatch.get(innerIndex);
Set<O> innerSetCommonNodes = new HashSet<O>(innerCommonNodes);
//we need to compute the intersection of the common nodes
//the size of the intersection should be equal to the number
//of common nodes and the type of each rule node class
//if there is no match then it could be that we hit a match
//for a different path, i.e., another pattern that matched
//with a different set of nodes. In this case, we mark this
//match as not processed and move onto the next match
outerSetCommonNodes.retainAll(innerSetCommonNodes);
if(outerSetCommonNodes.size() != mNumCommonNodes) {
//there was no match
//continue to the next match
continue;
} else {
Set<O> tempCumulativeIntersection = new HashSet<O>(cumulativeIntersection);
tempCumulativeIntersection.retainAll(outerSetCommonNodes);
if(tempCumulativeIntersection.size() != mNumCommonNodes) {
//problem - there was a set intersection with a size mismatch
//between the cumulative intersection and the intersection of the
//inner and outer common nodes
//set mMatches to empty and return
mMatches = new ArrayList<List<O>>();
return;
} else {
processedMatches.set(innerIndex, true);
innerMatchProcessed = true;
cumulativeIntersection = tempCumulativeIntersection;
unionOfRoots.add(innerMatch.get(0).first);
}
}
}
cumulativeIntersection.retainAll(finalIntersection);
if(cumulativeIntersection.size() != mNumCommonNodes) {
//the cumulative and final intersections did not intersect
//this could happen when each of the matches are disjoint
//check if the innerMatches were processed at all
if(innerMatchProcessed) {
//problem - the inner matches were processed and we did
//not find common intersections
mMatches = new ArrayList<List<O>>();
return;
}
}
processedMatches.set(outerIndex, true);
for(O node: unionOfRoots) {
patternMatchingRoots.add(node);
}
mMatches.add(patternMatchingRoots);
++outerIndex;
} while (outerIndex < mPrelimMatches.size() - 1);
}
private List<O> getCommonNodesFromMatch(List<Pair<O, NodeType>> match) {
List<O> commonNodes = null;
//A lookup table to weed out duplicates
Map<O, Boolean> lookup = new HashMap<O, Boolean>();
for(int index = 0; index < match.size(); ++index) {
if(match.get(index).second.equals(RuleOperator.NodeType.COMMON_NODE)) {
if(commonNodes == null) {
commonNodes = new ArrayList<O>();
}
O node = match.get(index).first;
//lookup the node under question
//if the node is not found in the table
//then we are examining it for the first time
//add it to the output list and mark it as seen
//else continue to the next iteration
if(lookup.get(node) == null) {
commonNodes.add(node);
lookup.put(node, true);
}
}
}
return commonNodes;
}
private void BFSDoAllPredecessors(O node, Set<O> seen, Collection<O> fifo) {
if (!seen.contains(node)) {
// We haven't seen this one before.
Collection<O> preds = mPlan.getPredecessors(node);
if (preds != null && preds.size() > 0) {
// Do all our predecessors before ourself
for (O op : preds) {
BFSDoAllPredecessors(op, seen, fifo);
}
}
// Now do ourself
seen.add(node);
fifo.add(node);
}
}
private void DepthFirstWalker()
{
Set<O> seen = new HashSet<O>();
DFSVisit(null, mPlan.getRoots(), seen);
}
private void DFSVisit(O node, Collection<O> successors,Set<O> seen)
{
if (successors == null) return;
for (O suc : successors) {
if (seen.add(suc)) {
if (beginMatch(suc))
mPrelimMatches.add(mMatch);
Collection<O> newSuccessors = mPlan.getSuccessors(suc);
DFSVisit(suc, newSuccessors, seen);
}
}
}
/**
* @return first occurrence of matched list of nodes that with the instances of nodes that matched the
* pattern defined by
* the rule. The nodes will be in the vector in the order they are
* specified in the rule.
*/
List<O> getMatches() {
if (mMatches.size()>=1)
return mMatches.get(0);
return null;
}
/**
* @return all occurrences of matches. lists of nodes that with the instances of nodes that matched the
* pattern defined by
* the rule. The nodes will be in the vector in the order they are
* specified in the rule.
*/
public List<List<O>> getAllMatches() {
return mMatches;
}
/*
* This pattern matching is fairly simple and makes some important
* assumptions.
* 1) The pattern to be matched must be expressible as a graph.
* 2) The pattern must always begin with one of the root nodes in the rule plan.
* After that it can go where it wants.
*
*/
private boolean beginMatch(O node) {
if (node == null) return false;
mMatch = new ArrayList<Pair<O, RuleOperator.NodeType>>();
List<O> nodeSuccessors;
List<RuleOperator> ruleRoots = mRule.getPlan().getRoots();
for(RuleOperator ruleRoot: ruleRoots) {
if (node.getClass().getName().equals(ruleRoot.getNodeClass().getName()) ||
ruleRoot.getNodeType().equals(RuleOperator.NodeType.ANY_NODE)) {
mMatch.add(new Pair<O, RuleOperator.NodeType>(node, ruleRoot.getNodeType()));
// Follow the edge to see the next node we should be looking for.
List<RuleOperator> ruleRootSuccessors = mRule.getPlan().getSuccessors(ruleRoot);
if (ruleRootSuccessors == null) {
// This was looking for a single node
return true;
}
nodeSuccessors = mPlan.getSuccessors(node);
if ((nodeSuccessors == null) || (nodeSuccessors.size() != ruleRootSuccessors.size())) {
//the ruleRoot has successors but the node does not
//OR
//the number of successors for the ruleRoot does not match
//the number of successors for the node
return false;
}
boolean foundMatch = false;
for (O nodeSuccessor : nodeSuccessors) {
foundMatch |= continueMatch(nodeSuccessor, ruleRootSuccessors);
}
return foundMatch;
}
}
// If we get here we haven't found it.
return false;
}
private boolean continueMatch(O node, List<RuleOperator> ruleOperators) {
for(RuleOperator ruleOperator: ruleOperators) {
if (node.getClass().getName().equals(ruleOperator.getNodeClass().getName()) ||
ruleOperator.getNodeType().equals(RuleOperator.NodeType.ANY_NODE)) {
mMatch.add(new Pair<O, RuleOperator.NodeType>(node,ruleOperator.getNodeType()));
// Follow the edge to see the next node we should be looking for.
List<RuleOperator> ruleOperatorSuccessors = mRule.getPlan().getSuccessors(ruleOperator);
if (ruleOperatorSuccessors == null) {
// We've completed the match
return true;
}
List<O> nodeSuccessors;
nodeSuccessors = mPlan.getSuccessors(node);
if ((nodeSuccessors == null) ||
(nodeSuccessors.size() != ruleOperatorSuccessors.size())) {
//the ruleOperator has successors but the node does not
//OR
//the number of successors for the ruleOperator does not match
//the number of successors for the node
return false;
}
boolean foundMatch = false;
for (O nodeSuccessor : nodeSuccessors) {
foundMatch |= continueMatch(nodeSuccessor, ruleOperatorSuccessors);
}
return foundMatch;
}
// We can arrive here either because we didn't match at this node or
// further down the line. One way or another we need to remove ourselves
// from the match vector and return false.
//SMS - I don't think we need this as mMatch will be discarded anyway
//mMatch.set(nodeNumber, null);
return false;
}
return false;
}
}