/*
* *************************************************************************************
* Copyright (C) 2008 EsperTech, Inc. All rights reserved. *
* http://esper.codehaus.org *
* http://www.espertech.com *
* ---------------------------------------------------------------------------------- *
* The software in this package is published under the terms of the GPL license *
* a copy of which has been included with this distribution in the license.txt file. *
* *************************************************************************************
*/
package com.espertech.esper.rowregex;
import com.espertech.esper.collection.Pair;
import com.espertech.esper.epl.expression.ExprNode;
import com.espertech.esper.epl.expression.ExprNode;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.util.*;
/**
 * Static helpers for match-recognize row patterns: classifies pattern variables as
 * single-match or multiple-match, and turns a row-regex expression tree into NFA states.
 */
public class EventRowRegexHelper
{
    private static final Log log = LogFactory.getLog(EventRowRegexHelper.class);

    /**
     * Walks the expression tree and sorts every atom tag into either the single or the multiple set.
     * <p>
     * A tag ends up in the multiple set when it is seen a second time in the same pair of sets,
     * when its atom type reports multiple matches, or when it sits below a nested node whose type
     * reports multiple matches. Each branch of an alteration is inspected with its own fresh pair of
     * sets before the results are merged, so a tag occurring once in each of several alternative
     * branches is not treated as a repeat.
     *
     * @param parent parent regex expression node
     * @param isMultiple true if an enclosing node already marks the variable as multiple, false for single
     * @param variablesSingle receives the names of single variables
     * @param variablesMultiple receives the names of multiple (group) variables
     */
    protected static void recursiveInspectVariables(RowRegexExprNode parent, boolean isMultiple, Set<String> variablesSingle, Set<String> variablesMultiple)
    {
        if (parent instanceof RowRegexExprNodeNested)
        {
            RowRegexExprNodeNested group = (RowRegexExprNodeNested) parent;
            for (RowRegexExprNode member : parent.getChildNodes())
            {
                boolean memberIsMultiple = group.getType().isMultipleMatches() || isMultiple;
                recursiveInspectVariables(member, memberIsMultiple, variablesSingle, variablesMultiple);
            }
            return;
        }

        if (parent instanceof RowRegexExprNodeAlteration)
        {
            for (RowRegexExprNode alternative : parent.getChildNodes())
            {
                // each alternative starts from empty sets so that sibling alternatives do not see each other
                Set<String> alternativeSingles = new LinkedHashSet<String>();
                Set<String> alternativeMultiples = new LinkedHashSet<String>();
                recursiveInspectVariables(alternative, isMultiple, alternativeSingles, alternativeMultiples);
                variablesMultiple.addAll(alternativeMultiples);
                variablesSingle.addAll(alternativeSingles);
            }
            // a name that is multiple anywhere must not remain listed as single
            variablesSingle.removeAll(variablesMultiple);
            return;
        }

        if (!(parent instanceof RowRegexExprNodeAtom))
        {
            // any other node type: children share the caller's sets
            for (RowRegexExprNode child : parent.getChildNodes())
            {
                recursiveInspectVariables(child, isMultiple, variablesSingle, variablesMultiple);
            }
            return;
        }

        RowRegexExprNodeAtom atom = (RowRegexExprNodeAtom) parent;
        String tag = atom.getTag();
        if (variablesMultiple.contains(tag))
        {
            return;
        }

        // remove() reports whether the tag was already known as single, which makes this a repeat
        boolean promote = variablesSingle.remove(tag) || atom.getType().isMultipleMatches() || isMultiple;
        if (promote)
        {
            variablesMultiple.add(tag);
        }
        else
        {
            variablesSingle.add(tag);
        }
    }

    /**
     * Builds the NFA for the given expression tree, attaches a shared end state to every end state
     * of the resulting strand and numbers all states sequentially starting at zero.
     *
     * @param parent to build start state for
     * @param variableDefinitions each variable and its expressions
     * @param variableStreams variable name and its stream number plus multiple-flag
     * @return the start states and all states of the built strand
     */
    protected static RegexNFAStrandResult recursiveBuildStartStates(RowRegexExprNode parent,
                                                                    Map<String, ExprNode> variableDefinitions,
                                                                    Map<String, Pair<Integer, Boolean>> variableStreams
    )
    {
        RegexNFAStrand strand = recursiveBuildStatesInternal(parent,
                variableDefinitions,
                variableStreams,
                new Stack<Integer>());

        // every end state of the strand leads into the one terminal state
        RegexNFAStateEnd terminal = new RegexNFAStateEnd();
        for (RegexNFAStateBase last : strand.getEndStates())
        {
            last.addState(terminal);
        }

        // flat node number is simply the position within the list of all states
        int flatNumber = 0;
        for (RegexNFAStateBase state : strand.getAllStates())
        {
            state.setNodeNumFlat(flatNumber);
            flatNumber++;
        }

        return new RegexNFAStrandResult(new ArrayList<RegexNFAState>(strand.getStartStates()), strand.getAllStates());
    }

    /**
     * Dispatches on the node type and builds the strand for that node. Anything that is not an
     * alteration, concatenation or nested node is cast to an atom.
     *
     * @param node expression node to build states for
     * @param variableDefinitions each variable and its expressions
     * @param variableStreams variable name and its stream number plus multiple-flag
     * @param nodeNumStack child positions on the path from the root to the current node
     * @return strand for the node
     */
    private static RegexNFAStrand recursiveBuildStatesInternal(RowRegexExprNode node,
                                                               Map<String, ExprNode> variableDefinitions,
                                                               Map<String, Pair<Integer, Boolean>> variableStreams,
                                                               Stack<Integer> nodeNumStack
    )
    {
        if (node instanceof RowRegexExprNodeAlteration)
        {
            return buildAlteration(node, variableDefinitions, variableStreams, nodeNumStack);
        }
        if (node instanceof RowRegexExprNodeConcatenation)
        {
            return buildConcatenation(node, variableDefinitions, variableStreams, nodeNumStack);
        }
        if (node instanceof RowRegexExprNodeNested)
        {
            return buildNested(node, variableDefinitions, variableStreams, nodeNumStack);
        }
        return buildAtom(node, variableDefinitions, variableStreams, nodeNumStack);
    }

    /**
     * Builds the strand of one child while its position is on top of the node-number stack.
     */
    private static RegexNFAStrand buildChild(RowRegexExprNode child,
                                             int position,
                                             Map<String, ExprNode> variableDefinitions,
                                             Map<String, Pair<Integer, Boolean>> variableStreams,
                                             Stack<Integer> nodeNumStack
    )
    {
        nodeNumStack.push(position);
        RegexNFAStrand strand = recursiveBuildStatesInternal(child, variableDefinitions, variableStreams, nodeNumStack);
        nodeNumStack.pop();
        return strand;
    }

    /**
     * Alteration: the start, end and overall states of all alternatives are pooled; the result is
     * passthrough as soon as one alternative is.
     */
    private static RegexNFAStrand buildAlteration(RowRegexExprNode node,
                                                  Map<String, ExprNode> variableDefinitions,
                                                  Map<String, Pair<Integer, Boolean>> variableStreams,
                                                  Stack<Integer> nodeNumStack
    )
    {
        List<RegexNFAStateBase> pooledStarts = new ArrayList<RegexNFAStateBase>();
        List<RegexNFAStateBase> pooledStates = new ArrayList<RegexNFAStateBase>();
        List<RegexNFAStateBase> pooledEnds = new ArrayList<RegexNFAStateBase>();
        boolean anyPassthrough = false;

        int position = 0;
        for (RowRegexExprNode alternative : node.getChildNodes())
        {
            RegexNFAStrand strand = buildChild(alternative, position, variableDefinitions, variableStreams, nodeNumStack);
            position++;

            pooledStarts.addAll(strand.getStartStates());
            pooledStates.addAll(strand.getAllStates());
            pooledEnds.addAll(strand.getEndStates());
            anyPassthrough |= strand.isPassthrough();
        }

        return new RegexNFAStrand(pooledStarts, pooledEnds, pooledStates, anyPassthrough);
    }

    /**
     * Concatenation: builds one strand per child, derives the start and end states while looking
     * through passthrough strands, and links each strand to its predecessors.
     */
    private static RegexNFAStrand buildConcatenation(RowRegexExprNode node,
                                                     Map<String, ExprNode> variableDefinitions,
                                                     Map<String, Pair<Integer, Boolean>> variableStreams,
                                                     Stack<Integer> nodeNumStack
    )
    {
        List<RegexNFAStrand> strands = new ArrayList<RegexNFAStrand>();
        List<RegexNFAStateBase> everyState = new ArrayList<RegexNFAStateBase>();
        boolean allPassthrough = true;

        int position = 0;
        for (RowRegexExprNode part : node.getChildNodes())
        {
            RegexNFAStrand strand = buildChild(part, position, variableDefinitions, variableStreams, nodeNumStack);
            position++;

            strands.add(strand);
            everyState.addAll(strand.getAllStates());
            allPassthrough &= strand.isPassthrough();
        }

        // start states: those of the leading strands, up to and including the first non-passthrough strand
        List<RegexNFAStateBase> startStates = new ArrayList<RegexNFAStateBase>();
        for (RegexNFAStrand strand : strands)
        {
            startStates.addAll(strand.getStartStates());
            if (!strand.isPassthrough())
            {
                break;
            }
        }

        // end states: those of the trailing strands, walking backwards up to and including the first non-passthrough strand
        List<RegexNFAStateBase> endStates = new ArrayList<RegexNFAStateBase>();
        for (int index = strands.size() - 1; index >= 0; index--)
        {
            RegexNFAStrand strand = strands.get(index);
            endStates.addAll(strand.getEndStates());
            if (!strand.isPassthrough())
            {
                break;
            }
        }

        // link each strand to the strands before it; keep walking back only while the predecessor is passthrough
        for (int later = strands.size() - 1; later >= 1; later--)
        {
            RegexNFAStrand successor = strands.get(later);
            for (int earlier = later - 1; earlier >= 0; earlier--)
            {
                RegexNFAStrand predecessor = strands.get(earlier);
                connect(predecessor, successor);
                if (!predecessor.isPassthrough())
                {
                    break;
                }
            }
        }

        return new RegexNFAStrand(startStates, endStates, everyState, allPassthrough);
    }

    /**
     * Adds every start state of {@code to} as a next state of every end state of {@code from}.
     */
    private static void connect(RegexNFAStrand from, RegexNFAStrand to)
    {
        for (RegexNFAStateBase exit : from.getEndStates())
        {
            for (RegexNFAStateBase entry : to.getStartStates())
            {
                exit.addState(entry);
            }
        }
    }

    /**
     * Nested node: wraps the strand of its first child. The result is passthrough if the child is or
     * the nested type is optional; for a type with multiple matches the end states loop back to the
     * start states.
     */
    private static RegexNFAStrand buildNested(RowRegexExprNode node,
                                              Map<String, ExprNode> variableDefinitions,
                                              Map<String, Pair<Integer, Boolean>> variableStreams,
                                              Stack<Integer> nodeNumStack
    )
    {
        RowRegexExprNodeNested nested = (RowRegexExprNodeNested) node;
        RegexNFAStrand inner = buildChild(node.getChildNodes().get(0), 0, variableDefinitions, variableStreams, nodeNumStack);
        boolean passthrough = inner.isPassthrough() || nested.getType().isOptional();

        if (nested.getType().isMultipleMatches())
        {
            // repetition: every end state may continue at every start state, without duplicate links
            for (RegexNFAStateBase exit : inner.getEndStates())
            {
                for (RegexNFAStateBase entry : inner.getStartStates())
                {
                    if (exit.getNextStates().contains(entry))
                    {
                        continue;
                    }
                    exit.getNextStates().add(entry);
                }
            }
        }

        return new RegexNFAStrand(inner.getStartStates(), inner.getEndStates(), inner.getAllStates(), passthrough);
    }

    /**
     * Atom: creates the single state matching the atom's quantifier type, or an any-one/filter state
     * for an unquantified atom depending on whether the variable has a defining expression.
     */
    private static RegexNFAStrand buildAtom(RowRegexExprNode node,
                                            Map<String, ExprNode> variableDefinitions,
                                            Map<String, Pair<Integer, Boolean>> variableStreams,
                                            Stack<Integer> nodeNumStack
    )
    {
        RowRegexExprNodeAtom atom = (RowRegexExprNodeAtom) node;
        String tag = atom.getTag();

        // stream number and multiple-flag as registered for this variable
        Pair<Integer, Boolean> streamInfo = variableStreams.get(tag);
        int streamNum = streamInfo.getFirst();
        boolean multiple = streamInfo.getSecond();
        ExprNode expressionDef = variableDefinitions.get(tag);

        RegexNFATypeEnum type = atom.getType();
        boolean zeroToMany = (type == RegexNFATypeEnum.ZERO_TO_MANY) || (type == RegexNFATypeEnum.ZERO_TO_MANY_RELUCTANT);
        boolean oneToMany = (type == RegexNFATypeEnum.ONE_TO_MANY) || (type == RegexNFATypeEnum.ONE_TO_MANY_RELUCTANT);
        boolean oneOptional = (type == RegexNFATypeEnum.ONE_OPTIONAL) || (type == RegexNFATypeEnum.ONE_OPTIONAL_RELUCTANT);

        String nodeNum = toString(nodeNumStack);
        RegexNFAStateBase state;
        if (zeroToMany)
        {
            state = new RegexNFAStateZeroToMany(nodeNum, tag, streamNum, multiple, type.isGreedy(), expressionDef);
        }
        else if (oneToMany)
        {
            state = new RegexNFAStateOneToMany(nodeNum, tag, streamNum, multiple, type.isGreedy(), expressionDef);
        }
        else if (oneOptional)
        {
            state = new RegexNFAStateOneOptional(nodeNum, tag, streamNum, multiple, type.isGreedy(), expressionDef);
        }
        else if (expressionDef == null)
        {
            state = new RegexNFAStateAnyOne(nodeNum, tag, streamNum, multiple);
        }
        else
        {
            state = new RegexNFAStateFilter(nodeNum, tag, streamNum, multiple, expressionDef);
        }

        // the one state is at once the start, the end and the complete state list of the strand
        return new RegexNFAStrand(Collections.singletonList(state), Collections.singletonList(state),
                Collections.singletonList(state), type.isOptional());
    }

    /**
     * Renders the stack from bottom to top as a dot-separated path, for example "0.2.1";
     * an empty stack yields an empty string.
     */
    private static String toString(Stack<Integer> nodeNumStack) {
        StringBuilder path = new StringBuilder();
        for (int depth = 0; depth < nodeNumStack.size(); depth++)
        {
            if (depth > 0)
            {
                path.append(".");
            }
            path.append(nodeNumStack.get(depth).intValue());
        }
        return path.toString();
    }
}