/*
 ***************************************************************************************
 *  Copyright (C) 2006 EsperTech, Inc. All rights reserved.                            *
 *  http://www.espertech.com/esper                                                     *
 *  http://www.espertech.com                                                           *
 *  ---------------------------------------------------------------------------------- *
 *  The software in this package is published under the terms of the GPL license       *
 *  a copy of which has been included with this distribution in the license.txt file.  *
 ***************************************************************************************
 */
package com.espertech.esper.rowregex;

import com.espertech.esper.collection.Pair;
import com.espertech.esper.epl.expression.core.ExprNode;
import com.espertech.esper.view.View;
import com.espertech.esper.view.Viewable;

import java.util.*;

/**
 * Helper for match recognize.
 */
public class EventRowRegexHelper {

    /**
     * Searches the view tree rooted at the given viewable, depth-first, for the first
     * {@link EventRowRegexNFAViewService}.
     * <p>
     * Every child view is searched in order; the search only stops once a service has been found.
     * A child subtree that does not contain a service no longer ends the search prematurely.
     *
     * @param top root of the view tree to search
     * @return the first regex view service found, or null if the tree contains none
     */
    public static EventRowRegexNFAViewService recursiveFindRegexService(Viewable top) {
        if (top instanceof EventRowRegexNFAViewService) {
            return (EventRowRegexNFAViewService) top;
        }
        for (View view : top.getViews()) {
            EventRowRegexNFAViewService found = recursiveFindRegexService(view);
            // keep looking through sibling views when this subtree has no regex service
            if (found != null) {
                return found;
            }
        }
        return null;
    }

    /**
     * Orders state entries by descending match-end event sequence number:
     * the entry with the larger sequence number sorts first.
     */
    protected static final Comparator<RegexNFAStateEntry> END_STATE_COMPARATOR = new Comparator<RegexNFAStateEntry>() {
        public int compare(RegexNFAStateEntry o1, RegexNFAStateEntry o2) {
            if (o1.getMatchEndEventSeqNo() > o2.getMatchEndEventSeqNo()) {
                return -1;
            }
            if (o1.getMatchEndEventSeqNo() < o2.getMatchEndEventSeqNo()) {
                return 1;
            }
            return 0;
        }
    };

    /**
     * Inspect variables recursively.
     * <p>
     * Each atom tag ends up in exactly one of the two sets. A tag is classified as multiple when
     * the atom itself is of a multiple-matches type, when it sits below a nested node of a
     * multiple-matches type, or when the same tag has already been seen as a single variable.
     *
     * @param parent            parent regex expression node
     * @param isMultiple        if the variable in the stack is multiple or single
     * @param variablesSingle   single variables list
     * @param variablesMultiple group variables list
     */
    protected static void recursiveInspectVariables(RowRegexExprNode parent, boolean isMultiple, Set<String> variablesSingle, Set<String> variablesMultiple) {
        if (parent instanceof RowRegexExprNodeNested) {
            RowRegexExprNodeNested nested = (RowRegexExprNodeNested) parent;
            for (RowRegexExprNode child : parent.getChildNodes()) {
                recursiveInspectVariables(child, nested.getType().isMultipleMatches() || isMultiple, variablesSingle, variablesMultiple);
            }
        } else if (parent instanceof RowRegexExprNodeAlteration) {
            // each alternative is inspected with its own fresh sets, so that a tag occurring
            // once in several alternatives is not promoted to multiple by the repeat-check below
            for (RowRegexExprNode childAlteration : parent.getChildNodes()) {
                LinkedHashSet<String> singles = new LinkedHashSet<String>();
                LinkedHashSet<String> multiples = new LinkedHashSet<String>();

                recursiveInspectVariables(childAlteration, isMultiple, singles, multiples);

                variablesMultiple.addAll(multiples);
                variablesSingle.addAll(singles);
            }
            // a tag that is multiple in any alternative is multiple overall
            variablesSingle.removeAll(variablesMultiple);
        } else if (parent instanceof RowRegexExprNodeAtom) {
            RowRegexExprNodeAtom atom = (RowRegexExprNodeAtom) parent;
            String name = atom.getTag();
            if (variablesMultiple.contains(name)) {
                return;
            }
            if (variablesSingle.contains(name)) {
                // second occurrence of the same tag: move it from single to multiple
                variablesSingle.remove(name);
                variablesMultiple.add(name);
                return;
            }
            if (atom.getType().isMultipleMatches()) {
                variablesMultiple.add(name);
                return;
            }
            if (isMultiple) {
                variablesMultiple.add(name);
            } else {
                variablesSingle.add(name);
            }
        } else {
            // any other node type: inspect the children against the same sets
            for (RowRegexExprNode child : parent.getChildNodes()) {
                recursiveInspectVariables(child, isMultiple, variablesSingle, variablesMultiple);
            }
        }
    }

    /**
     * Build a list of start states from the parent node.
     * <p>
     * After the states are built, a single new end state is added as a next-state of every end
     * state of the resulting strand, and all states are numbered consecutively starting at zero.
     *
     * @param parent                      to build start state for
     * @param variableDefinitions         each variable and its expressions
     * @param variableStreams             variable name and its stream number
     * @param exprRequiresMultimatchState indicator whether multi-match state required
     * @return strand of regex state nodes
     */
    protected static RegexNFAStrandResult recursiveBuildStartStates(RowRegexExprNode parent,
                                                                    Map<String, ExprNode> variableDefinitions,
                                                                    Map<String, Pair<Integer, Boolean>> variableStreams,
                                                                    boolean[] exprRequiresMultimatchState
    ) {
        Stack<Integer> nodeNumStack = new Stack<Integer>();

        RegexNFAStrand strand = recursiveBuildStatesInternal(parent,
                variableDefinitions,
                variableStreams,
                nodeNumStack,
                exprRequiresMultimatchState);

        // add end state
        RegexNFAStateEnd end = new RegexNFAStateEnd();
        for (RegexNFAStateBase endStates : strand.getEndStates()) {
            endStates.addState(end);
        }

        // assign node num as a counter
        int nodeNumberFlat = 0;
        for (RegexNFAStateBase theBase : strand.getAllStates()) {
            theBase.setNodeNumFlat(nodeNumberFlat++);
        }

        return new RegexNFAStrandResult(new ArrayList<RegexNFAState>(strand.getStartStates()), strand.getAllStates());
    }

    /**
     * Recursively builds the strand of states for one pattern node.
     * <p>
     * Alterations collect the start, end and all states of every alternative. Concatenations chain
     * the strands of their children. Nested nodes wrap the strand of their first child. Any other
     * node is cast to an atom and produces a single state.
     *
     * @param node                        pattern node to build states for
     * @param variableDefinitions         each variable and its expressions
     * @param variableStreams             variable name and its stream number and multiple-flag
     * @param nodeNumStack                child positions on the path from the root to the current node
     * @param exprRequiresMultimatchState indicator per stream number whether multi-match state required
     * @return strand for the node
     */
    private static RegexNFAStrand recursiveBuildStatesInternal(RowRegexExprNode node,
                                                               Map<String, ExprNode> variableDefinitions,
                                                               Map<String, Pair<Integer, Boolean>> variableStreams,
                                                               Stack<Integer> nodeNumStack,
                                                               boolean[] exprRequiresMultimatchState
    ) {
        if (node instanceof RowRegexExprNodeAlteration) {
            int nodeNum = 0;

            List<RegexNFAStateBase> cumulativeStartStates = new ArrayList<RegexNFAStateBase>();
            List<RegexNFAStateBase> cumulativeStates = new ArrayList<RegexNFAStateBase>();
            List<RegexNFAStateBase> cumulativeEndStates = new ArrayList<RegexNFAStateBase>();

            // an alteration is passthrough as soon as any one alternative is
            boolean isPassthrough = false;
            for (RowRegexExprNode child : node.getChildNodes()) {
                nodeNumStack.push(nodeNum);
                RegexNFAStrand strand = recursiveBuildStatesInternal(child,
                        variableDefinitions,
                        variableStreams,
                        nodeNumStack,
                        exprRequiresMultimatchState);
                nodeNumStack.pop();

                cumulativeStartStates.addAll(strand.getStartStates());
                cumulativeStates.addAll(strand.getAllStates());
                cumulativeEndStates.addAll(strand.getEndStates());

                if (strand.isPassthrough()) {
                    isPassthrough = true;
                }

                nodeNum++;
            }

            return new RegexNFAStrand(cumulativeStartStates, cumulativeEndStates, cumulativeStates, isPassthrough);
        } else if (node instanceof RowRegexExprNodeConcatenation) {
            int nodeNum = 0;

            // a concatenation is passthrough only when every element is
            boolean isPassthrough = true;
            List<RegexNFAStateBase> cumulativeStates = new ArrayList<RegexNFAStateBase>();
            RegexNFAStrand[] strands = new RegexNFAStrand[node.getChildNodes().size()];

            for (RowRegexExprNode child : node.getChildNodes()) {
                nodeNumStack.push(nodeNum);
                strands[nodeNum] = recursiveBuildStatesInternal(child,
                        variableDefinitions,
                        variableStreams,
                        nodeNumStack,
                        exprRequiresMultimatchState);
                nodeNumStack.pop();

                cumulativeStates.addAll(strands[nodeNum].getAllStates());
                if (!strands[nodeNum].isPassthrough()) {
                    isPassthrough = false;
                }

                nodeNum++;
            }

            // determine start states: all states until the first non-passthrough start state
            List<RegexNFAStateBase> startStates = new ArrayList<RegexNFAStateBase>();
            for (int i = 0; i < strands.length; i++) {
                startStates.addAll(strands[i].getStartStates());
                if (!strands[i].isPassthrough()) {
                    break;
                }
            }

            // determine end states: all states from the back until the last non-passthrough end state
            List<RegexNFAStateBase> endStates = new ArrayList<RegexNFAStateBase>();
            for (int i = strands.length - 1; i >= 0; i--) {
                endStates.addAll(strands[i].getEndStates());
                if (!strands[i].isPassthrough()) {
                    break;
                }
            }

            // hook up the end state of each strand with the start states of each next strand
            for (int i = strands.length - 1; i >= 1; i--) {
                RegexNFAStrand current = strands[i];
                // walk backwards over prior strands; passthrough strands can be skipped over,
                // so keep linking until the first non-passthrough prior strand has been linked
                for (int j = i - 1; j >= 0; j--) {
                    RegexNFAStrand prior = strands[j];

                    for (RegexNFAStateBase endState : prior.getEndStates()) {
                        for (RegexNFAStateBase startState : current.getStartStates()) {
                            endState.addState(startState);
                        }
                    }

                    if (!prior.isPassthrough()) {
                        break;
                    }
                }
            }

            return new RegexNFAStrand(startStates, endStates, cumulativeStates, isPassthrough);
        } else if (node instanceof RowRegexExprNodeNested) {
            RowRegexExprNodeNested nested = (RowRegexExprNodeNested) node;
            nodeNumStack.push(0);
            RegexNFAStrand strand = recursiveBuildStatesInternal(node.getChildNodes().get(0),
                    variableDefinitions,
                    variableStreams,
                    nodeNumStack,
                    exprRequiresMultimatchState);
            nodeNumStack.pop();

            boolean isPassthrough = strand.isPassthrough() || nested.getType().isOptional();

            // if this is a repeating node then pipe back each end state to each begin state
            if (nested.getType().isMultipleMatches()) {
                for (RegexNFAStateBase endstate : strand.getEndStates()) {
                    for (RegexNFAStateBase startstate : strand.getStartStates()) {
                        if (!endstate.getNextStates().contains(startstate)) {
                            endstate.getNextStates().add(startstate);
                        }
                    }
                }
            }
            return new RegexNFAStrand(strand.getStartStates(), strand.getEndStates(), strand.getAllStates(), isPassthrough);
        } else {
            RowRegexExprNodeAtom atom = (RowRegexExprNodeAtom) node;

            // assign stream number for single-variables for most direct expression eval; multiple-variable gets -1
            int streamNum = variableStreams.get(atom.getTag()).getFirst();
            boolean multiple = variableStreams.get(atom.getTag()).getSecond();
            ExprNode expressionDef = variableDefinitions.get(atom.getTag());
            boolean exprRequiresMultimatch = exprRequiresMultimatchState[streamNum];

            RegexNFAStateBase nextState;
            if ((atom.getType() == RegexNFATypeEnum.ZERO_TO_MANY) || (atom.getType() == RegexNFATypeEnum.ZERO_TO_MANY_RELUCTANT)) {
                nextState = new RegexNFAStateZeroToMany(toString(nodeNumStack), atom.getTag(), streamNum, multiple, atom.getType().isGreedy(), expressionDef, exprRequiresMultimatch);
            } else if ((atom.getType() == RegexNFATypeEnum.ONE_TO_MANY) || (atom.getType() == RegexNFATypeEnum.ONE_TO_MANY_RELUCTANT)) {
                nextState = new RegexNFAStateOneToMany(toString(nodeNumStack), atom.getTag(), streamNum, multiple, atom.getType().isGreedy(), expressionDef, exprRequiresMultimatch);
            } else if ((atom.getType() == RegexNFATypeEnum.ONE_OPTIONAL) || (atom.getType() == RegexNFATypeEnum.ONE_OPTIONAL_RELUCTANT)) {
                nextState = new RegexNFAStateOneOptional(toString(nodeNumStack), atom.getTag(), streamNum, multiple, atom.getType().isGreedy(), expressionDef, exprRequiresMultimatch);
            } else if (expressionDef == null) {
                // no defining expression for the variable
                nextState = new RegexNFAStateAnyOne(toString(nodeNumStack), atom.getTag(), streamNum, multiple);
            } else {
                nextState = new RegexNFAStateFilter(toString(nodeNumStack), atom.getTag(), streamNum, multiple, expressionDef, exprRequiresMultimatch);
            }

            // a single atom state is at once the start, end and only state of its strand
            return new RegexNFAStrand(Collections.singletonList(nextState), Collections.singletonList(nextState),
                    Collections.singletonList(nextState), atom.getType().isOptional());
        }
    }

    /**
     * Renders the node number stack as a dot-separated string, bottom of the stack first.
     *
     * @param nodeNumStack child positions on the path to the current node
     * @return dot-separated node path, empty string for an empty stack
     */
    private static String toString(Stack<Integer> nodeNumStack) {
        StringBuilder builder = new StringBuilder();
        String delimiter = "";
        for (Integer atom : nodeNumStack) {
            builder.append(delimiter);
            builder.append(Integer.toString(atom));
            delimiter = ".";
        }
        return builder.toString();
    }

    /**
     * Determines, per atom tag, the tags of the other atoms that occur before it within the
     * concatenations enclosing it.
     * <p>
     * Tags of atoms that have no preceding element in any enclosing concatenation get no map entry.
     *
     * @param pattern root of the pattern
     * @return map of atom tag to the set of tags occurring before it
     */
    public static Map<String, Set<String>> determineVisibility(RowRegexExprNode pattern) {
        Map<String, Set<String>> map = new HashMap<String, Set<String>>();
        ArrayDeque<RowRegexExprNode> path = new ArrayDeque<RowRegexExprNode>();
        recursiveFindPatternAtoms(pattern, path, map);
        return map;
    }

    /**
     * Walks the pattern tree and invokes {@link #handleAtom} for each atom that is a child of a
     * visited node, keeping track of the path from the root to the current node.
     *
     * @param parent node whose children are visited
     * @param path   nodes from the root down to the parent of {@code parent}; restored on return
     * @param map    visibility map to populate
     */
    private static void recursiveFindPatternAtoms(RowRegexExprNode parent, ArrayDeque<RowRegexExprNode> path, Map<String, Set<String>> map) {
        path.add(parent);
        for (RowRegexExprNode child : parent.getChildNodes()) {
            if (child instanceof RowRegexExprNodeAtom) {
                handleAtom((RowRegexExprNodeAtom) child, path, map);
            } else {
                recursiveFindPatternAtoms(child, path, map);
            }
        }
        path.removeLast();
    }

    /**
     * Collects, for one atom, the tags of all atoms found in the elements preceding it in each
     * concatenation on its path, and merges them into the visibility map under the atom's tag.
     *
     * @param atom atom to compute visibility for
     * @param path nodes from the root down to the direct parent of the atom
     * @param map  visibility map to populate
     */
    private static void handleAtom(RowRegexExprNodeAtom atom, ArrayDeque<RowRegexExprNode> path, Map<String, Set<String>> map) {

        RowRegexExprNode[] patharr = path.toArray(new RowRegexExprNode[path.size()]);
        Set<String> identifiers = null;

        for (int i = 0; i < patharr.length; i++) {
            RowRegexExprNode parent = patharr[i];
            if (!(parent instanceof RowRegexExprNodeConcatenation)) {
                continue;
            }

            RowRegexExprNodeConcatenation concat = (RowRegexExprNodeConcatenation) parent;
            // position, within this concatenation, of the child that leads to the atom:
            // the atom itself for its direct parent, otherwise the next node on the path
            int indexWithinConcat;
            if (i == patharr.length - 1) {
                indexWithinConcat = parent.getChildNodes().indexOf(atom);
            } else {
                indexWithinConcat = parent.getChildNodes().indexOf(patharr[i + 1]);
            }

            if (identifiers == null && indexWithinConcat > 0) {
                identifiers = new HashSet<String>();
            }

            // every element before that position contributes its atom tags
            for (int j = 0; j < indexWithinConcat; j++) {
                RowRegexExprNode concatChildNode = concat.getChildNodes().get(j);
                recursiveCollectAtomsWExclude(concatChildNode, identifiers, atom.getTag());
            }
        }

        if (identifiers == null) {
            return;
        }

        Set<String> existingVisibility = map.get(atom.getTag());
        if (existingVisibility == null) {
            map.put(atom.getTag(), identifiers);
        } else {
            existingVisibility.addAll(identifiers);
        }
    }

    /**
     * Adds the tags of all atoms in the given subtree to the identifier set, skipping atoms whose
     * tag equals the excluded tag.
     *
     * @param node        root of the subtree to collect from
     * @param identifiers set receiving the collected tags
     * @param excludedTag tag that is not collected
     */
    private static void recursiveCollectAtomsWExclude(RowRegexExprNode node, Set<String> identifiers, String excludedTag) {
        if (node instanceof RowRegexExprNodeAtom) {
            RowRegexExprNodeAtom atom = (RowRegexExprNodeAtom) node;
            if (!excludedTag.equals(atom.getTag())) {
                identifiers.add(atom.getTag());
            }
        }
        for (RowRegexExprNode child : node.getChildNodes()) {
            recursiveCollectAtomsWExclude(child, identifiers, excludedTag);
        }
    }
}