package com.fulmicoton.multiregexp;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;

import dk.brics.automaton.Automaton;
import dk.brics.automaton.DkBricsAutomatonHelper;
import dk.brics.automaton.State;

/**
 * Table-driven automaton that runs several {@link Automaton}s in lockstep.
 *
 * <p>Each state of this automaton corresponds to one {@code MultiState}, i.e. one
 * combination of states of the underlying automata. States are identified by
 * non-negative integers; state {@code 0} is the initial state and {@code -1} is
 * used as the "no transition" marker.
 *
 * <p>Instances are built with {@link #make(List)}.
 */
public class MultiPatternAutomaton {

    /**
     * For each state id, the accept values reported by {@code MultiState.toAcceptValues()}
     * for the corresponding multi-state (presumably the indices of the patterns
     * accepted in that state; confirm against {@code MultiState}).
     */
    public final int[][] accept;

    /** {@code atLeastOneAccept[s]} is true iff {@code accept[s]} is non-empty. */
    final boolean[] atLeastOneAccept;

    /** Number of columns of the flattened transition table (one per point). */
    private final int stride;

    /** Row-major transition table: entry {@code state * stride + column}. */
    private final int[] transitions;

    /** Maps every {@code char} value to its column in the transition table. */
    private final int[] alphabet;

    /** Number of automata this instance was built from. */
    private final int nbPatterns;

    private MultiPatternAutomaton(final int[][] accept,
                                  final int[] transitions,
                                  final char[] points,
                                  final int nbPatterns) {
        this.accept = accept;
        this.transitions = transitions;
        this.alphabet = alphabet(points);
        this.stride = points.length;
        this.atLeastOneAccept = new boolean[accept.length];
        for (int i = 0; i < accept.length; i++) {
            this.atLeastOneAccept[i] = this.accept[i].length > 0;
        }
        this.nbPatterns = nbPatterns;
    }

    /**
     * Builds the char-to-column lookup table.
     *
     * <p>The column index starts at 0 and advances by one each time the scanned
     * char value equals the next entry of {@code points}. This assumes
     * {@code points} is sorted in strictly increasing order (TODO confirm against
     * {@code DkBricsAutomatonHelper.pointsUnion}).
     *
     * @param points the points delimiting the columns of the transition table
     * @return an array with one entry per {@code char} value holding its column index
     */
    private static int[] alphabet(final char[] points) {
        final int[] alphabet = new int[Character.MAX_VALUE - Character.MIN_VALUE + 1];
        int i = 0;
        for (int j = 0; j <= (Character.MAX_VALUE - Character.MIN_VALUE); j++) {
            if (i + 1 < points.length && j == points[i + 1]) {
                i++;
            }
            alphabet[j] = i;
        }
        return alphabet;
    }

    /**
     * Creates the multi-state made of the initial state of every automaton,
     * in the order of the given list.
     *
     * @param automata the automata to combine
     * @return the combined initial state
     */
    static MultiState initialState(List<Automaton> automata) {
        final State[] initialStates = new State[automata.size()];
        int c = 0;
        for (final Automaton automaton : automata) {
            initialStates[c] = automaton.getInitialState();
            c += 1;
        }
        return new MultiState(initialStates);
    }

    /**
     * Builds the combined automaton for the given automata.
     *
     * <p>Every automaton of the list is determinized in place. The multi-states
     * reachable from the combined initial state are then explored breadth-first;
     * ids are assigned in discovery order, starting with 0 for the initial state.
     *
     * @param automata the automata to combine; each one is determinized by this call
     * @return the combined automaton
     */
    static MultiPatternAutomaton make(final List<Automaton> automata) {
        for (final Automaton automaton : automata) {
            automaton.determinize();
        }

        final char[] points = DkBricsAutomatonHelper.pointsUnion(automata);

        // states that are still to be visited
        final Queue<MultiState> statesToVisits = new ArrayDeque<>();
        final MultiState initialState = initialState(automata);
        statesToVisits.add(initialState);

        // transitionList.get(k) is the transition row of the state with id k:
        // ids are handed out in the same FIFO order in which states are visited.
        final List<int[]> transitionList = new ArrayList<>();
        final Map<MultiState, Integer> multiStateIndex = new HashMap<>();
        multiStateIndex.put(initialState, 0);

        while (!statesToVisits.isEmpty()) {
            final MultiState visitingState = statesToVisits.remove();
            assert multiStateIndex.containsKey(visitingState);

            final int[] curTransitions = new int[points.length];
            for (int c = 0; c < points.length; c++) {
                final MultiState destState = visitingState.step(points[c]);
                if (destState.isNull()) {
                    curTransitions[c] = -1;
                    continue;
                }
                // Single lookup: the map never holds null values, so a null
                // result means the destination has not been seen yet.
                Integer destStateId = multiStateIndex.get(destState);
                if (destStateId == null) {
                    destStateId = multiStateIndex.size();
                    multiStateIndex.put(destState, destStateId);
                    statesToVisits.add(destState);
                }
                curTransitions[c] = destStateId;
            }
            transitionList.add(curTransitions);
        }

        assert transitionList.size() == multiStateIndex.size();

        // Flatten the per-state rows into one row-major table.
        final int nbStates = multiStateIndex.size();
        final int[] transitions = new int[nbStates * points.length];
        for (int stateId = 0; stateId < nbStates; stateId++) {
            System.arraycopy(
                    transitionList.get(stateId), 0,
                    transitions, stateId * points.length,
                    points.length);
        }

        final int[][] acceptValues = new int[nbStates][];
        for (final Map.Entry<MultiState, Integer> entry : multiStateIndex.entrySet()) {
            final Integer stateId = entry.getValue();
            final MultiState multiState = entry.getKey();
            acceptValues[stateId] = multiState.toAcceptValues();
        }

        return new MultiPatternAutomaton(acceptValues, transitions, points, automata.size());
    }

    /**
     * Follows the transition for character {@code c} from {@code state}.
     *
     * @param state a valid, non-negative state id; passing {@code -1} (the
     *              "no transition" marker) is not supported and results in an
     *              out-of-bounds table access
     * @param c     the input character
     * @return the id of the destination state, or {@code -1} if there is none
     */
    public int step(final int state, final char c) {
        return transitions[(state * this.stride) + alphabet[c - Character.MIN_VALUE]];
    }

    /**
     * @return the number of automata this instance was built from
     */
    public int getNbPatterns() {
        return this.nbPatterns;
    }
}