package com.fulmicoton.multiregexp;
import dk.brics.automaton.Automaton;
import dk.brics.automaton.RunAutomaton;
import dk.brics.automaton.State;
import dk.brics.automaton.StatePair;
import dk.brics.automaton.Transition;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Finds successive, non-overlapping matches of several patterns in a character sequence.
 *
 * <p>A combined {@link MultiPatternAutomaton} is used to detect the position at which a
 * match of any pattern ends. The per-pattern forward automaton and its reversed
 * counterpart are then used to pin down the exact boundaries of that match.
 */
public class MultiPatternSearcher {

    /** Combined automaton, stepped from state 0, that flags the end of a match. */
    private final MultiPatternAutomaton automaton;

    /** One forward automaton per pattern, indexed by pattern id. */
    private final List<RunAutomaton> individualAutomatons;

    /** One reversed automaton per pattern, indexed by pattern id. */
    private final List<RunAutomaton> inverseAutomatons;

    /**
     * Builds a searcher.
     *
     * @param automaton            the combined automaton used to detect match ends
     * @param individualAutomatons one automaton per pattern, in pattern-id order; each is
     *                             compiled as-is for forward scanning and reversed for
     *                             backward scanning
     */
    MultiPatternSearcher(final MultiPatternAutomaton automaton,
                         final List<Automaton> individualAutomatons) {
        this.automaton = automaton;
        this.individualAutomatons = new ArrayList<>(individualAutomatons.size());
        for (final Automaton individualAutomaton : individualAutomatons) {
            this.individualAutomatons.add(new RunAutomaton(individualAutomaton));
        }
        this.inverseAutomatons = new ArrayList<>(individualAutomatons.size());
        for (final Automaton individualAutomaton : individualAutomatons) {
            final Automaton inverseAutomaton = inverseAutomaton(individualAutomaton);
            this.inverseAutomatons.add(new RunAutomaton(inverseAutomaton));
        }
    }

    /**
     * Builds an automaton in which every transition of {@code automaton} is reversed.
     *
     * <p>Each state of the input gets a fresh counterpart. The counterpart of the input's
     * initial state becomes accepting, and a new initial state is linked by epsilon
     * transitions to the counterpart of every accepting state of the input. The input
     * automaton's states are only read, never modified, by this method.
     *
     * @param automaton the automaton to reverse
     * @return a new automaton made of the reversed transitions
     */
    static Automaton inverseAutomaton(final Automaton automaton) {
        // One fresh state per original state.
        final Map<State, State> stateMapping = new HashMap<>();
        for (final State state : automaton.getStates()) {
            stateMapping.put(state, new State());
        }
        // Flip every edge: (state -> dest) becomes (dest' -> state') on the same char range.
        for (final State state : automaton.getStates()) {
            for (final Transition transition : state.getTransitions()) {
                final State invDest = stateMapping.get(state);
                final State invOrig = stateMapping.get(transition.getDest());
                invOrig.addTransition(
                        new Transition(transition.getMin(), transition.getMax(), invDest));
            }
        }
        final Automaton inverseAutomaton = new Automaton();
        // Reaching the original initial state while reading backward means a full match.
        stateMapping.get(automaton.getInitialState()).setAccept(true);
        final State initialState = new State();
        inverseAutomaton.setInitialState(initialState);
        // The reversed automaton may start from any of the original accepting states.
        final List<StatePair> epsilons = new ArrayList<>();
        for (final State acceptState : automaton.getAcceptStates()) {
            final State invOrigState = stateMapping.get(acceptState);
            final StatePair statePair = new StatePair(initialState, invOrigState);
            epsilons.add(statePair);
        }
        inverseAutomaton.addEpsilons(epsilons);
        return inverseAutomaton;
    }

    /**
     * Creates a cursor that searches {@code s} from its beginning.
     *
     * @param s the sequence to search
     * @return a cursor; call {@link Cursor#next()} to reach the first match
     */
    public Cursor search(CharSequence s) {
        return search(s, 0);
    }

    /**
     * Creates a cursor that searches {@code s} starting at {@code position}.
     *
     * @param s        the sequence to search
     * @param position the offset at which the search begins
     * @return a cursor; call {@link Cursor#next()} to reach the first match
     */
    public Cursor search(CharSequence s, int position) {
        return new Cursor(s, position);
    }

    /** Iterates over the matches found in one character sequence. */
    public class Cursor {

        private final CharSequence seq;
        private int matchingPattern = -1;
        private int end;
        private int start = -1;

        Cursor(CharSequence seq, int position) {
            this.seq = seq;
            this.end = position;
        }

        /**
         * @return the offset of the first character of the current match,
         *         or -1 if the last call to {@link #next()} found nothing
         */
        public int start() {
            return this.start;
        }

        /**
         * @return the offset right after the last character of the current match;
         *         this is also the offset from which the following search resumes
         */
        public int end() {
            return this.end;
        }

        /**
         * @return the id of the pattern of the current match, or -1 if there is none
         */
        public int match() {
            return this.matchingPattern;
        }

        /**
         * @return true if the cursor currently points at a match
         */
        public boolean found() {
            return this.matchingPattern >= 0;
        }

        /**
         * Advances the cursor to the next match of any pattern.
         * Matches returned cannot overlap.
         *
         * <p>Any ambiguity is resolved according to the following method.
         * <ol>
         *   <li>we advance up to the end of at least one pattern;</li>
         *   <li>if more than one pattern is found, we choose the one with the highest
         *       priority (== lowest id);</li>
         *   <li>we choose the leftmost possible start for this pattern to match at the
         *       end we found, without going back before the offset at which this
         *       search began (the end of the previous match);</li>
         *   <li>finally, we extend the match as much as possible to the right.</li>
         * </ol>
         *
         * <p>When the method returns true, {@link #start()} returns the offset of the
         * first character of the match, {@link #end()} the offset of the character
         * right after the match, and {@link #match()} the id of the matching pattern.
         *
         * @return true if a match was found, false otherwise
         */
        public boolean next() {
            this.start = -1;
            this.matchingPattern = -1;
            final int seqLength = this.seq.length();
            // Nothing located before this offset may belong to the new match: it is
            // either before the requested search position or inside the previous match.
            final int searchStart = this.end;

            { // first find a match and "choose the pattern".
                int state = 0;
                for (int pos = searchStart; pos < seqLength; pos++) {
                    final char c = this.seq.charAt(pos);
                    state = automaton.step(state, c);
                    if (automaton.atLeastOneAccept[state]) {
                        // We found a match! The first listed pattern is the one kept.
                        this.matchingPattern = automaton.accept[state][0];
                        // Temporarily the index of the last matched character.
                        this.end = pos;
                        break;
                    }
                }
                if (this.matchingPattern == -1) {
                    return false;
                }
            }

            { // we rewind using the backward automaton to find the start of the pattern.
                final RunAutomaton backwardAutomaton =
                        inverseAutomatons.get(this.matchingPattern);
                int state = backwardAutomaton.getInitialState();
                // Bounded by searchStart so that the match cannot overlap the previous one.
                for (int pos = this.end; pos >= searchStart; pos--) {
                    final char c = this.seq.charAt(pos);
                    state = backwardAutomaton.step(state, c);
                    if (state == -1) {
                        break;
                    }
                    if (backwardAutomaton.isAccept(state)) {
                        // Keep going: a later accept is further left.
                        start = pos;
                    }
                }
            }

            { // we go forward again using the forward automaton to find the end of the pattern.
                // NOTE(review): if the backward scan never accepted, start is still -1 and
                // charAt below throws; presumably this only happens for a pattern that
                // accepts the empty string -- confirm against MultiPatternAutomaton.
                final RunAutomaton forwardAutomaton =
                        individualAutomatons.get(this.matchingPattern);
                int state = forwardAutomaton.getInitialState();
                for (int pos = this.start; pos < seqLength; pos++) {
                    final char c = this.seq.charAt(pos);
                    state = forwardAutomaton.step(state, c);
                    if (state == -1) {
                        break;
                    }
                    if (forwardAutomaton.isAccept(state)) {
                        // Exclusive end offset of the longest match seen so far.
                        this.end = pos + 1;
                    }
                }
            }
            return true;
        }
    }
}