/* Created on Feb 1, 2013 by Florian Leitner.
 * Copyright 2013. All rights reserved. */
package com.tuplejump.stargate.lucene.query.fsm;

import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * This scanner implements sequence comparison over <i>Iterators</i> using the
 * <b>Knuth-Morris-Pratt</b> pattern matching algorithm.
 * <p>
 * As this is an exact matcher, elements are compared using <code>equals(Object)</code> (they are
 * looked up as keys of a <code>HashMap</code>, so <code>hashCode()</code> must be consistent with
 * it). Note that the empty pattern is illegal, while a <code>null</code> in the pattern is allowed
 * to match a <code>null</code> in the sequence if at the right position.
 *
 * @author Florian Leitner
 */
public final class ExactScanner<E> extends MatcherBase<E> {
  /**
   * The KMP 'alphabetized' transition tables: for every distinct pattern element, an array that
   * maps the current state (index) to the state reached after reading that element.
   */
  private final Map<E, int[]> dfa;

  /**
   * Create a scanner for a pattern sequence, preprocessing the transition tables.
   * <p>
   * The pattern is traversed sequentially while the tables are built, so lists without fast
   * positional access (such as a <code>LinkedList</code>) do not cause quadratic set-up cost.
   *
   * @param pattern sequence that should lead to a match
   * @throws IllegalArgumentException if the pattern is empty (expected to be raised by the
   *         superclass constructor, which is not part of this file)
   */
  public ExactScanner(final List<E> pattern) {
    super(pattern);
    dfa = new HashMap<E, int[]>();

    // initialize the transition tables: one (all-zero) table per distinct element
    for (E element : pattern) {
      dfa.put(element, new int[end]);
    }

    dfa.get(pattern.get(0))[0] = 1; // initial state match transition

    // calculate the transitions, reading the elements after the first one in order
    final Iterator<E> remaining = pattern.listIterator(1);

    for (int base = 0, pointer = 1; pointer < end; pointer++) {
      // set state changes for mismatches: behave as the current base state would
      for (int[] table : dfa.values()) {
        table[pointer] = table[base];
      }

      final int[] next = dfa.get(remaining.next()); // get the table for the current element
      next[pointer] = pointer + 1; // store state change for match
      base = next[base]; // update current base state
    }
  }

  /**
   * Convenience method to construct the scanner from an iterator.
   *
   * @param pattern iterator over the elements that should lead to a match; it is consumed
   *        entirely
   */
  public ExactScanner(final Iterator<E> pattern) {
    this(newLinkedList(pattern));
  }

  /**
   * Return a linked list of the content in <code>iterator</code>.
   *
   * @param iterator source of the elements; consumed until exhausted
   * @return a new list holding the elements in iteration order
   */
  private static <E> LinkedList<E> newLinkedList(Iterator<E> iterator) {
    final LinkedList<E> elements = new LinkedList<E>();

    while (iterator.hasNext()) {
      elements.add(iterator.next());
    }

    return elements;
  }

  /**
   * Returns an updated <code>pointer</code> using the transition table.
   *
   * @param element the element just read from the scanned sequence
   * @param pointer the current state
   * @return the next state; <code>0</code> (the initial state) if the element does not occur in
   *         the pattern
   */
  private int transition(final E element, final int pointer) {
    // only non-null tables are ever stored, so null means "element not in the pattern"
    final int[] table = dfa.get(element);

    if (table == null) {
      return 0; // unknown element: return the initial state (pointer)
    }

    return table[pointer]; // known element: next state given the current state (pointer)
  }

  /** Returns the number of distinct elements in the pattern (the size of the transition map). */
  @Override
  public int radix() {
    return dfa.size();
  }

  /**
   * Determine if the pattern matches anywhere in a stream.
   * <p>
   * The iteration will halt <i>after</i> the last element of a valid pattern has been found or
   * consumes the entire stream otherwise.
   *
   * @param seqIt the sequence stream to scan
   * @return <code>true</code> if the stream contained the pattern
   */
  public boolean scan(final Iterator<E> seqIt) {
    int pointer = 0;

    while (seqIt.hasNext()) {
      pointer = transition(seqIt.next(), pointer);

      if (pointer == end) {
        return true;
      }
    }

    return false;
  }
}