package edu.princeton.cs.algs4.ch55; import edu.princeton.cs.algs4.ch13.Bag; import edu.princeton.cs.algs4.ch13.Stack; import edu.princeton.cs.algs4.ch42.Digraph; import edu.princeton.cs.algs4.ch42.DirectedDFS; import edu.princeton.cs.introcs.*; /************************************************************************* * Compilation: javac NFA.java * Execution: java NFA regexp text * Dependencies: Stack.java Bag.java Digraph.java DirectedDFS.java * * % java NFA "(A*B|AC)D" AAAABD * true * * % java NFA "(A*B|AC)D" AAAAC * false * * % java NFA "(a|(bc)*d)*" abcbcd * true * * % java NFA "(a|(bc)*d)*" abcbcbcdaaaabcbcdaaaddd * true * * Remarks * ----------- * - This version does not suport the + operator or multiway-or. * * - This version does not handle character classes, * metacharacters (either in the text or pattern), capturing * capabilities, greedy vs. relucantant modifier, and * other features in industrial-strength implementations such * as java.util.regexp. * *************************************************************************/ public class NFA { private Digraph G; // digraph of epsilon transitions private String regexp; // regular expression private int M; // number of characters in regular expression // Create the NFA for the given RE public NFA(String regexp) { this.regexp = regexp; M = regexp.length(); Stack<Integer> ops = new Stack<Integer>(); G = new Digraph(M+1); for (int i = 0; i < M; i++) { int lp = i; if (regexp.charAt(i) == '(' || regexp.charAt(i) == '|') ops.push(i); else if (regexp.charAt(i) == ')') { int or = ops.pop(); // 2-way or operator if (regexp.charAt(or) == '|') { lp = ops.pop(); G.addEdge(lp, or+1); G.addEdge(or, i); } else if (regexp.charAt(or) == '(') lp = or; else assert false; } // closure operator (uses 1-character lookahead) if (i < M-1 && regexp.charAt(i+1) == '*') { G.addEdge(lp, i+1); G.addEdge(i+1, lp); } if (regexp.charAt(i) == '(' || regexp.charAt(i) == '*' || regexp.charAt(i) == ')') G.addEdge(i, i+1); } } // Does the NFA recognize txt? public boolean recognizes(String txt) { DirectedDFS dfs = new DirectedDFS(G, 0); Bag<Integer> pc = new Bag<Integer>(); for (int v = 0; v < G.V(); v++) if (dfs.marked(v)) pc.add(v); // Compute possible NFA states for txt[i+1] for (int i = 0; i < txt.length(); i++) { Bag<Integer> match = new Bag<Integer>(); for (int v : pc) { if (v == M) continue; if ((regexp.charAt(v) == txt.charAt(i)) || regexp.charAt(v) == '.') match.add(v+1); } dfs = new DirectedDFS(G, match); pc = new Bag<Integer>(); for (int v = 0; v < G.V(); v++) if (dfs.marked(v)) pc.add(v); // optimization if no states reachable if (pc.size() == 0) return false; } // check for accept state for (int v : pc) if (v == M) return true; return false; } public static void main(String[] args) { String regexp = "(" + args[0] + ")"; String txt = args[1]; if (txt.indexOf('|') >= 0) { throw new IllegalArgumentException("| character in text is not supported"); } NFA nfa = new NFA(regexp); StdOut.println(nfa.recognizes(txt)); } } /************************************************************************* * Copyright 2002-2012, Robert Sedgewick and Kevin Wayne. * * This file is part of algs4-package.jar, which accompanies the textbook * * Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne, * Addison-Wesley Professional, 2011, ISBN 0-321-57351-X. * http://algs4.cs.princeton.edu * * * algs4-package.jar is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * algs4-package.jar is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with algs4-package.jar. If not, see http://www.gnu.org/licenses. *************************************************************************/