package edu.stanford.nlp.fsm; import edu.stanford.nlp.util.logging.Redwood; import edu.stanford.nlp.util.DisjointSet; import edu.stanford.nlp.util.ErasureUtils; import edu.stanford.nlp.util.FastDisjointSet; import edu.stanford.nlp.util.Generics; import edu.stanford.nlp.util.UnorderedPair; import java.util.*; /** * DFSAMinimizer minimizes (unweighted) deterministic finite state * automata. * * @author Dan Klein * @version 12/14/2000 */ public final class DFSAMinimizer { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(DFSAMinimizer.class); static boolean debug = false; private DFSAMinimizer() {} // static methods class static class IntPair { int i; int j; IntPair(int i, int j) { this.i = i; this.j = j; } } public static <T, S> void unweightedMinimize(DFSA<T, S> dfsa) { Set<DFSAState<T, S>> states = dfsa.states(); long time = System.currentTimeMillis(); if (debug) { time = System.currentTimeMillis(); log.info("\nStarting on " + dfsa.dfsaID); log.info(" -- " + states.size() + " states."); } int numStates = states.size(); // assign ids int id = 0; DFSAState<T, S>[] state = ErasureUtils.<DFSAState<T, S>[]>uncheckedCast(new DFSAState[numStates]); Map<DFSAState<T, S>, Integer> stateToID = Generics.newHashMap(); for (DFSAState<T, S> state1 : states) { state[id] = state1; stateToID.put(state1, Integer.valueOf(id)); id++; } // initialize grid boolean[][] distinct = new boolean[numStates][numStates]; List<IntPair>[][] dependentList = ErasureUtils.<List<IntPair>[][]>uncheckedCast(new List[numStates][numStates]); for (int i = 0; i < numStates; i++) { for (int j = i + 1; j < numStates; j++) { distinct[i][j] = state[i].isAccepting() != state[j].isAccepting(); } } if (debug) { log.info("Initialized: " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); } // visit all non-distinct for (int i = 0; i < numStates; i++) { for (int j = i + 1; j < numStates; j++) { if (!distinct[i][j]) { DFSAState<T, S> state1 = state[i]; DFSAState<T, S> state2 = state[j]; IntPair ip = new IntPair(i, j); // check if some input distinguishes this pair Set<T> inputs = Generics.newHashSet(); inputs.addAll(state1.continuingInputs()); inputs.addAll(state2.continuingInputs()); boolean distinguishable = false; Set<IntPair> pendingIPairs = Generics.newHashSet(); Iterator<T> inputI = inputs.iterator(); while (inputI.hasNext() && !distinguishable) { T input = inputI.next(); DFSATransition<T, S> transition1 = state1.transition(input); DFSATransition<T, S> transition2 = state2.transition(input); if ((transition1 == null) != (transition2 == null)) { distinguishable = true; } if (transition1 != null && transition2 != null) { DFSAState<T, S> target1 = transition1.getTarget(); DFSAState<T, S> target2 = transition2.getTarget(); int num1 = stateToID.get(target1).intValue(); int num2 = stateToID.get(target2).intValue(); IntPair targetIPair = new IntPair(num1, num2); if (num1 != num2) { if (distinct[num1][num2]) { distinguishable = true; } else { pendingIPairs.add(targetIPair); } } } } if (distinguishable) { // if the pair is distinguishable, record that List<IntPair> markStack = new ArrayList<>(); markStack.add(ip); while (!markStack.isEmpty()) { IntPair ipToMark = markStack.get(markStack.size() - 1); markStack.remove(markStack.size() - 1); distinct[ipToMark.i][ipToMark.j] = true; List<IntPair> addList = dependentList[ipToMark.i][ipToMark.j]; if (addList != null) { markStack.addAll(addList); } } } else { // otherwise add it to any pending pairs for (IntPair pendingIPair : pendingIPairs) { List<IntPair> dependentList1 = dependentList[pendingIPair.i][pendingIPair.j]; if (dependentList1 == null) { dependentList1 = new ArrayList<>(); dependentList[pendingIPair.i][pendingIPair.j] = dependentList1; } dependentList1.add(ip); } } } } } if (debug) { log.info("All pairs marked: " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); } // decide what canonical state each state will map to... DisjointSet<DFSAState<T, S>> stateClasses = new FastDisjointSet<>(states); for (int i = 0; i < numStates; i++) { for (int j = i + 1; j < numStates; j++) { if (!distinct[i][j]) { DFSAState<T, S> state1 = state[i]; DFSAState<T, S> state2 = state[j]; stateClasses.union(state1, state2); } } } Map<DFSAState<T, S>, DFSAState<T, S>> stateToRep = Generics.newHashMap(); for (DFSAState<T, S> state1 : states) { DFSAState<T, S> rep = stateClasses.find(state1); stateToRep.put(state1, rep); } if (debug) { log.info("Canonical states chosen: " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); } // reduce the DFSA by replacing transition targets with their reps for (DFSAState<T, S> state1 : states) { if (!state1.equals(stateToRep.get(state1))) { continue; } for (DFSATransition<T, S> transition : state1.transitions()) { //if (!transition.target.equals(stateToRep.get(transition.target))) // System.out.println(Utils.pad(transition.target.toString(),30)+stateToRep.get(transition.target)); transition.target = stateToRep.get(transition.target); } } dfsa.initialState = stateToRep.get(dfsa.initialState); if (debug) { log.info("Done: " + (System.currentTimeMillis() - time)); } // done! } static <T, S> void unweightedMinimizeOld(DFSA<T, S> dfsa) { Set<DFSAState<T, S>> states = dfsa.states(); Map<UnorderedPair<DFSAState<T, S>, DFSAState<T, S>>, List<UnorderedPair<DFSAState<T, S>, DFSAState<T, S>>>> stateUPairToDependentUPairList = Generics.newHashMap(states.size() * states.size() / 2 + 1); Map<UnorderedPair<DFSAState<T, S>, DFSAState<T, S>>, Boolean> stateUPairToDistinguished = Generics.newHashMap(states.size() * states.size() / 2 + 1); int[] c = new int[states.size() * states.size() / 2 + 1]; int streak = 0; int collisions = 0; int entries = 0; long time = System.currentTimeMillis(); if (debug) { time = System.currentTimeMillis(); log.info("Starting on " + dfsa.dfsaID); log.info(" -- " + states.size() + " states."); } // initialize grid int numDone = 0; for (DFSAState<T, S> state1 : states) { for (DFSAState<T, S> state2 : states) { UnorderedPair<DFSAState<T, S>, DFSAState<T, S>> up = new UnorderedPair<>(state1, state2); if (state1.equals(state2)) { continue; } if (stateUPairToDistinguished.containsKey(up)) { continue; } int bucket = (up.hashCode() & 0x7FFFFFFF) % (states.size() * states.size() / 2 + 1); c[bucket]++; entries++; if (c[bucket] > 1) { collisions++; streak = 0; } else { streak++; } if (state1.isAccepting() != state2.isAccepting()) { //log.info(Utils.pad((String)state1.stateID, 20)+" "+state2.stateID); stateUPairToDistinguished.put(up, Boolean.TRUE); } else { stateUPairToDistinguished.put(up, Boolean.FALSE); //stateUPairToDependentUPairList.put(up, new ArrayList()); } } numDone++; if (numDone % 20 == 0) { log.info("\r" + numDone + " " + ((double) collisions / (double) entries)); } } if (debug) { log.info("\nInitialized: " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); } // visit each undistinguished pair for (UnorderedPair<DFSAState<T, S>, DFSAState<T, S>> up : stateUPairToDistinguished.keySet()) { DFSAState<T, S> state1 = up.first; DFSAState<T, S> state2 = up.second; if (stateUPairToDistinguished.get(up).equals(Boolean.TRUE)) { continue; } // check if some input distinguishes this pair Set<T> inputs = Generics.newHashSet(state1.continuingInputs()); inputs.addAll(state2.continuingInputs()); boolean distinguishable = false; Set<UnorderedPair<DFSAState<T, S>, DFSAState<T, S>>> pendingUPairs = Generics.newHashSet(); Iterator<T> inputI = inputs.iterator(); while (inputI.hasNext() && !distinguishable) { T input = inputI.next(); DFSATransition<T, S> transition1 = state1.transition(input); DFSATransition<T, S> transition2 = state2.transition(input); if ((transition1 == null) != (transition2 == null)) { distinguishable = true; } if (transition1 != null && transition2 != null) { DFSAState<T, S> target1 = transition1.getTarget(); DFSAState<T, S> target2 = transition2.getTarget(); UnorderedPair<DFSAState<T, S>, DFSAState<T, S>> targetUPair = new UnorderedPair<>(target1, target2); if (!target1.equals(target2)) { if (stateUPairToDistinguished.get(targetUPair).equals(Boolean.TRUE)) { distinguishable = true; } else { pendingUPairs.add(targetUPair); } } } } // if the pair is distinguishable, record that if (distinguishable) { List<UnorderedPair<DFSAState<T, S>, DFSAState<T, S>>> markStack = new ArrayList<>(); markStack.add(up); while (!markStack.isEmpty()) { UnorderedPair<DFSAState<T, S>, DFSAState<T, S>> upToMark = markStack.get(markStack.size() - 1); markStack.remove(markStack.size() - 1); stateUPairToDistinguished.put(upToMark, Boolean.TRUE); List<UnorderedPair<DFSAState<T, S>, DFSAState<T, S>>> addList = stateUPairToDependentUPairList.get(upToMark); if (addList != null) { markStack.addAll(addList); stateUPairToDependentUPairList.get(upToMark).clear(); } } } else { // otherwise add it to any pending pairs for (UnorderedPair<DFSAState<T, S>, DFSAState<T, S>> pendingUPair : pendingUPairs) { List<UnorderedPair<DFSAState<T, S>, DFSAState<T, S>>> dependentList = stateUPairToDependentUPairList.get(pendingUPair); if (dependentList == null) { dependentList = new ArrayList<>(); stateUPairToDependentUPairList.put(pendingUPair, dependentList); } dependentList.add(up); } } } if (debug) { log.info("All pairs marked: " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); } // decide what canonical state each state will map to... DisjointSet<DFSAState<T, S>> stateClasses = new FastDisjointSet<>(states); for (UnorderedPair<DFSAState<T, S>, DFSAState<T, S>> up : stateUPairToDistinguished.keySet()) { if (stateUPairToDistinguished.get(up).equals(Boolean.FALSE)) { DFSAState<T, S> state1 = up.first; DFSAState<T, S> state2 = up.second; stateClasses.union(state1, state2); } } Map<DFSAState<T, S>, DFSAState<T, S>> stateToRep = Generics.newHashMap(); for (DFSAState<T, S> state : states) { DFSAState<T, S> rep = stateClasses.find(state); stateToRep.put(state, rep); } if (debug) { log.info("Canonical states chosen: " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); } // reduce the DFSA by replacing transition targets with their reps for (DFSAState<T, S> state : states) { if (!state.equals(stateToRep.get(state))) { continue; } for (DFSATransition<T, S> transition : state.transitions()) { transition.target = stateClasses.find(transition.target); } } dfsa.initialState = stateClasses.find(dfsa.initialState); if (debug) { log.info("Done: " + (System.currentTimeMillis() - time)); } // done! } }