// Copyright (c) 2011, David J. Pearce (djp@ecs.vuw.ac.nz)
// All rights reserved.
//
// This software may be modified and distributed under the terms
// of the BSD license.  See the LICENSE file for details.

package wyautl_old.lang;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Comparator;

import wyautl_old.lang.Automaton.State;
import wyautl_old.util.BinaryMatrix;

/**
 * <p>
 * This class provides various algorithms for manipulating automata. In
 * particular, the following main algorithms are provided:
 * </p>
 * <ul>
 * <li><b>Extraction.</b> This is used to extract one automaton out of another.</li>
 * <li><b>Minimisation.</b> This is used to eliminate equivalent states within
 * an automaton.</li>
 * <li><b>Canonicalisation.</b> This is used to convert an automaton into a
 * canonical form.</li>
 * </ul>
 *
 * @author David J. Pearce
 */
public final class Automata {

    /**
     * <p>
     * Check whether or not an automaton is "concrete". A concrete automaton
     * cannot have recursive links or non-deterministic states.
     * </p>
     *
     * <p>
     * <b>NOTE:</b> the cycle check starts at state <code>0</code> and follows
     * child links from there, so only states reachable from state
     * <code>0</code> are examined for recursive links. An automaton with no
     * states is reported as concrete.
     * </p>
     *
     * @param automaton
     *            --- the automaton to check.
     * @return --- true if every state is deterministic and no cycle is
     *         reachable from state <code>0</code>.
     */
    public static boolean isConcrete(Automaton automaton) {
        int size = automaton.size();
        if (size == 0) {
            // There is no state 0 to start the traversal from. With no
            // states there can be neither a non-deterministic state nor a
            // recursive link, so the automaton is vacuously concrete.
            return true;
        }
        // First, check all states are deterministic
        for (int i = 0; i != size; ++i) {
            State s = automaton.states[i];
            if (!s.deterministic) {
                return false;
            }
        }
        // Second, check for cycles (i.e. recursive links)
        BitSet visited = new BitSet(size);
        BitSet onStack = new BitSet(size);
        return isConcrete(0, onStack, visited, automaton);
    }

    /**
     * Helper algorithm. This is similar to the well-known algorithm for finding
     * strongly connected components. The main difference is that it doesn't
     * actually return the components.
     *
     * @param index
     *            --- current node being visited.
     * @param onStack
     *            --- indicates which nodes are on the current path from the
     *            root.
     * @param visited
     *            --- indicates which nodes have been visited (but may not be on
     *            the current path).
     * @param automaton
     *            --- the automaton being traversed.
     * @return --- true if no cycle is reachable from <code>index</code>.
     */
    private static boolean isConcrete(int index, BitSet onStack,
            BitSet visited, Automaton automaton) {

        if (onStack.get(index)) {
            return false; // found a cycle!
        }

        if (visited.get(index)) {
            // Ok, we've traversed this node before and it checked out OK.
            return true;
        }

        visited.set(index);
        onStack.set(index);

        State state = automaton.states[index];
        for (int child : state.children) {
            if (!isConcrete(child, onStack, visited, automaton)) {
                // onStack is deliberately left as-is: the whole check is
                // abandoned as soon as a cycle is found.
                return false;
            }
        }

        // Leaving this node, so it is no longer on the current path.
        onStack.set(index, false);

        return true;
    }

    /**
     * <p>
     * Traverse the automaton rooted at the given state and recursively extract
     * all reachable states to produce a (potentially smaller) automaton.
     * </p>
     *
     * <p>
     * <b>NOTE:</b> one additional use-case for this method is to effectively
     * "garbage collect" states in the automaton. That is, if you extract from
     * the root of any automaton, you'll get an automaton consisting only of those
     * nodes reachable from the root --- but, no others. Therefore, unreachable
     * nodes (which can arise as a result of other automaton operations) are
     * lost.
     * </p>
     *
     * @param automaton
     *            --- automaton to extract from
     * @param root
     *            --- state in automaton to begin extraction from
     * @return --- extracted automaton, in which <code>root</code> becomes
     *         state <code>0</code>.
     */
    public static Automaton extract(Automaton automaton, int root) {
        // First, perform a depth-first search from the root.
        State[] nodes = automaton.states;
        ArrayList<Integer> extracted = new ArrayList<Integer>();
        extract(root, new BitSet(nodes.length), extracted, nodes);

        // Second, build up remapping. Entries for states which were not
        // reached are never consulted, since remapped states only refer to
        // reachable children.
        int[] remap = new int[nodes.length];
        int i = 0;
        for (int j : extracted) {
            remap[j] = i++;
        }

        // Third, apply remapping
        State[] newNodes = new State[extracted.size()];
        i = 0;
        for (int j : extracted) {
            newNodes[i++] = remap(nodes[j], remap);
        }

        return new Automaton(newNodes);
    }

    /**
     * <p>
     * Extract all states reachable from a given state and append them to an
     * existing list of states. Child references in the appended states are
     * remapped so that they index into <code>newNodes</code>; that is, they
     * are offset by the size <code>newNodes</code> had on entry.
     * </p>
     *
     * <p>
     * States are appended in depth-first visiting order, so the state
     * <code>index</code> itself ends up at the position which was
     * <code>newNodes.size()</code> before the call.
     * </p>
     *
     * @param index
     *            --- state in automaton to begin extraction from
     * @param automaton
     *            --- automaton to extract from
     * @param newNodes
     *            --- list onto which the extracted states are appended.
     */
    public static void extractOnto(int index, Automaton automaton,
            ArrayList<Automaton.State> newNodes) {
        // First, perform a depth-first search from the root.
        State[] nodes = automaton.states;
        ArrayList<Integer> extracted = new ArrayList<Integer>();
        extract(index, new BitSet(nodes.length), extracted, nodes);

        // Second, build up remapping, starting from the first free slot in
        // the destination list.
        int[] remap = new int[nodes.length];
        int i = newNodes.size();
        for (int j : extracted) {
            remap[j] = i++;
        }

        // Third, apply remapping
        for (int j : extracted) {
            newNodes.add(remap(nodes[j], remap));
        }
    }

    /**
     * The following method recursively extracts the subgraph rooted at
     * <code>index</code> in the given graph using a depth-first search.
     * Vertices in the subgraph are added to <code>extracted</code> in the order
     * they are visited.
     *
     * @param index
     *            --- the node to extract the subgraph from.
     * @param visited
     *            --- the set of vertices already visited
     * @param extracted
     *            --- the list of vertices that make up the subgraph which is
     *            built by this method.
     * @param graph
     *            --- the graph.
     */
    private final static void extract(int index, BitSet visited,
            ArrayList<Integer> extracted, State[] graph) {
        if (visited.get(index)) {
            return; // node already visited
        }
        extracted.add(index);
        visited.set(index);
        State node = graph[index];
        for (int child : node.children) {
            extract(child, visited, extracted, graph);
        }
    }

    /**
     * <p>
     * This method minimises an automaton by removing equivalent states. Two
     * states <code>s1</code> and <code>s2</code> are considered equivalent
     * under the following conditions:
     * </p>
     * <ul>
     * <li>They are both leaf nodes of the same kind with identical
     * supplementary data.</li>
     * <li>They are nodes of the same (sequential) kind whose children at each
     * position are equivalent</li>
     * <li>They are nodes of the same (non-sequential) kind where for each child
     * in one, there is an equivalent child in the other and vice-versa.</li>
     * </ul>
     *
     * @param automaton
     *            --- automaton to minimise
     * @return --- minimised automaton
     */
    public final static Automaton minimise(Automaton automaton) {
        // First, determine equivalence classes. The matrix is handed to the
        // refinement step, which only ever clears entries
        // (NOTE(review): the final constructor argument is presumably the
        // initial value of every entry, i.e. all states start out equivalent
        // --- confirm against BinaryMatrix).
        BinaryMatrix equivs = new BinaryMatrix(automaton.size(), automaton.size(), true);
        determineEquivalenceClasses(equivs, automaton);

        // TODO: optimise the case when all equivalence classes have unit size.

        // Second, determine representative nodes for each equivalence class.
        // The representative of a class is its lowest-numbered member, and
        // representatives are numbered consecutively in the new automaton.
        int oldSize = automaton.size();
        int[] mapping = new int[oldSize];
        int newSize = 0;
        for (int i = 0; i != oldSize; ++i) {
            int classRep = i;
            for (int j = 0; j < i; ++j) {
                if (equivs.get(i, j)) {
                    classRep = j;
                    break;
                }
            }
            if (i == classRep) {
                mapping[i] = newSize++;
            } else {
                // classRep < i, so its mapping has already been assigned
                mapping[i] = mapping[classRep];
            }
        }

        // Finally, reconstruct minimised automaton
        State[] oldStates = automaton.states;
        State[] newStates = new State[newSize];
        for (int i = 0; i != oldSize; ++i) {
            int classRep = mapping[i];
            if (newStates[classRep] == null) {
                // this node is unallocated
                newStates[classRep] = remap(oldStates[i], mapping);
            }
        }

        return new Automaton(newStates);
    }

    /**
     * Repeatedly refine the given equivalence relation until no further pair
     * of states can be shown to be non-equivalent. Only pairs currently
     * marked as equivalent are re-examined; entries are updated symmetrically.
     *
     * @param equivs
     *            --- matrix of candidate equivalences, updated in place.
     * @param automaton
     *            --- the automaton whose states are being compared.
     */
    private final static void determineEquivalenceClasses(BinaryMatrix equivs,
            Automaton automaton) {
        boolean changed = true;
        int size = automaton.size();

        while (changed) {
            changed = false;
            for (int i = 0; i < size; ++i) {
                for (int j = i + 1; j < size; ++j) {
                    if (equivs.get(i, j)) {
                        // no need to explore nodes which are already known to
                        // be not equivalent.
                        boolean b = equivalent(i, j, equivs, automaton);
                        equivs.set(i, j, b);
                        equivs.set(j, i, b);
                        // another pass is needed whenever a pair was split
                        changed |= !b;
                    }
                }
            }
        }
    }

    /**
     * Check whether two states are equivalent under the rules set out for
     * minimisation above, using the current contents of <code>equivs</code>
     * to decide whether their children are equivalent.
     *
     * @param i
     *            --- index of the first state.
     * @param j
     *            --- index of the second state.
     * @param equivs
     *            --- current matrix of candidate equivalences (read only).
     * @param automaton
     *            --- the automaton containing both states.
     * @return --- true if the states have equal supplementary data, the same
     *         kind and determinism, and matching children.
     */
    private final static boolean equivalent(int i, int j, BinaryMatrix equivs,
            Automaton automaton) {
        State s1 = automaton.states[i];
        State s2 = automaton.states[j];

        // first, check supplementary data
        Object s1data = s1.data;
        Object s2data = s2.data;
        if (s1data == null) {
            if (s2data != null) {
                return false;
            }
        } else {
            // following catches case where s2data == null as well
            if (!s1data.equals(s2data)) {
                return false;
            }
        }

        // second, check node kind and children, etc.
        if (s1.kind == s2.kind && s1.deterministic == s2.deterministic) {
            boolean deterministic = s1.deterministic;

            if (deterministic) {
                // children must be pairwise equivalent, position by position
                int[] s1children = s1.children;
                int[] s2children = s2.children;

                if (s1children.length != s2children.length) {
                    return false;
                }

                int length = s1children.length;

                for (int k = 0; k != length; ++k) {
                    int s1child = s1children[k];
                    int s2child = s2children[k];
                    if (!equivs.get(s1child, s2child)) {
                        return false;
                    }
                }

                return true;
            } else {
                // non-deterministic (i.e. more expensive) case
                int[] s1children = s1.children;
                int[] s2children = s2.children;
                int s1length = s1children.length;
                int s2length = s2children.length;

                // First, check every node in s1 has equivalent in s2
                for (int k = 0; k != s1length; ++k) {
                    int s1child = s1children[k];
                    boolean matched = false;
                    for (int l = 0; l != s2length; ++l) {
                        int s2child = s2children[l];
                        if (equivs.get(s1child, s2child)) {
                            matched = true;
                            break;
                        }
                    }
                    if (!matched) {
                        return false;
                    }
                }

                // Second, check every node in s2 has equivalent in s1
                for (int k = 0; k != s2length; ++k) {
                    int s2child = s2children[k];
                    boolean matched = false;
                    for (int l = 0; l != s1length; ++l) {
                        int s1child = s1children[l];
                        if (equivs.get(s1child, s2child)) {
                            matched = true;
                            break;
                        }
                    }
                    if (!matched) {
                        return false;
                    }
                }
                return true;
            }
        }
        return false;
    }

    /**
     * The remap method takes a node, and a mapping from vertices in the old
     * space to those in the new space. It then applies this mapping, so
     * that the node now refers to vertices in the new space. Or, in
     * other words, it transposes the node into the new space.
     *
     * <p>
     * For a deterministic node the children keep their order and number. For
     * a non-deterministic node, children which map to the same new vertex are
     * merged, and the resulting children are listed in ascending order.
     * </p>
     *
     * @param node
     *            --- node to be transposed.
     * @param rmap
     *            --- mapping from integers in old space to those in new
     *            space.
     * @return --- a new state with the same kind, data and determinism as
     *         <code>node</code>, but with remapped children.
     */
    public static State remap(State node, int[] rmap) {
        int[] ochildren = node.children;
        int[] nchildren;
        if (node.deterministic) {
            nchildren = new int[ochildren.length];
            for (int i = 0; i != ochildren.length; ++i) {
                nchildren[i] = rmap[ochildren[i]];
            }
        } else {
            // slightly harder for non-deterministic case: collect the new
            // children as a set so that duplicates collapse.
            BitSet visited = new BitSet(rmap.length);
            for (int i = 0; i != ochildren.length; ++i) {
                int nchild = rmap[ochildren[i]];
                visited.set(nchild);
            }
            int nlength = visited.cardinality();
            nchildren = new int[nlength];
            int j = 0;
            for (int i = visited.nextSetBit(0); i >= 0; i = visited.nextSetBit(i + 1)) {
                nchildren[j++] = i;
            }
        }
        return new State(node.kind, node.data, node.deterministic, nchildren);
    }
}