/*
 * (C) Copyright 2005 Arnaud Bailly (arnaud.oqube@gmail.com),
 *     Yves Roos (yroos@lifl.fr) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the License);
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package rationals.distance;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import rationals.Automaton;
import rationals.graph.AutomatonGraphAdapter;
import rationals.State;
import rationals.Transition;
import rationals.properties.IsDeterministic;
import salvo.jesus.graph.algorithm.DirectedGraphAdjacencyMatrix;
import salvo.jesus.graph.algorithm.DirectedGraphDualMatrix;

/**
 * This class computes various metrics related to the vectorial space
 * associated with a given automaton. Each transition of the automaton is one
 * dimension of the space; a word is mapped to the vector counting how many
 * times each transition is fired while reading it.
 * <p>
 * The automaton is used as given: it must be deterministic (this is checked
 * by the constructor) and it is <em>not</em> reduced by this class.
 * Subclasses select the actual metric by implementing {@link #norm(double[])}
 * and {@link #exponent(double)}.
 *
 * @author nono
 * @version $Id: Distance.java 2 2006-08-24 14:41:48Z oqube $
 */
public abstract class Distance {

    /** The automaton this distance is defined on. */
    private final Automaton dfa;

    /** Index (dimension) assigned to each transition. */
    private final Map<Transition, Integer> idx;

    /** Index assigned to each state, in iteration order of the state set. */
    private final Map<State, Integer> sidx = new HashMap<State, Integer>();

    /** Number of dimensions, i.e. number of indexed transitions. */
    private final int dim;

    /** Per-dimension bound computed at construction time. */
    private double[] bounds;

    /**
     * Construct a distance from an automaton.
     * <p>
     * Note that this constructor calls {@link #exponent(double)}, so
     * implementations of that method must not rely on subclass fields that
     * are initialized by the subclass constructor.
     *
     * @param a
     *            must be deterministic.
     * @throws IllegalArgumentException
     *             if <code>a</code> is not deterministic.
     */
    public Distance(Automaton a) {
        if (!new IsDeterministic().test(a))
            throw new IllegalArgumentException(
                    "Cannot create distance on non deterministic autoamaton");
        /* the automaton is kept as is, it is not reduced */
        this.dfa = a;
        /* compute maximum coordinates matrices */
        DirectedGraphDualMatrix m = new DirectedGraphDualMatrix(
                new AutomatonGraphAdapter(a));
        double[][] tm = new salvo.jesus.graph.algorithm.Distance(m)
                .distances();
        DirectedGraphAdjacencyMatrix am = new DirectedGraphAdjacencyMatrix(
                new AutomatonGraphAdapter(a));
        double[][] sm = new salvo.jesus.graph.algorithm.Distance(am)
                .distances();
        /* create index map */
        this.idx = new HashMap<Transition, Integer>();
        this.bounds = new double[tm.length];
        int k = 0;
        int si = 0;
        /* index states and get index of start state */
        for (Iterator it = this.dfa.states().iterator(); it.hasNext(); k++) {
            State st = (State) it.next();
            sidx.put(st, Integer.valueOf(k));
            if (st.isInitial()) {
                si = k;
            }
        }
        int i = 0;
        for (Iterator it = dfa.delta().iterator(); it.hasNext(); i++) {
            Transition tr = (Transition) it.next();
            idx.put(tr, Integer.valueOf(i));
            if (tm[i][i] != Double.POSITIVE_INFINITY) {
                /* bounds for cycle members = length of loop */
                bounds[i] = 1 / exponent(tm[i][i]);
            } else {
                /* bound = distance from start state */
                Integer in = sidx.get(tr.end());
                bounds[i] = 1 / exponent(sm[si][in.intValue()] + 1);
            }
        }
        this.dim = i;
    }

    /**
     * Compute the vector for a given word. The <code>word</code> is expected
     * to be word from the language accepted by the associated automaton or a
     * prefix of such a word.
     *
     * @param word
     *            a List of Object representing a word in this automaton.
     * @return an array of int representing the vector for this word. Null if
     *         word is not a prefix of the associated language.
     */
    public int[] vector(List word) {
        int[] vec = new int[dim];
        /* dfa is deterministic: a single initial state is used */
        State s = (State) dfa.initials().iterator().next();
        for (Object l : word) {
            /* compute next transition */
            Set d = dfa.delta(s, l);
            if (d.isEmpty())
                return null;
            Transition tr = (Transition) d.iterator().next();
            vec[idx.get(tr).intValue()]++;
            s = tr.end();
        }
        return vec;
    }

    /**
     * Compute the normalized vector for a given word. The raw vector's
     * coefficients are divided by the norm of the vector, as computed by
     * {@link #norm(double[])}.
     *
     * @param word
     *            a List of Object representing a word in this automaton.
     * @return a array of double objects representing normalized vector. null
     *         if <code>word</code> is not in this language.
     */
    public double[] normalize(List word) {
        int[] vec = vector(word);
        if (vec == null)
            return null;
        return normalize(vec);
    }

    /**
     * Compute the distance between two words.
     *
     * @param word1
     *            first word, as a List of letters.
     * @param word2
     *            second word, as a List of letters.
     * @return a distance. Returns infinity if either word1 or word2 are not
     *         recognized prefixes of this dfa.
     */
    public double distance(List word1, List word2) {
        double[] n1 = normalize(word1);
        if (n1 == null)
            return Double.POSITIVE_INFINITY;
        double[] n2 = normalize(word2);
        if (n2 == null)
            return Double.POSITIVE_INFINITY;
        return distance(n1, n2);
    }

    /**
     * Compute the norm of the difference of two vectors.
     * <p>
     * A fresh array is used for the difference, so this method touches no
     * shared state.
     *
     * @param n1
     *            first vector, at least <code>dim</code> long.
     * @param n2
     *            second vector, at least <code>dim</code> long.
     * @return <code>norm(n1 - n2)</code>.
     */
    private double distance(double[] n1, double[] n2) {
        double[] delta = new double[dim];
        for (int i = 0; i < dim; i++) {
            delta[i] = n1[i] - n2[i];
        }
        return norm(delta);
    }

    /**
     * Compute the distance from a vector to the bound of dimension
     * <code>i</code>, i.e. to the vector that is <code>bounds[i]</code> on
     * dimension <code>i</code> and 0 everywhere else.
     *
     * @param vec
     *            a vector, at least <code>dim</code> long.
     * @param i
     *            index of the dimension.
     * @return the distance between <code>vec</code> and that bound vector.
     */
    private double border(double[] vec, int i) {
        double[] corner = new double[dim];
        corner[i] = bounds[i];
        return distance(vec, corner);
    }

    /**
     * Compute the eta-coverage of a set of words with respect to this
     * distance. Words that are not recognized prefixes of the automaton have
     * no vector and are ignored.
     *
     * @param w
     *            a Set of List of words
     * @return an upper bound on the eta-coverage of this set of words; 0 if
     *         fewer than two words of <code>w</code> are recognized.
     */
    public double etaCoverage(Set w) {
        /* parallel lists: normalized vector and the word it comes from */
        List<double[]> vs = new ArrayList<double[]>();
        List<List> words = new ArrayList<List>();
        for (Object o : w) {
            List l = (List) o;
            double[] nv = normalize(l);
            if (nv == null)
                continue; /* not a prefix of the language: no vector */
            vs.add(nv);
            words.add(l);
        }
        int n = vs.size();
        double[] brd = new double[dim];
        Arrays.fill(brd, Double.MAX_VALUE);
        /*
         * distances are maximized starting from 0; Double.MIN_VALUE is the
         * smallest positive double and is not a suitable seed
         */
        double ret = 0.0;
        /* compute the maximum distance between words of w */
        for (int i = 0; i < n - 1; i++) {
            double[] cur = vs.get(i);
            /*
             * compute the maximum of minimum distance to borders.
             * NOTE(review): dimension 0 is skipped here, as in the original
             * code - confirm whether this is intended.
             */
            for (int k = 1; k < dim; k++) {
                double ec = border(cur, k);
                if (ec < brd[k])
                    brd[k] = ec;
            }
            double min = Double.MAX_VALUE;
            List minWord = null;
            /* find the closest word among the following ones */
            for (int j = i + 1; j < n; j++) {
                double d = distance(cur, vs.get(j));
                if (d < min) {
                    min = d;
                    minWord = words.get(j);
                }
            }
            System.err.println("min distance from " + words.get(i) + "="
                    + min + " to " + minWord);
            if (min > ret)
                ret = min;
        }
        System.err.println("Max border = " + Arrays.toString(brd));
        System.err.println("Max kernel = " + ret);
        return ret;
    }

    /**
     * A point to cover: its denormalized vector, its normalized vector and
     * its length.
     */
    class VectorAndLength {

        /**
         * @param uv
         *            the denormalized vector.
         * @param len2
         *            the length associated with the vector.
         * @param norm
         *            the normalized vector.
         */
        public VectorAndLength(double[] uv, double len2, double[] norm) {
            this.vec = uv;
            this.norm = norm;
            this.len = len2;
        }

        double[] vec;

        double[] norm;

        double len;
    }

    /**
     * Computes a set of words from <code>dfa</code> which is an eta-coverage
     * of the full language recognized by the dfa.
     *
     * @param eta
     *            the required coverage. Must be greater than 0 and not
     *            greater than <code>exponent(2)</code>.
     * @return a Set of List denoting words which is an eta coverage of this
     *         automaton's language
     * @throws IllegalArgumentException
     *             if <code>eta</code> is out of range or not a number.
     */
    @SuppressWarnings("unchecked")
    public Set etaCoverage(double eta) {
        /* !(eta > 0) also rejects NaN */
        if (!(eta > 0) || eta > exponent(2))
            throw new IllegalArgumentException("Bad value of eta");
        /* compute geodesic */
        Geodesic geodesic = new Geodesic(eta, dim);
        Collection pts = geodesic.getPolyedron();
        /* denormalize vectors and order by length */
        List<VectorAndLength> vecs = new ArrayList<VectorAndLength>();
        for (Iterator i = pts.iterator(); i.hasNext();) {
            double[] vec = (double[]) i.next();
            double len = 1 / vec[0];
            double[] uv = unnormalize(vec);
            /* find the position of the first strictly longer element */
            int k = 0;
            for (Iterator<VectorAndLength> j = vecs.iterator(); j.hasNext(); k++) {
                if (len < j.next().len)
                    break;
            }
            vecs.add(k, new VectorAndLength(uv, len, vec));
        }
        /*
         * explore automaton in breadth-first direction, stop when all points
         * have been covered or no more points can be covered
         */
        return explore(vecs, eta / 2);
    }

    /**
     * A node of the breadth-first exploration: the state reached, the word
     * read so far, its vector and its length.
     */
    class ExploreState {

        /**
         * Creates a node holding private copies of <code>curvec</code> and
         * <code>curword</code>.
         *
         * @param curvec
         *            vector of the word read so far.
         * @param ln2
         *            length of the word read so far.
         * @param state2
         *            state reached.
         * @param curword
         *            word read so far.
         */
        @SuppressWarnings("unchecked")
        public ExploreState(int[] curvec, int ln2, State state2, List curword) {
            vec = new int[dim];
            System.arraycopy(curvec, 0, vec, 0, dim);
            ln = ln2;
            state = state2;
            word = new ArrayList(curword);
        }

        int[] vec;

        int ln;

        State state;

        List word;
    }

    /**
     * Explores the automaton breadth-first from its initial state and
     * collects the words whose normalized vector is at a distance lower than
     * <code>eta</code> from at least one of the given points. Covered points
     * are removed from <code>vecs</code>.
     * <p>
     * Exploration stops when all points are covered or when there is nothing
     * left to explore. NOTE(review): on an automaton with cycles, a point
     * that is never approached keeps the exploration running without bound.
     *
     * @param vecs
     *            a modifiable List of {@link VectorAndLength} to cover.
     * @param eta
     *            the covering radius.
     * @return a Set of List, the covering words.
     */
    @SuppressWarnings("unchecked")
    Set explore(List vecs, double eta) {
        Set ret = new HashSet();
        LinkedList<ExploreState> trq = new LinkedList<ExploreState>();
        int[] cv = new int[dim];
        cv[0] = 1;
        trq.add(new ExploreState(cv, 0, (State) dfa.initials().iterator()
                .next(), new ArrayList()));
        while (!vecs.isEmpty() && !trq.isEmpty()) {
            ExploreState st = trq.removeFirst();
            Set trs = dfa.delta(st.state);
            int[] curvec = st.vec;
            int ln = st.ln;
            List word = st.word;
            for (Iterator k = trs.iterator(); k.hasNext();) {
                Transition tr = (Transition) k.next();
                int j = idx.get(tr).intValue();
                /* extend current word with tr */
                curvec[j]++;
                ln++;
                double[] v = normalize(curvec);
                word.add(tr.label());
                System.err.println("word =" + word);
                /* compare to vecs */
                boolean covering = false;
                for (Iterator i = vecs.iterator(); i.hasNext();) {
                    VectorAndLength vl = (VectorAndLength) i.next();
                    double d = distance(vl.norm, v);
                    System.err.println(" distance =" + d);
                    if (d < eta) {
                        i.remove();
                        System.err.println("removing " + vl);
                        covering = true;
                    }
                }
                if (covering) {
                    /*
                     * store a snapshot: the working list is modified below
                     * and must not be shared with the result set
                     */
                    ret.add(new ArrayList(word));
                }
                trq.add(new ExploreState(curvec, ln, tr.end(), word));
                /* restore current word for next transition */
                curvec[j]--;
                ln--;
                word.remove(word.size() - 1);
            }
        }
        return ret;
    }

    /**
     * Divide a raw vector by its norm.
     *
     * @param vec
     *            a raw vector, at least <code>dim</code> long.
     * @return a new array holding the normalized vector. If the norm of
     *         <code>vec</code> is 0 (e.g. the vector of the empty word), the
     *         zero vector is returned instead of dividing by zero.
     */
    private double[] normalize(int[] vec) {
        double[] ret = new double[dim];
        for (int i = 0; i < dim; i++)
            ret[i] = vec[i];
        double norm = norm(ret);
        if (norm == 0)
            return ret;
        for (int i = 0; i < dim; i++)
            ret[i] /= norm;
        return ret;
    }

    /**
     * Sum the first <code>dim</code> coordinates of a vector.
     *
     * @param ds
     *            a vector, at least <code>dim</code> long.
     * @return the sum of its coordinates.
     */
    private double length(double[] ds) {
        double acc = 0;
        for (int i = 0; i < dim; i++)
            acc += ds[i];
        return acc;
    }

    /**
     * Compute a word from a vector. This method is meant to compute a word
     * from the language of this automaton that is closest to the coordinates
     * of vec and at a distance lower than <code>eta</code>.
     * <p>
     * The construction has never been implemented: the previous body ended
     * in an empty endless loop and never returned. It now fails fast instead
     * of hanging the calling thread.
     *
     * @param vec
     *            a normalized vector.
     * @param eta
     *            maximum distance of returned word.
     * @return never returns normally.
     * @throws UnsupportedOperationException
     *             always.
     */
    public List makeWord(double[] vec, double eta) {
        throw new UnsupportedOperationException(
                "makeWord is not implemented");
    }

    /**
     * Scale a normalized vector back to raw coordinates.
     *
     * @param vec
     *            a normalized vector; by convention, vec[0] contains 1/norm.
     * @return a new array of the same length as <code>vec</code> whose first
     *         <code>dim</code> coordinates are those of <code>vec</code>
     *         multiplied by <code>1 / vec[0]</code>.
     */
    private double[] unnormalize(double[] vec) {
        double[] ret = new double[vec.length];
        /* by convention, vec[0] contains 1/norm */
        double len = 1 / vec[0];
        for (int i = 0; i < dim; i++) {
            ret[i] = vec[i] * len;
        }
        return ret;
    }

    /**
     * Returns the index map associating each transition with an index.
     *
     * @return a Map from Transition to Integer.
     */
    public Map indices() {
        return idx;
    }

    /**
     * @return the automaton this distance was built from.
     */
    public Automaton getDfa() {
        return dfa;
    }

    /**
     * @return the number of dimensions, i.e. of indexed transitions.
     */
    public int getDimension() {
        return dim;
    }

    /**
     * Returns an array of String denoting the ordered dimensions of this
     * distance.
     *
     * @return an array of String whose length equals <code>dimension</code>.
     */
    public String[] getAxis() {
        String[] ret = new String[dim];
        for (Map.Entry<Transition, Integer> e : idx.entrySet()) {
            ret[e.getValue().intValue()] = e.getKey().toString();
        }
        return ret;
    }

    /**
     * @return Returns the bounds. This is the internal array, not a copy.
     */
    public double[] getBounds() {
        return bounds;
    }

    /**
     * @param bounds
     *            The bounds to set. The array is stored as is, not copied.
     */
    public void setBounds(double[] bounds) {
        this.bounds = bounds;
    }

    /**
     * Compute the norm of a vector in this space.
     *
     * @param vec
     *            a vector.
     * @return the norm of <code>vec</code>.
     */
    public abstract double norm(double[] vec);

    /**
     * The exponent used in the computation of the norm. For a L-k norm, this
     * is usually 1/k.
     * <p>
     * This method is called from the constructor of this class, before any
     * subclass constructor body has run.
     *
     * @param d
     *            the value to transform.
     * @return the transformed value.
     */
    public abstract double exponent(double d);
}