/** * Copyright (C) 2013-2014 Olaf Lessenich * Copyright (C) 2014-2015 University of Passau, Germany * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301 USA * * Contributors: * Olaf Lessenich <lessenic@fim.uni-passau.de> * Georg Seibt <seibt@fim.uni-passau.de> */ package de.fosd.jdime.matcher.ordered.mceSubtree; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Function; import de.fosd.jdime.artifact.Artifact; import de.fosd.jdime.util.Tuple; /** * Trees can be described as balanced sequences. A balanced sequence is a sequence of even length over the alphabet * {0, 1}. The balanced sequence of a leaf node is the empty sequence. The balanced sequence of a non leaf node is the * concatenation of the balanced sequences of its children, every one preceded by a 0 and followed by a 1. The balanced * sequence of a tree is the balanced sequence of its root node. This implementation adds the root node to the * balanced sequence (as if by adding a virtual root node with a single child (the root node) and constructing the * normal balanced sequence of the tree). * * @param <T> * the type of the <code>Artifact</code> whose balanced sequence is to be constructed * * @author Georg Seibt * @see <a href="http://www.cs.upc.edu/~antoni/subtree.pdf">This Paper</a> */ public class BalancedSequence<T extends Artifact<T>> { @SuppressWarnings("unchecked") private static final BalancedSequence<?> EMPTY_SEQ = new BalancedSequence<>(Collections.EMPTY_LIST); private List<T> seq; private int hashCode; private Map<BalancedSequence<T>, Set<BalancedSequence<T>>> decompositionCache; /** * Constructs a new <code>BalancedSequence</code> representing the given <code>tree</code> structure. * * @param tree * the tree of <code>Artifact</code>s */ public BalancedSequence(T tree) { this.seq = new ArrayList<>(Collections.nCopies(tree.getTreeSize() * 2, null)); initSeq(tree, Integer.MAX_VALUE); } /** * Constructs a new <code>BalancedSequence</code> representing the given <code>tree</code> structure. * All nodes with depth <code>maxDepth</code> will be considered leaf nodes. * * @param tree * the tree of <code>Artifact</code>s * @param maxDepth * the maximum depth of nodes to consider */ public BalancedSequence(T tree, int maxDepth) { this.seq = new ArrayList<>(Collections.nCopies(getSize(tree, maxDepth) * 2, null)); initSeq(tree, maxDepth); } /** * Constructs a new <code>BalancedSequence</code> wrapping the given <code>seq</code>. * * @param seq * the sequence to wrap */ private BalancedSequence(List<T> seq) { this.seq = seq; this.hashCode = seq.hashCode(); } /** * Returns the number of nodes in the tree that have at most the given <code>depth</code>. * * @param tree * the tree whose nodes are to be counted * @param depth * the maximum depth of nodes to count * * @return the number of nodes */ private int getSize(T tree, int depth) { if (depth == 0) { return 1; } int num = 0; for (T t : tree.getChildren()) { num += getSize(t, depth - 1); } return num + 1; } /** * Initializes the <code>seq</code> array to the balanced sequence of the <code>tree</code>. * * @param tree * the tree whose balanced sequence is to be inserted in the <code>seq</code> array * @param maxDepth * the maximum depth of nodes to add */ private void initSeq(T tree, int maxDepth) { seq.set(0, tree); initSeqRec(tree, 1, 0, maxDepth); hashCode = seq.hashCode(); } /** * Initializes the <code>seq</code> array to the balanced sequence of the <code>tree</code>. * * @param tree * the tree whose balanced sequence is to be inserted in the <code>seq</code> array * @param index * the index for the 0 before the first child * @param currentDepth * the current depth in the tree * @param maxDepth * the maximum depth of nodes to add * * @return the index after the last index written to; */ private int initSeqRec(T tree, int index, int currentDepth, int maxDepth) { if (currentDepth < maxDepth) { for (T t : tree.getChildren()) { seq.set(index++, t); index = initSeqRec(t, index, currentDepth + 1, maxDepth); index++; } } return index; } /** * Partitions the balanced sequence into its head and tail. The head and tail of a balanced sequence <code>s</code> * are unique balanced sequences such that <code>s = 0 head(s) 1 tail(s)</code>. * * @return a <code>Pair</code> of (<code>head(s), tail(s)</code>) */ public Tuple<BalancedSequence<T>, BalancedSequence<T>> partition() { if (seq.size() == 0 || seq.size() == 2) { return Tuple.of(emptySeq(), emptySeq()); } int numZeros = 0; int index = 0; do { if (seq.get(index++) == null) { numZeros--; } else { numZeros++; } } while (numZeros > 0); BalancedSequence<T> head; BalancedSequence<T> tail; int headLength = index - 2; int tailLength = seq.size() - index; if (headLength != 0) { head = new BalancedSequence<>(seq.subList(1, 1 + headLength)); head.setDecompositionCache(decompositionCache); } else { head = emptySeq(); } if (tailLength != 0) { tail = new BalancedSequence<>(seq.subList(index, index + tailLength)); tail.setDecompositionCache(decompositionCache); } else { tail = emptySeq(); } return Tuple.of(head, tail); } /** * Returns an empty sequence. * * @param <T> * the type of the <code>Artifact</code> * @return an empty <code>BalancedSequence</code> */ @SuppressWarnings("unchecked") private static <T extends Artifact<T>> BalancedSequence<T> emptySeq() { return (BalancedSequence<T>) EMPTY_SEQ; } /** * An expensive part of the algorithm implemented in {@link BalancedSequence#lcs(BalancedSequence, BalancedSequence)} * is the decomposition of one <code>BalancedSequence</code> into a <code>Set</code> of <code>BalancedSequences</code>. * When performing multiple calls to {@link BalancedSequence#lcs(BalancedSequence, BalancedSequence)} for similar * <code>BalancedSequences</code> performance can be improved by using a persistent cache for all of them. * <p> * The given cache will be used and updated in the {@link #decompose()} method. It will also be passed to the * produced <code>BalancedSequences</code> in {@link #decompose()} and {@link #partition()}. * * @param decompositionCache * the decomposition cache */ public void setDecompositionCache(Map<BalancedSequence<T>, Set<BalancedSequence<T>>> decompositionCache) { this.decompositionCache = decompositionCache; } /** * Returns the decomposition of this balanced sequence. The decomposition of the empty balanced sequence is a set * containing only the empty balanced sequence. For all other sequences s the decomposition is the union of a * set containing s and the decompositions of head(s), tail(s) and the concatenation of head(s) and tail(s). * * @return the decomposition of this balanced sequence */ public Set<BalancedSequence<T>> decompose() { if (isEmpty()) { return Collections.singleton(emptySeq()); } Function<BalancedSequence<T>, Set<BalancedSequence<T>>> calcDecomp = seq -> { Set<BalancedSequence<T>> decomposition = new HashSet<>(Collections.singleton(seq)); Tuple<BalancedSequence<T>, BalancedSequence<T>> partition = partition(); BalancedSequence<T> head = partition.getX(); BalancedSequence<T> tail = partition.getY(); decomposition.addAll(head.decompose()); decomposition.addAll(tail.decompose()); decomposition.addAll(concatenate(head, tail).decompose()); return decomposition; }; if (decompositionCache != null) { return decompositionCache.computeIfAbsent(this, calcDecomp); } else { return calcDecomp.apply(this); } } /** * Concatenates the two given <code>BalancedSequence</code>s. * * @param left * the left part of the resulting <code>BalancedSequence</code> * @param right * the right part of the resulting <code>BalancedSequence</code> * @param <T> * the type of the <code>Artifact</code>s * * @return the concatenation result */ private static <T extends Artifact<T>> BalancedSequence<T> concatenate(BalancedSequence<T> left, BalancedSequence<T> right) { int length = left.seq.size() + right.seq.size(); if (length == 0) { return emptySeq(); } List<T> result = new ArrayList<>(length); result.addAll(left.seq); result.addAll(right.seq); BalancedSequence<T> res = new BalancedSequence<>(result); if (left.decompositionCache != null) { res.setDecompositionCache(left.decompositionCache); } else if (right.decompositionCache != null) { res.setDecompositionCache(right.decompositionCache); } return res; } /** * Returns the length (being the number of nodes of the tree it represents) of the longest common balanced sequence * between the balanced sequences <code>s</code> and <code>t</code>. * * @param s * the first <code>BalancedSequence</code> * @param t * the second <code>BalancedSequence</code> * @param <T> * the type of the <code>Artifact</code>s * * @return the length of the longest common balanced sequence */ public static <T extends Artifact<T>> Integer lcs(BalancedSequence<T> s, BalancedSequence<T> t) { Map<Integer, Integer> codes = new HashMap<>(); Integer[][] results; int code = 0; /* * The decompositions of s and t contain all balanced sequences that will be produced during the recursion * through lcsRec. We assign each balanced sequence an index into the results array. */ Set<BalancedSequence<T>> dec = new HashSet<>(s.decompose()); dec.addAll(t.decompose()); for (BalancedSequence<T> seq : dec) { codes.put(seq.hashCode(), code++); } /* * We build a triangular array because the lcs problem is symmetric (lcs(s, t) = lcs(t, s)). * The functions lookup and store are then used to correctly address sub-problems (the recursive cases in * lcsRec) using the codes of their balanced sequences. */ results = new Integer[codes.size()][]; for (int i = 0; i < results.length; i++) { results[i] = new Integer[i + 1]; } return lcsRec(s, t, codes, results); } /** * Recursive helper function for {@link BalancedSequence#lcs(BalancedSequence, BalancedSequence)}. Computes * the longest common balanced sequence between the balanced sequences <code>s</code> and <code>t</code> using * the <code>results</code> array to store results of sub-problems. <code>codes</code> must * contain mappings for the hashes of every balanced sequence in the decompositions of <code>s</code> and * <code>t</code> to an index in the array <code>results</code>. * * @param s * the first <code>BalancedSequence</code> * @param t * the seconds <code>BalancedSequence</code> * @param codes * the codes of the <code>BalancedSequence</code>s in the decompositions * @param results * the array of solutions to sub-problems * @param <T> * the type of the <code>Artifact</code>s * * @return the length of the longest common balanced sequence */ private static <T extends Artifact<T>> Integer lcsRec(BalancedSequence<T> s, BalancedSequence<T> t, Map<Integer, Integer> codes, Integer[][] results) { if (s.isEmpty() || t.isEmpty()) { return 0; } Integer codeS = codes.get(s.hashCode()); Integer codeT = codes.get(t.hashCode()); Integer result = lookup(codeS, codeT, results); if (result != null) { return result; } Tuple<BalancedSequence<T>, BalancedSequence<T>> sPart = s.partition(); Tuple<BalancedSequence<T>, BalancedSequence<T>> tPart = t.partition(); BalancedSequence<T> sHead = sPart.getX(); BalancedSequence<T> tHead = tPart.getX(); BalancedSequence<T> sTail = sPart.getY(); BalancedSequence<T> tTail = tPart.getY(); Integer a = lcsRec(concatenate(sHead, sTail), t, codes, results); Integer b = lcsRec(s, concatenate(tHead, tTail), codes, results); if (s.seq.get(0).matches(t.seq.get(0))) { Integer c = lcsRec(sHead, tHead, codes, results) + lcsRec(sTail, tTail, codes, results) + 1; result = max(a, max(b, c)); } else { result = max(a, b); } store(codeS, codeT, results, result); return result; } /** * Looks up the result of the lcs problem for the balanced sequences with the given codes in the * <code>results</code> array. * * @param codeA * the code of the first balanced sequence * @param codeB * the code of the second balanced sequence * @param results * the results array * * @return the solution to the lcs problem or <code>null</code> if <code>results</code> contains none */ private static Integer lookup(Integer codeA, Integer codeB, Integer[][] results) { return codeA.compareTo(codeB) > 0 ? results[codeA][codeB] : results[codeB][codeA]; } /** * Stores the solution to an lcs problem between the balanced sequences with the given codes in the * <code>results</code> array. * * @param codeA * the code of the first balanced sequence * @param codeB * the code of the second balanced sequence * @param results * the results array * @param result * the result to be stored */ private static void store(Integer codeA, Integer codeB, Integer[][] results, Integer result) { if (codeA.compareTo(codeB) > 0) { results[codeA][codeB] = result; } else { results[codeB][codeA] = result; } } /** * Returns the maximum of two <code>Integer</code>s. * * @param a * the first <code>Integer</code> * @param b * the second <code>Integer</code> * * @return the bigger of both <code>Integer</code>s */ private static Integer max(Integer a, Integer b) { return a.compareTo(b) > 0 ? a : b; } /** * Returns whether this <code>BalancedSequence</code> is empty. * * @return true iff the <code>BalancedSequence</code> is empty */ public boolean isEmpty() { return seq.isEmpty(); } /** * Returns the root of the tree of <code>Artifact</code>s this <code>BalancedSequence</code> represents. * * @return the root of the tree */ public T getRoot() { return seq.get(0); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } BalancedSequence<?> that = (BalancedSequence<?>) o; return seq.equals(that.seq); } @Override public int hashCode() { return hashCode; } @Override public String toString() { return seq.stream().map(bit -> bit == null ? "1" : "0").reduce("", String::concat); } }