/* * Diff Match and Patch * * Copyright 2006 Google Inc. * http://code.google.com/p/google-diff-match-patch/ * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.webcat.diff; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.List; /** * This class performs a diff between two lists or arrays and returns a list of * Diff objects that describe the differences. * * @param <T> the type of object in the list * * @author fraser@google.com (Neil Fraser) * @author Tony Allevato */ public class Differ<T> { //~ Static/instance variables ............................................. private Comparator<T> comparator; private DiffList<T> differences; //~ Constructors .......................................................... // ---------------------------------------------------------- public Differ(T[] text1, T[] text2) { this(Arrays.asList(text1), Arrays.asList(text2)); } // ---------------------------------------------------------- public Differ(T[] text1, T[] text2, Comparator<T> comp) { this(Arrays.asList(text1), Arrays.asList(text2), comp); } // ---------------------------------------------------------- public Differ(List<T> text1, List<T> text2) { this(text1, text2, null); } // ---------------------------------------------------------- public Differ(List<T> text1, List<T> text2, Comparator<T> comp) { this.comparator = comp; differences = doDiff(text1, text2); } //~ Methods ............................................................... // ---------------------------------------------------------- public DiffList<T> getDifferences() { return differences; } // ---------------------------------------------------------- /** * Find the differences between two texts. Simplifies the problem by * stripping any common prefix or suffix off the texts before diffing. * * @param text1 * Old List<T> to be diffed. * @param text2 * New List<T> to be diffed. * @param checklines * Speedup flag. If false, then don't run a line-level diff first * to identify the changed areas. If true, then run a faster * slightly less optimal diff. * @return Linked List of Diff objects. */ private DiffList<T> doDiff(List<T> text1, List<T> text2) { // Check for null inputs. if (text1 == null || text2 == null) { throw new IllegalArgumentException("Null inputs. (diff_main)"); } // Check for equality (speedup). DiffList<T> diffs; if (text1.equals(text2)) { diffs = new DiffList<T>(); if (text1.size() != 0) { diffs.add(new Diff<T>(Diff.Operation.EQUAL, text1)); } return diffs; } // Trim off common prefix (speedup). int commonlength = findCommonPrefix(text1, text2); List<T> commonprefix = text1.subList(0, commonlength); text1 = text1.subList(commonlength, text1.size()); text2 = text2.subList(commonlength, text2.size()); // Trim off common suffix (speedup). commonlength = findCommonSuffix(text1, text2); List<T> commonsuffix = text1.subList(text1.size() - commonlength, text1.size()); text1 = text1.subList(0, text1.size() - commonlength); text2 = text2.subList(0, text2.size() - commonlength); // Compute the diff on the middle block. diffs = compute(text1, text2); // Restore the prefix and suffix. if (commonprefix.size() != 0) { diffs.addFirst(new Diff<T>(Diff.Operation.EQUAL, commonprefix)); } if (commonsuffix.size() != 0) { diffs.addLast(new Diff<T>(Diff.Operation.EQUAL, commonsuffix)); } //diff_cleanupMerge(diffs); return diffs; } // ---------------------------------------------------------- /** * Find the differences between two texts. Assumes that the texts do not * have any common prefix or suffix. * * @param text1 * Old List<T> to be diffed. * @param text2 * New List<T> to be diffed. * @param checklines * Speedup flag. If false, then don't run a line-level diff first * to identify the changed areas. If true, then run a faster * slightly less optimal diff. * @param deadline * Time when the diff should be complete by. * @return Linked List of Diff objects. */ private DiffList<T> compute(List<T> text1, List<T> text2) { DiffList<T> diffs = new DiffList<T>(); if (text1.size() == 0) { // Just add some text (speedup). diffs.add(new Diff<T>(Diff.Operation.INSERT, text2)); return diffs; } if (text2.size() == 0) { // Just delete some text (speedup). diffs.add(new Diff<T>(Diff.Operation.DELETE, text1)); return diffs; } List<T> longtext = text1.size() > text2.size() ? text1 : text2; List<T> shorttext = text1.size() > text2.size() ? text2 : text1; int i = longtext.indexOf(shorttext); if (i != -1) { // Shorter text is inside the longer text (speedup). Diff.Operation op = (text1.size() > text2.size()) ? Diff.Operation.DELETE : Diff.Operation.INSERT; diffs.add(new Diff<T>(op, longtext.subList(0, i))); diffs.add(new Diff<T>(Diff.Operation.EQUAL, shorttext)); diffs.add(new Diff<T>(op, longtext.subList(i + shorttext.size(), longtext.size()))); return diffs; } if (shorttext.size() == 1) { // Single character List<T>. // After the previous speedup, the character can't be an equality. diffs.add(new Diff<T>(Diff.Operation.DELETE, text1)); diffs.add(new Diff<T>(Diff.Operation.INSERT, text2)); return diffs; } longtext = shorttext = null; // Garbage collect. // Check to see if the problem can be split in two. List<T>[] hm = halfMatch(text1, text2); if (hm != null) { // A half-match was found, sort out the return data. List<T> text1_a = hm[0]; List<T> text1_b = hm[1]; List<T> text2_a = hm[2]; List<T> text2_b = hm[3]; List<T> mid_common = hm[4]; // Send both pairs off for separate processing. DiffList<T> diffs_a = doDiff(text1_a, text2_a); DiffList<T> diffs_b = doDiff(text1_b, text2_b); // Merge the results. diffs = diffs_a; diffs.add(new Diff<T>(Diff.Operation.EQUAL, mid_common)); diffs.addAll(diffs_b); return diffs; } return bisect(text1, text2); } // ---------------------------------------------------------- /** * Find the 'middle snake' of a diff, split the problem in two and return * the recursively constructed diff. See Myers 1986 paper: An O(ND) * Difference Algorithm and Its Variations. * * @param text1 * Old List<T> to be diffed. * @param text2 * New List<T> to be diffed. * @param deadline * Time at which to bail if not yet complete. * @return LinkedList of Diff objects. */ private DiffList<T> bisect(List<T> text1, List<T> text2) { // Cache the text lengths to prevent multiple calls. int text1_length = text1.size(); int text2_length = text2.size(); int max_d = (text1_length + text2_length + 1) / 2; int v_offset = max_d; int v_length = 2 * max_d; int[] v1 = new int[v_length]; int[] v2 = new int[v_length]; for (int x = 0; x < v_length; x++) { v1[x] = -1; v2[x] = -1; } v1[v_offset + 1] = 0; v2[v_offset + 1] = 0; int delta = text1_length - text2_length; // If the total number of characters is odd, then the front path will // collide with the reverse path. boolean front = (delta % 2 != 0); // Offsets for start and end of k loop. // Prevents mapping of space beyond the grid. int k1start = 0; int k1end = 0; int k2start = 0; int k2end = 0; for (int d = 0; d < max_d; d++) { // Walk the front path one step. for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { int k1_offset = v_offset + k1; int x1; if (k1 == -d || k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) { x1 = v1[k1_offset + 1]; } else { x1 = v1[k1_offset - 1] + 1; } int y1 = x1 - k1; while (x1 < text1_length && y1 < text2_length && itemsEqual(text1.get(x1), text2.get(y1))) { x1++; y1++; } v1[k1_offset] = x1; if (x1 > text1_length) { // Ran off the right of the graph. k1end += 2; } else if (y1 > text2_length) { // Ran off the bottom of the graph. k1start += 2; } else if (front) { int k2_offset = v_offset + delta - k1; if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { // Mirror x2 onto top-left coordinate system. int x2 = text1_length - v2[k2_offset]; if (x1 >= x2) { // Overlap detected. return bisectSplit(text1, text2, x1, y1); } } } } // Walk the reverse path one step. for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { int k2_offset = v_offset + k2; int x2; if (k2 == -d || k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) { x2 = v2[k2_offset + 1]; } else { x2 = v2[k2_offset - 1] + 1; } int y2 = x2 - k2; while (x2 < text1_length && y2 < text2_length && itemsEqual(text1.get(text1_length - x2 - 1), text2.get(text2_length - y2 - 1))) { x2++; y2++; } v2[k2_offset] = x2; if (x2 > text1_length) { // Ran off the left of the graph. k2end += 2; } else if (y2 > text2_length) { // Ran off the top of the graph. k2start += 2; } else if (!front) { int k1_offset = v_offset + delta - k2; if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { int x1 = v1[k1_offset]; int y1 = v_offset + x1 - k1_offset; // Mirror x2 onto top-left coordinate system. x2 = text1_length - x2; if (x1 >= x2) { // Overlap detected. return bisectSplit(text1, text2, x1, y1); } } } } } // Diff took too long and hit the deadline or // number of diffs equals number of characters, no commonality at all. DiffList<T> diffs = new DiffList<T>(); diffs.add(new Diff<T>(Diff.Operation.DELETE, text1)); diffs.add(new Diff<T>(Diff.Operation.INSERT, text2)); return diffs; } // ---------------------------------------------------------- /** * Given the location of the 'middle snake', split the diff in two parts and * recurse. * * @param text1 * Old List<T> to be diffed. * @param text2 * New List<T> to be diffed. * @param x * Index of split point in text1. * @param y * Index of split point in text2. * @param deadline * Time at which to bail if not yet complete. * @return LinkedList of Diff objects. */ private DiffList<T> bisectSplit(List<T> text1, List<T> text2, int x, int y) { List<T> text1a = text1.subList(0, x); List<T> text2a = text2.subList(0, y); List<T> text1b = text1.subList(x, text1.size()); List<T> text2b = text2.subList(y, text2.size()); // Compute both diffs serially. DiffList<T> diffs = doDiff(text1a, text2a); DiffList<T> diffsb = doDiff(text1b, text2b); diffs.addAll(diffsb); return diffs; } // ---------------------------------------------------------- /** * Determine the common prefix of two List<T>s * * @param text1 * First List<T>. * @param text2 * Second List<T>. * @return The number of characters common to the start of each List<T>. */ private int findCommonPrefix(List<T> text1, List<T> text2) { // Performance analysis: http://neil.fraser.name/news/2007/10/09/ int n = Math.min(text1.size(), text2.size()); for (int i = 0; i < n; i++) { if (!itemsEqual(text1.get(i), text2.get(i))) { return i; } } return n; } // ---------------------------------------------------------- /** * Determine the common suffix of two List<T>s * * @param text1 * First List<T>. * @param text2 * Second List<T>. * @return The number of characters common to the end of each List<T>. */ private int findCommonSuffix(List<T> text1, List<T> text2) { // Performance analysis: http://neil.fraser.name/news/2007/10/09/ int text1_length = text1.size(); int text2_length = text2.size(); int n = Math.min(text1_length, text2_length); for (int i = 1; i <= n; i++) { if (!itemsEqual(text1.get(text1_length - i), text2.get(text2_length - i))) { return i - 1; } } return n; } // ---------------------------------------------------------- /** * Determine if the suffix of one List<T> is the prefix of another. * * @param text1 * First List<T>. * @param text2 * Second List<T>. * @return The number of characters common to the end of the first List<T> * and the start of the second List<T>. */ @SuppressWarnings("unused") private int findCommonOverlap(List<T> text1, List<T> text2) { // Cache the text lengths to prevent multiple calls. int text1_length = text1.size(); int text2_length = text2.size(); // Eliminate the null case. if (text1_length == 0 || text2_length == 0) { return 0; } // Truncate the longer List<T>. if (text1_length > text2_length) { text1 = text1.subList(text1_length - text2_length, text1.size()); } else if (text1_length < text2_length) { text2 = text2.subList(0, text1_length); } int text_length = Math.min(text1_length, text2_length); // Quick check for the worst case. if (text1.equals(text2)) { return text_length; } // Start by looking for a single character match // and increase length until no match is found. // Performance analysis: http://neil.fraser.name/news/2010/11/04/ int best = 0; int length = 1; while (true) { List<T> pattern = text1.subList(text_length - length, text1.size()); int found = text2.indexOf(pattern); if (found == -1) { return best; } length += found; if (found == 0 || listsEqual( text1.subList(text_length - length, text1.size()), text2.subList(0, length))) { best = length; length++; } } } // ---------------------------------------------------------- /** * Do the two texts share a subList which is at least half the length of * the longer text? This speedup can produce non-minimal diffs. * * @param text1 * First List<T>. * @param text2 * Second List<T>. * @return Five element List<T> array, containing the prefix of text1, the * suffix of text1, the prefix of text2, the suffix of text2 and the * common middle. Or null if there was no match. */ private List<T>[] halfMatch(List<T> text1, List<T> text2) { List<T> longtext = text1.size() > text2.size() ? text1 : text2; List<T> shorttext = text1.size() > text2.size() ? text2 : text1; if (longtext.size() < 4 || shorttext.size() * 2 < longtext.size()) { return null; // Pointless. } // First check if the second quarter is the seed for a half-match. List<T>[] hm1 = halfMatchI(longtext, shorttext, (longtext.size() + 3) / 4); // Check again based on the third quarter. List<T>[] hm2 = halfMatchI(longtext, shorttext, (longtext.size() + 1) / 2); List<T>[] hm; if (hm1 == null && hm2 == null) { return null; } else if (hm2 == null) { hm = hm1; } else if (hm1 == null) { hm = hm2; } else { // Both matched. Select the longest. hm = hm1[4].size() > hm2[4].size() ? hm1 : hm2; } // A half-match was found, sort out the return data. if (text1.size() > text2.size()) { return hm; //return new List<T>[]{hm[0], hm[1], hm[2], hm[3], hm[4]}; } else { return new List[] { hm[2], hm[3], hm[0], hm[1], hm[4] }; } } // ---------------------------------------------------------- /** * Does a subList of shorttext exist within longtext such that the * subList is at least half the length of longtext? * * @param longtext * Longer List<T>. * @param shorttext * Shorter List<T>. * @param i * Start index of quarter length subList within longtext. * @return Five element List<T> array, containing the prefix of longtext, the * suffix of longtext, the prefix of shorttext, the suffix of * shorttext and the common middle. Or null if there was no match. */ private List<T>[] halfMatchI(List<T> longtext, List<T> shorttext, int i) { // Start with a 1/4 length subList at position i as a seed. List<T> seed = longtext.subList(i, i + longtext.size() / 4); int j = -1; List<T> best_common = new ArrayList<T>(); List<T> best_longtext_a = new ArrayList<T>(); List<T> best_longtext_b = new ArrayList<T>(); List<T> best_shorttext_a = new ArrayList<T>(); List<T> best_shorttext_b = new ArrayList<T>(); while ((j = listIndexOf(shorttext, seed, j + 1)) != -1) { int prefixLength = findCommonPrefix(longtext.subList(i, longtext.size()), shorttext.subList(j, shorttext.size())); int suffixLength = findCommonSuffix(longtext.subList(0, i), shorttext.subList(0, j)); if (best_common.size() < suffixLength + prefixLength) { best_common = addLists(shorttext.subList(j - suffixLength, j), shorttext.subList(j, j + prefixLength)); best_longtext_a = longtext.subList(0, i - suffixLength); best_longtext_b = longtext.subList(i + prefixLength, longtext.size()); best_shorttext_a = shorttext.subList(0, j - suffixLength); best_shorttext_b = shorttext.subList(j + prefixLength, shorttext.size()); } } if (best_common.size() * 2 >= longtext.size()) { return new List[] { best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common }; } else { return null; } } // ---------------------------------------------------------- private boolean itemsEqual(T item1, T item2) { return DiffUtils.itemsEqual(item1, item2, comparator); } // ---------------------------------------------------------- private List<T> addLists(List<T>... lists) { ArrayList<T> newList = new ArrayList<T>(); for (List<T> list : lists) { newList.addAll(list); } return newList; } // ---------------------------------------------------------- private boolean listsEqual(List<T> list1, List<T> list2) { return DiffUtils.listsEqual(list1, list2, comparator); } // ---------------------------------------------------------- private int listIndexOf(List<T> source, List<T> target, int start) { return DiffUtils.listIndexOf(source, target, start, comparator); } }