/*
* Diff Match and Patch
*
* Copyright 2006 Google Inc.
* http://code.google.com/p/google-diff-match-patch/
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.webcat.diff;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
/**
* This class performs a diff between two lists or arrays and returns a list of
* Diff objects that describe the differences.
*
* @param <T> the type of object in the list
*
* @author fraser@google.com (Neil Fraser)
* @author Tony Allevato
*/
public class Differ<T>
{
//~ Static/instance variables .............................................
private Comparator<T> comparator;
private DiffList<T> differences;
//~ Constructors ..........................................................
// ----------------------------------------------------------
public Differ(T[] text1, T[] text2)
{
this(Arrays.asList(text1), Arrays.asList(text2));
}
// ----------------------------------------------------------
public Differ(T[] text1, T[] text2, Comparator<T> comp)
{
this(Arrays.asList(text1), Arrays.asList(text2), comp);
}
// ----------------------------------------------------------
public Differ(List<T> text1, List<T> text2)
{
this(text1, text2, null);
}
// ----------------------------------------------------------
public Differ(List<T> text1, List<T> text2, Comparator<T> comp)
{
this.comparator = comp;
differences = doDiff(text1, text2);
}
//~ Methods ...............................................................
// ----------------------------------------------------------
public DiffList<T> getDifferences()
{
return differences;
}
// ----------------------------------------------------------
/**
* Find the differences between two texts. Simplifies the problem by
* stripping any common prefix or suffix off the texts before diffing.
*
* @param text1
* Old List<T> to be diffed.
* @param text2
* New List<T> to be diffed.
* @param checklines
* Speedup flag. If false, then don't run a line-level diff first
* to identify the changed areas. If true, then run a faster
* slightly less optimal diff.
* @return Linked List of Diff objects.
*/
private DiffList<T> doDiff(List<T> text1, List<T> text2)
{
// Check for null inputs.
if (text1 == null || text2 == null)
{
throw new IllegalArgumentException("Null inputs. (diff_main)");
}
// Check for equality (speedup).
DiffList<T> diffs;
if (text1.equals(text2))
{
diffs = new DiffList<T>();
if (text1.size() != 0)
{
diffs.add(new Diff<T>(Diff.Operation.EQUAL, text1));
}
return diffs;
}
// Trim off common prefix (speedup).
int commonlength = findCommonPrefix(text1, text2);
List<T> commonprefix = text1.subList(0, commonlength);
text1 = text1.subList(commonlength, text1.size());
text2 = text2.subList(commonlength, text2.size());
// Trim off common suffix (speedup).
commonlength = findCommonSuffix(text1, text2);
List<T> commonsuffix = text1.subList(text1.size() - commonlength, text1.size());
text1 = text1.subList(0, text1.size() - commonlength);
text2 = text2.subList(0, text2.size() - commonlength);
// Compute the diff on the middle block.
diffs = compute(text1, text2);
// Restore the prefix and suffix.
if (commonprefix.size() != 0)
{
diffs.addFirst(new Diff<T>(Diff.Operation.EQUAL, commonprefix));
}
if (commonsuffix.size() != 0)
{
diffs.addLast(new Diff<T>(Diff.Operation.EQUAL, commonsuffix));
}
//diff_cleanupMerge(diffs);
return diffs;
}
// ----------------------------------------------------------
/**
* Find the differences between two texts. Assumes that the texts do not
* have any common prefix or suffix.
*
* @param text1
* Old List<T> to be diffed.
* @param text2
* New List<T> to be diffed.
* @param checklines
* Speedup flag. If false, then don't run a line-level diff first
* to identify the changed areas. If true, then run a faster
* slightly less optimal diff.
* @param deadline
* Time when the diff should be complete by.
* @return Linked List of Diff objects.
*/
private DiffList<T> compute(List<T> text1, List<T> text2)
{
DiffList<T> diffs = new DiffList<T>();
if (text1.size() == 0)
{
// Just add some text (speedup).
diffs.add(new Diff<T>(Diff.Operation.INSERT, text2));
return diffs;
}
if (text2.size() == 0)
{
// Just delete some text (speedup).
diffs.add(new Diff<T>(Diff.Operation.DELETE, text1));
return diffs;
}
List<T> longtext = text1.size() > text2.size() ? text1 : text2;
List<T> shorttext = text1.size() > text2.size() ? text2 : text1;
int i = longtext.indexOf(shorttext);
if (i != -1)
{
// Shorter text is inside the longer text (speedup).
Diff.Operation op = (text1.size() > text2.size()) ? Diff.Operation.DELETE
: Diff.Operation.INSERT;
diffs.add(new Diff<T>(op, longtext.subList(0, i)));
diffs.add(new Diff<T>(Diff.Operation.EQUAL, shorttext));
diffs.add(new Diff<T>(op, longtext.subList(i + shorttext.size(), longtext.size())));
return diffs;
}
if (shorttext.size() == 1)
{
// Single character List<T>.
// After the previous speedup, the character can't be an equality.
diffs.add(new Diff<T>(Diff.Operation.DELETE, text1));
diffs.add(new Diff<T>(Diff.Operation.INSERT, text2));
return diffs;
}
longtext = shorttext = null; // Garbage collect.
// Check to see if the problem can be split in two.
List<T>[] hm = halfMatch(text1, text2);
if (hm != null)
{
// A half-match was found, sort out the return data.
List<T> text1_a = hm[0];
List<T> text1_b = hm[1];
List<T> text2_a = hm[2];
List<T> text2_b = hm[3];
List<T> mid_common = hm[4];
// Send both pairs off for separate processing.
DiffList<T> diffs_a = doDiff(text1_a, text2_a);
DiffList<T> diffs_b = doDiff(text1_b, text2_b);
// Merge the results.
diffs = diffs_a;
diffs.add(new Diff<T>(Diff.Operation.EQUAL, mid_common));
diffs.addAll(diffs_b);
return diffs;
}
return bisect(text1, text2);
}
// ----------------------------------------------------------
/**
* Find the 'middle snake' of a diff, split the problem in two and return
* the recursively constructed diff. See Myers 1986 paper: An O(ND)
* Difference Algorithm and Its Variations.
*
* @param text1
* Old List<T> to be diffed.
* @param text2
* New List<T> to be diffed.
* @param deadline
* Time at which to bail if not yet complete.
* @return LinkedList of Diff objects.
*/
private DiffList<T> bisect(List<T> text1, List<T> text2)
{
// Cache the text lengths to prevent multiple calls.
int text1_length = text1.size();
int text2_length = text2.size();
int max_d = (text1_length + text2_length + 1) / 2;
int v_offset = max_d;
int v_length = 2 * max_d;
int[] v1 = new int[v_length];
int[] v2 = new int[v_length];
for (int x = 0; x < v_length; x++)
{
v1[x] = -1;
v2[x] = -1;
}
v1[v_offset + 1] = 0;
v2[v_offset + 1] = 0;
int delta = text1_length - text2_length;
// If the total number of characters is odd, then the front path will
// collide with the reverse path.
boolean front = (delta % 2 != 0);
// Offsets for start and end of k loop.
// Prevents mapping of space beyond the grid.
int k1start = 0;
int k1end = 0;
int k2start = 0;
int k2end = 0;
for (int d = 0; d < max_d; d++)
{
// Walk the front path one step.
for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2)
{
int k1_offset = v_offset + k1;
int x1;
if (k1 == -d || k1 != d
&& v1[k1_offset - 1] < v1[k1_offset + 1])
{
x1 = v1[k1_offset + 1];
}
else
{
x1 = v1[k1_offset - 1] + 1;
}
int y1 = x1 - k1;
while (x1 < text1_length && y1 < text2_length
&& itemsEqual(text1.get(x1), text2.get(y1)))
{
x1++;
y1++;
}
v1[k1_offset] = x1;
if (x1 > text1_length)
{
// Ran off the right of the graph.
k1end += 2;
}
else if (y1 > text2_length)
{
// Ran off the bottom of the graph.
k1start += 2;
}
else if (front)
{
int k2_offset = v_offset + delta - k1;
if (k2_offset >= 0 && k2_offset < v_length
&& v2[k2_offset] != -1)
{
// Mirror x2 onto top-left coordinate system.
int x2 = text1_length - v2[k2_offset];
if (x1 >= x2)
{
// Overlap detected.
return bisectSplit(text1, text2, x1, y1);
}
}
}
}
// Walk the reverse path one step.
for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2)
{
int k2_offset = v_offset + k2;
int x2;
if (k2 == -d || k2 != d
&& v2[k2_offset - 1] < v2[k2_offset + 1])
{
x2 = v2[k2_offset + 1];
}
else
{
x2 = v2[k2_offset - 1] + 1;
}
int y2 = x2 - k2;
while (x2 < text1_length
&& y2 < text2_length
&& itemsEqual(text1.get(text1_length - x2 - 1),
text2.get(text2_length - y2 - 1)))
{
x2++;
y2++;
}
v2[k2_offset] = x2;
if (x2 > text1_length)
{
// Ran off the left of the graph.
k2end += 2;
}
else if (y2 > text2_length)
{
// Ran off the top of the graph.
k2start += 2;
}
else if (!front)
{
int k1_offset = v_offset + delta - k2;
if (k1_offset >= 0 && k1_offset < v_length
&& v1[k1_offset] != -1)
{
int x1 = v1[k1_offset];
int y1 = v_offset + x1 - k1_offset;
// Mirror x2 onto top-left coordinate system.
x2 = text1_length - x2;
if (x1 >= x2)
{
// Overlap detected.
return bisectSplit(text1, text2, x1, y1);
}
}
}
}
}
// Diff took too long and hit the deadline or
// number of diffs equals number of characters, no commonality at all.
DiffList<T> diffs = new DiffList<T>();
diffs.add(new Diff<T>(Diff.Operation.DELETE, text1));
diffs.add(new Diff<T>(Diff.Operation.INSERT, text2));
return diffs;
}
// ----------------------------------------------------------
/**
* Given the location of the 'middle snake', split the diff in two parts and
* recurse.
*
* @param text1
* Old List<T> to be diffed.
* @param text2
* New List<T> to be diffed.
* @param x
* Index of split point in text1.
* @param y
* Index of split point in text2.
* @param deadline
* Time at which to bail if not yet complete.
* @return LinkedList of Diff objects.
*/
private DiffList<T> bisectSplit(List<T> text1, List<T> text2,
int x, int y)
{
List<T> text1a = text1.subList(0, x);
List<T> text2a = text2.subList(0, y);
List<T> text1b = text1.subList(x, text1.size());
List<T> text2b = text2.subList(y, text2.size());
// Compute both diffs serially.
DiffList<T> diffs = doDiff(text1a, text2a);
DiffList<T> diffsb = doDiff(text1b, text2b);
diffs.addAll(diffsb);
return diffs;
}
// ----------------------------------------------------------
/**
* Determine the common prefix of two List<T>s
*
* @param text1
* First List<T>.
* @param text2
* Second List<T>.
* @return The number of characters common to the start of each List<T>.
*/
private int findCommonPrefix(List<T> text1, List<T> text2)
{
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
int n = Math.min(text1.size(), text2.size());
for (int i = 0; i < n; i++)
{
if (!itemsEqual(text1.get(i), text2.get(i)))
{
return i;
}
}
return n;
}
// ----------------------------------------------------------
/**
* Determine the common suffix of two List<T>s
*
* @param text1
* First List<T>.
* @param text2
* Second List<T>.
* @return The number of characters common to the end of each List<T>.
*/
private int findCommonSuffix(List<T> text1, List<T> text2)
{
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
int text1_length = text1.size();
int text2_length = text2.size();
int n = Math.min(text1_length, text2_length);
for (int i = 1; i <= n; i++)
{
if (!itemsEqual(text1.get(text1_length - i),
text2.get(text2_length - i)))
{
return i - 1;
}
}
return n;
}
// ----------------------------------------------------------
/**
* Determine if the suffix of one List<T> is the prefix of another.
*
* @param text1
* First List<T>.
* @param text2
* Second List<T>.
* @return The number of characters common to the end of the first List<T>
* and the start of the second List<T>.
*/
@SuppressWarnings("unused")
private int findCommonOverlap(List<T> text1, List<T> text2)
{
// Cache the text lengths to prevent multiple calls.
int text1_length = text1.size();
int text2_length = text2.size();
// Eliminate the null case.
if (text1_length == 0 || text2_length == 0)
{
return 0;
}
// Truncate the longer List<T>.
if (text1_length > text2_length)
{
text1 = text1.subList(text1_length - text2_length, text1.size());
}
else if (text1_length < text2_length)
{
text2 = text2.subList(0, text1_length);
}
int text_length = Math.min(text1_length, text2_length);
// Quick check for the worst case.
if (text1.equals(text2))
{
return text_length;
}
// Start by looking for a single character match
// and increase length until no match is found.
// Performance analysis: http://neil.fraser.name/news/2010/11/04/
int best = 0;
int length = 1;
while (true)
{
List<T> pattern = text1.subList(text_length - length, text1.size());
int found = text2.indexOf(pattern);
if (found == -1)
{
return best;
}
length += found;
if (found == 0 || listsEqual(
text1.subList(text_length - length, text1.size()),
text2.subList(0, length)))
{
best = length;
length++;
}
}
}
// ----------------------------------------------------------
/**
* Do the two texts share a subList which is at least half the length of
* the longer text? This speedup can produce non-minimal diffs.
*
* @param text1
* First List<T>.
* @param text2
* Second List<T>.
* @return Five element List<T> array, containing the prefix of text1, the
* suffix of text1, the prefix of text2, the suffix of text2 and the
* common middle. Or null if there was no match.
*/
private List<T>[] halfMatch(List<T> text1, List<T> text2)
{
List<T> longtext = text1.size() > text2.size() ? text1 : text2;
List<T> shorttext = text1.size() > text2.size() ? text2 : text1;
if (longtext.size() < 4 || shorttext.size() * 2 < longtext.size())
{
return null; // Pointless.
}
// First check if the second quarter is the seed for a half-match.
List<T>[] hm1 = halfMatchI(longtext, shorttext,
(longtext.size() + 3) / 4);
// Check again based on the third quarter.
List<T>[] hm2 = halfMatchI(longtext, shorttext,
(longtext.size() + 1) / 2);
List<T>[] hm;
if (hm1 == null && hm2 == null)
{
return null;
}
else if (hm2 == null)
{
hm = hm1;
}
else if (hm1 == null)
{
hm = hm2;
}
else
{
// Both matched. Select the longest.
hm = hm1[4].size() > hm2[4].size() ? hm1 : hm2;
}
// A half-match was found, sort out the return data.
if (text1.size() > text2.size())
{
return hm;
//return new List<T>[]{hm[0], hm[1], hm[2], hm[3], hm[4]};
}
else
{
return new List[] { hm[2], hm[3], hm[0], hm[1], hm[4] };
}
}
// ----------------------------------------------------------
/**
* Does a subList of shorttext exist within longtext such that the
* subList is at least half the length of longtext?
*
* @param longtext
* Longer List<T>.
* @param shorttext
* Shorter List<T>.
* @param i
* Start index of quarter length subList within longtext.
* @return Five element List<T> array, containing the prefix of longtext, the
* suffix of longtext, the prefix of shorttext, the suffix of
* shorttext and the common middle. Or null if there was no match.
*/
private List<T>[] halfMatchI(List<T> longtext, List<T> shorttext, int i)
{
// Start with a 1/4 length subList at position i as a seed.
List<T> seed = longtext.subList(i, i + longtext.size() / 4);
int j = -1;
List<T> best_common = new ArrayList<T>();
List<T> best_longtext_a = new ArrayList<T>();
List<T> best_longtext_b = new ArrayList<T>();
List<T> best_shorttext_a = new ArrayList<T>();
List<T> best_shorttext_b = new ArrayList<T>();
while ((j = listIndexOf(shorttext, seed, j + 1)) != -1)
{
int prefixLength = findCommonPrefix(longtext.subList(i, longtext.size()),
shorttext.subList(j, shorttext.size()));
int suffixLength = findCommonSuffix(longtext.subList(0, i),
shorttext.subList(0, j));
if (best_common.size() < suffixLength + prefixLength)
{
best_common = addLists(shorttext.subList(j - suffixLength, j),
shorttext.subList(j, j + prefixLength));
best_longtext_a = longtext.subList(0, i - suffixLength);
best_longtext_b = longtext.subList(i + prefixLength, longtext.size());
best_shorttext_a = shorttext.subList(0, j - suffixLength);
best_shorttext_b = shorttext.subList(j + prefixLength, shorttext.size());
}
}
if (best_common.size() * 2 >= longtext.size())
{
return new List[] { best_longtext_a, best_longtext_b,
best_shorttext_a, best_shorttext_b, best_common };
}
else
{
return null;
}
}
// ----------------------------------------------------------
private boolean itemsEqual(T item1, T item2)
{
return DiffUtils.itemsEqual(item1, item2, comparator);
}
// ----------------------------------------------------------
private List<T> addLists(List<T>... lists)
{
ArrayList<T> newList = new ArrayList<T>();
for (List<T> list : lists)
{
newList.addAll(list);
}
return newList;
}
// ----------------------------------------------------------
private boolean listsEqual(List<T> list1, List<T> list2)
{
return DiffUtils.listsEqual(list1, list2, comparator);
}
// ----------------------------------------------------------
private int listIndexOf(List<T> source, List<T> target, int start)
{
return DiffUtils.listIndexOf(source, target, start, comparator);
}
}