/* * Diff Match and Patch * * Copyright 2006 Google Inc. * http://code.google.com/p/google-diff-match-patch/ * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.ichi2.utils; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import java.util.Map; import java.util.Set; import java.util.Stack; import java.util.regex.Pattern; /** * Functions for diff, match and patch. Computes the difference between two texts to create a patch. Applies the patch * onto another text, allowing for errors. * * @author fraser@google.com (Neil Fraser) Class containing the diff, match and patch methods. Also contains the * behaviour settings. TODO if possible, remove the merging code, unneeded. */ public class DiffEngine { // Defaults. // Set these on your diff_match_patch instance to override the defaults. /** * Number of seconds to map a diff before giving up (0 for infinity). */ public float Diff_Timeout = 1.0f; /** * Cost of an empty edit operation in terms of edit characters. */ public short Diff_EditCost = 4; /** * The size beyond which the double-ended diff activates. Double-ending is twice as fast, but less accurate. */ public short Diff_DualThreshold = 32; /** * Colors for right and wrong answer */ private static final String RIGHT_COLOR = "#c0ffc0"; private static final String WRONG_COLOR = "#ffc0c0"; /** * Internal class for returning results from diff_linesToChars(). Other less paranoid languages just use a * three-element array. */ protected static class LinesToCharsResult { protected String chars1; protected String chars2; protected List<String> lineArray; protected LinesToCharsResult(String chars1, String chars2, List<String> lineArray) { this.chars1 = chars1; this.chars2 = chars2; this.lineArray = lineArray; } } // DIFF FUNCTIONS /** * The data structure representing a diff is a Linked list of Diff objects: {Diff(Operation.DELETE, "Hello"), * Diff(Operation.INSERT, "Goodbye"), Diff(Operation.EQUAL, " world.")} which means: delete "Hello", add "Goodbye" * and keep " world." */ public enum Operation { DELETE, INSERT, EQUAL } /** * Find the differences between two texts. Run a faster slightly less optimal diff This method allows the * 'checklines' of diff_main() to be optional. Most of the time checklines is wanted, so default to true. * * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @return Linked List of Diff objects. */ public LinkedList<DiffAction> diff_main(String text1, String text2) { return diff_main(text1, text2, true); } /** * Find the differences between two texts. Simplifies the problem by stripping any common prefix or suffix off the * texts before diffing. * * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @param checklines Speedup flag. If false, then don't run a line-level diff first to identify the changed areas. * If true, then run a faster slightly less optimal diff * @return Linked List of Diff objects. */ public LinkedList<DiffAction> diff_main(String text1, String text2, boolean checklines) { // Check for equality (speedup) LinkedList<DiffAction> diffs; if (text1.equals(text2)) { diffs = new LinkedList<DiffAction>(); diffs.add(new DiffAction(Operation.EQUAL, text1)); return diffs; } // Trim off common prefix (speedup) int commonlength = diff_commonPrefix(text1, text2); String commonprefix = text1.substring(0, commonlength); text1 = text1.substring(commonlength); text2 = text2.substring(commonlength); // Trim off common suffix (speedup) commonlength = diff_commonSuffix(text1, text2); String commonsuffix = text1.substring(text1.length() - commonlength); text1 = text1.substring(0, text1.length() - commonlength); text2 = text2.substring(0, text2.length() - commonlength); // Compute the diff on the middle block diffs = diff_compute(text1, text2, checklines); // Restore the prefix and suffix if (commonprefix.length() > 0) { diffs.addFirst(new DiffAction(Operation.EQUAL, commonprefix)); } if (commonsuffix.length() > 0) { diffs.addLast(new DiffAction(Operation.EQUAL, commonsuffix)); } diff_cleanupMerge(diffs); return diffs; } /** * Find the differences between two texts. Assumes that the texts do not have any common prefix or suffix. * * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @param checklines Speedup flag. If false, then don't run a line-level diff first to identify the changed areas. * If true, then run a faster slightly less optimal diff * @return Linked List of Diff objects. */ protected LinkedList<DiffAction> diff_compute(String text1, String text2, boolean checklines) { LinkedList<DiffAction> diffs = new LinkedList<DiffAction>(); if (text1.length() == 0) { // Just add some text (speedup) diffs.add(new DiffAction(Operation.INSERT, text2)); return diffs; } if (text2.length() == 0) { // Just delete some text (speedup) diffs.add(new DiffAction(Operation.DELETE, text1)); return diffs; } String longtext = text1.length() > text2.length() ? text1 : text2; String shorttext = text1.length() > text2.length() ? text2 : text1; int i = longtext.indexOf(shorttext); if (i != -1) { // Shorter text is inside the longer text (speedup) Operation op = (text1.length() > text2.length()) ? Operation.DELETE : Operation.INSERT; diffs.add(new DiffAction(op, longtext.substring(0, i))); diffs.add(new DiffAction(Operation.EQUAL, shorttext)); diffs.add(new DiffAction(op, longtext.substring(i + shorttext.length()))); return diffs; } longtext = shorttext = null; // Garbage collect // Check to see if the problem can be split in two. String[] hm = diff_halfMatch(text1, text2); if (hm != null) { // A half-match was found, sort out the return data. String text1_a = hm[0]; String text1_b = hm[1]; String text2_a = hm[2]; String text2_b = hm[3]; String mid_common = hm[4]; // Send both pairs off for separate processing. LinkedList<DiffAction> diffs_a = diff_main(text1_a, text2_a, checklines); LinkedList<DiffAction> diffs_b = diff_main(text1_b, text2_b, checklines); // Merge the results. diffs = diffs_a; diffs.add(new DiffAction(Operation.EQUAL, mid_common)); diffs.addAll(diffs_b); return diffs; } // Perform a real diff. if (checklines && (text1.length() < 100 || text2.length() < 100)) { checklines = false; // Too trivial for the overhead. } List<String> linearray = null; if (checklines) { // Scan the text on a line-by-line basis first. LinesToCharsResult b = diff_linesToChars(text1, text2); text1 = b.chars1; text2 = b.chars2; linearray = b.lineArray; } diffs = diff_map(text1, text2); if (diffs == null) { // No acceptable result. diffs = new LinkedList<DiffAction>(); diffs.add(new DiffAction(Operation.DELETE, text1)); diffs.add(new DiffAction(Operation.INSERT, text2)); } if (checklines) { // Convert the diff back to original text. diff_charsToLines(diffs, linearray); // Eliminate freak matches (e.g. blank lines) diff_cleanupSemantic(diffs); // Rediff any replacement blocks, this time character-by-character. // Add a dummy entry at the end. diffs.add(new DiffAction(Operation.EQUAL, "")); int count_delete = 0; int count_insert = 0; String text_delete = ""; String text_insert = ""; ListIterator<DiffAction> pointer = diffs.listIterator(); DiffAction thisDiff = pointer.next(); while (thisDiff != null) { switch (thisDiff.operation) { case INSERT: count_insert++; text_insert += thisDiff.text; break; case DELETE: count_delete++; text_delete += thisDiff.text; break; case EQUAL: // Upon reaching an equality, check for prior redundancies. if (count_delete >= 1 && count_insert >= 1) { // Delete the offending records and add the merged ones. pointer.previous(); for (int j = 0; j < count_delete + count_insert; j++) { pointer.previous(); pointer.remove(); } for (DiffAction newDiff : diff_main(text_delete, text_insert, false)) { pointer.add(newDiff); } } count_insert = 0; count_delete = 0; text_delete = ""; text_insert = ""; break; } thisDiff = pointer.hasNext() ? pointer.next() : null; } diffs.removeLast(); // Remove the dummy entry at the end. } return diffs; } /** * Split two texts into a list of strings. Reduce the texts to a string of hashes where each Unicode character * represents one line. * * @param text1 First string. * @param text2 Second string. * @return An object containing the encoded text1, the encoded text2 and the List of unique strings. The zeroth * element of the List of unique strings is intentionally blank. */ protected LinesToCharsResult diff_linesToChars(String text1, String text2) { List<String> lineArray = new ArrayList<String>(); Map<String, Integer> lineHash = new HashMap<String, Integer>(); // e.g. linearray[4] == "Hello\n" // e.g. linehash.get("Hello\n") == 4 // "\x00" is a valid character, but various debuggers don't like it. // So we'll insert a junk entry to avoid generating a null character. lineArray.add(""); String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); return new LinesToCharsResult(chars1, chars2, lineArray); } /** * Split a text into a list of strings. Reduce the texts to a string of hashes where each Unicode character * represents one line. * * @param text String to encode. * @param lineArray List of unique strings. * @param lineHash Map of strings to indices. * @return Encoded string. */ private String diff_linesToCharsMunge(String text, List<String> lineArray, Map<String, Integer> lineHash) { int lineStart = 0; int lineEnd = -1; String line; StringBuilder chars = new StringBuilder(); // Walk the text, pulling out a substring for each line. // text.split('\n') would would temporarily double our memory footprint. // Modifying text would create many large strings to garbage collect. while (lineEnd < text.length() - 1) { lineEnd = text.indexOf('\n', lineStart); if (lineEnd == -1) { lineEnd = text.length() - 1; } line = text.substring(lineStart, lineEnd + 1); lineStart = lineEnd + 1; if (lineHash.containsKey(line)) { chars.append(String.valueOf((char) (int) lineHash.get(line))); } else { lineArray.add(line); lineHash.put(line, lineArray.size() - 1); chars.append(String.valueOf((char) (lineArray.size() - 1))); } } return chars.toString(); } /** * Rehydrate the text in a diff from a string of line hashes to real lines of text. * * @param diffs LinkedList of Diff objects. * @param lineArray List of unique strings. */ protected void diff_charsToLines(LinkedList<DiffAction> diffs, List<String> lineArray) { StringBuilder text; for (DiffAction diff : diffs) { text = new StringBuilder(); for (int y = 0; y < diff.text.length(); y++) { text.append(lineArray.get(diff.text.charAt(y))); } diff.text = text.toString(); } } /** * Explore the intersection points between the two texts. * * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @return LinkedList of Diff objects or null if no diff available. */ protected LinkedList<DiffAction> diff_map(String text1, String text2) { long ms_end = System.currentTimeMillis() + (long) (Diff_Timeout * 1000); // Cache the text lengths to prevent multiple calls. int text1_length = text1.length(); int text2_length = text2.length(); int max_d = text1_length + text2_length - 1; boolean doubleEnd = Diff_DualThreshold * 2 < max_d; List<Set<Long>> v_map1 = new ArrayList<Set<Long>>(); List<Set<Long>> v_map2 = new ArrayList<Set<Long>>(); Map<Integer, Integer> v1 = new HashMap<Integer, Integer>(); Map<Integer, Integer> v2 = new HashMap<Integer, Integer>(); v1.put(1, 0); v2.put(1, 0); int x, y; Long footstep = 0L; // Used to track overlapping paths. Map<Long, Integer> footsteps = new HashMap<Long, Integer>(); boolean done = false; // If the total number of characters is odd, then the front path will // collide with the reverse path. boolean front = ((text1_length + text2_length) % 2 == 1); for (int d = 0; d < max_d; d++) { // Bail out if timeout reached. if (Diff_Timeout > 0 && System.currentTimeMillis() > ms_end) { return null; } // Walk the front path one step. v_map1.add(new HashSet<Long>()); // Adds at index 'd'. for (int k = -d; k <= d; k += 2) { if (k == -d || k != d && v1.get(k - 1) < v1.get(k + 1)) { x = v1.get(k + 1); } else { x = v1.get(k - 1) + 1; } y = x - k; if (doubleEnd) { footstep = diff_footprint(x, y); if (front && (footsteps.containsKey(footstep))) { done = true; } if (!front) { footsteps.put(footstep, d); } } while (!done && x < text1_length && y < text2_length && text1.charAt(x) == text2.charAt(y)) { x++; y++; if (doubleEnd) { footstep = diff_footprint(x, y); if (front && (footsteps.containsKey(footstep))) { done = true; } if (!front) { footsteps.put(footstep, d); } } } v1.put(k, x); v_map1.get(d).add(diff_footprint(x, y)); if (x == text1_length && y == text2_length) { // Reached the end in single-path mode. return diff_path1(v_map1, text1, text2); } else if (done) { // Front path ran over reverse path. v_map2 = v_map2.subList(0, footsteps.get(footstep) + 1); LinkedList<DiffAction> a = diff_path1(v_map1, text1.substring(0, x), text2.substring(0, y)); a.addAll(diff_path2(v_map2, text1.substring(x), text2.substring(y))); return a; } } if (doubleEnd) { // Walk the reverse path one step. v_map2.add(new HashSet<Long>()); // Adds at index 'd'. for (int k = -d; k <= d; k += 2) { if (k == -d || k != d && v2.get(k - 1) < v2.get(k + 1)) { x = v2.get(k + 1); } else { x = v2.get(k - 1) + 1; } y = x - k; footstep = diff_footprint(text1_length - x, text2_length - y); if (!front && (footsteps.containsKey(footstep))) { done = true; } if (front) { footsteps.put(footstep, d); } while (!done && x < text1_length && y < text2_length && text1.charAt(text1_length - x - 1) == text2.charAt(text2_length - y - 1)) { x++; y++; footstep = diff_footprint(text1_length - x, text2_length - y); if (!front && (footsteps.containsKey(footstep))) { done = true; } if (front) { footsteps.put(footstep, d); } } v2.put(k, x); v_map2.get(d).add(diff_footprint(x, y)); if (done) { // Reverse path ran over front path. v_map1 = v_map1.subList(0, footsteps.get(footstep) + 1); LinkedList<DiffAction> a = diff_path1(v_map1, text1.substring(0, text1_length - x), text2.substring(0, text2_length - y)); a.addAll(diff_path2(v_map2, text1.substring(text1_length - x), text2.substring(text2_length - y))); return a; } } } } // Number of diffs equals number of characters, no commonality at all. return null; } /** * Work from the middle back to the start to determine the path. * * @param v_map List of path sets. * @param text1 Old string fragment to be diffed. * @param text2 New string fragment to be diffed. * @return LinkedList of Diff objects. */ protected LinkedList<DiffAction> diff_path1(List<Set<Long>> v_map, String text1, String text2) { LinkedList<DiffAction> path = new LinkedList<DiffAction>(); int x = text1.length(); int y = text2.length(); Operation last_op = null; for (int d = v_map.size() - 2; d >= 0; d--) { while (true) { if (v_map.get(d).contains(diff_footprint(x - 1, y))) { x--; if (last_op == Operation.DELETE) { path.getFirst().text = text1.charAt(x) + path.getFirst().text; } else { path.addFirst(new DiffAction(Operation.DELETE, text1.substring(x, x + 1))); } last_op = Operation.DELETE; break; } else if (v_map.get(d).contains(diff_footprint(x, y - 1))) { y--; if (last_op == Operation.INSERT) { path.getFirst().text = text2.charAt(y) + path.getFirst().text; } else { path.addFirst(new DiffAction(Operation.INSERT, text2.substring(y, y + 1))); } last_op = Operation.INSERT; break; } else { x--; y--; assert (text1.charAt(x) == text2.charAt(y)) : "No diagonal. Can't happen. (diff_path1)"; if (last_op == Operation.EQUAL) { path.getFirst().text = text1.charAt(x) + path.getFirst().text; } else { path.addFirst(new DiffAction(Operation.EQUAL, text1.substring(x, x + 1))); } last_op = Operation.EQUAL; } } } return path; } /** * Work from the middle back to the end to determine the path. * * @param v_map List of path sets. * @param text1 Old string fragment to be diffed. * @param text2 New string fragment to be diffed. * @return LinkedList of Diff objects. */ protected LinkedList<DiffAction> diff_path2(List<Set<Long>> v_map, String text1, String text2) { LinkedList<DiffAction> path = new LinkedList<DiffAction>(); int x = text1.length(); int y = text2.length(); Operation last_op = null; for (int d = v_map.size() - 2; d >= 0; d--) { while (true) { if (v_map.get(d).contains(diff_footprint(x - 1, y))) { x--; if (last_op == Operation.DELETE) { path.getLast().text += text1.charAt(text1.length() - x - 1); } else { path.addLast(new DiffAction(Operation.DELETE, text1.substring(text1.length() - x - 1, text1.length() - x))); } last_op = Operation.DELETE; break; } else if (v_map.get(d).contains(diff_footprint(x, y - 1))) { y--; if (last_op == Operation.INSERT) { path.getLast().text += text2.charAt(text2.length() - y - 1); } else { path.addLast(new DiffAction(Operation.INSERT, text2.substring(text2.length() - y - 1, text2.length() - y))); } last_op = Operation.INSERT; break; } else { x--; y--; assert (text1.charAt(text1.length() - x - 1) == text2.charAt(text2.length() - y - 1)) : "No diagonal. Can't happen. (diff_path2)"; if (last_op == Operation.EQUAL) { path.getLast().text += text1.charAt(text1.length() - x - 1); } else { path.addLast(new DiffAction(Operation.EQUAL, text1.substring(text1.length() - x - 1, text1.length() - x))); } last_op = Operation.EQUAL; } } } return path; } /** * Compute a good hash of two integers. * * @param x First int. * @param y Second int. * @return A long made up of both ints. */ protected long diff_footprint(int x, int y) { // The maximum size for a long is 9,223,372,036,854,775,807 // The maximum size for an int is 2,147,483,647 // Two ints fit nicely in one long. long result = x; result = result << 32; result += y; return result; } /** * Determine the common prefix of two strings * * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the start of each string. */ public int diff_commonPrefix(String text1, String text2) { // Performance analysis: http://neil.fraser.name/news/2007/10/09/ int n = Math.min(text1.length(), text2.length()); for (int i = 0; i < n; i++) { if (text1.charAt(i) != text2.charAt(i)) { return i; } } return n; } /** * Determine the common suffix of two strings * * @param text1 First string. * @param text2 Second string. * @return The number of characters common to the end of each string. */ public int diff_commonSuffix(String text1, String text2) { // Performance analysis: http://neil.fraser.name/news/2007/10/09/ int text1_length = text1.length(); int text2_length = text2.length(); int n = Math.min(text1_length, text2_length); for (int i = 1; i <= n; i++) { if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) { return i - 1; } } return n; } /** * Do the two texts share a substring which is at least half the length of the longer text? * * @param text1 First string. * @param text2 Second string. * @return Five element String array, containing the prefix of text1, the suffix of text1, the prefix of text2, the * suffix of text2 and the common middle. Or null if there was no match. */ protected String[] diff_halfMatch(String text1, String text2) { String longtext = text1.length() > text2.length() ? text1 : text2; String shorttext = text1.length() > text2.length() ? text2 : text1; if (longtext.length() < 10 || shorttext.length() < 1) { return null; // Pointless. } // First check if the second quarter is the seed for a half-match. String[] hm1 = diff_halfMatchI(longtext, shorttext, (longtext.length() + 3) / 4); // Check again based on the third quarter. String[] hm2 = diff_halfMatchI(longtext, shorttext, (longtext.length() + 1) / 2); String[] hm; if (hm1 == null && hm2 == null) { return null; } else if (hm2 == null) { hm = hm1; } else if (hm1 == null) { hm = hm2; } else { // Both matched. Select the longest. hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; } // A half-match was found, sort out the return data. if (text1.length() > text2.length()) { return hm; // return new String[]{hm[0], hm[1], hm[2], hm[3], hm[4]}; } else { return new String[] { hm[2], hm[3], hm[0], hm[1], hm[4] }; } } /** * Does a substring of shorttext exist within longtext such that the substring is at least half the length of * longtext? * * @param longtext Longer string. * @param shorttext Shorter string. * @param i Start index of quarter length substring within longtext. * @return Five element String array, containing the prefix of longtext, the suffix of longtext, the prefix of * shorttext, the suffix of shorttext and the common middle. Or null if there was no match. */ private String[] diff_halfMatchI(String longtext, String shorttext, int i) { // Start with a 1/4 length substring at position i as a seed. String seed = longtext.substring(i, i + longtext.length() / 4); int j = -1; String best_common = ""; String best_longtext_a = "", best_longtext_b = ""; String best_shorttext_a = "", best_shorttext_b = ""; while ((j = shorttext.indexOf(seed, j + 1)) != -1) { int prefixLength = diff_commonPrefix(longtext.substring(i), shorttext.substring(j)); int suffixLength = diff_commonSuffix(longtext.substring(0, i), shorttext.substring(0, j)); if (best_common.length() < suffixLength + prefixLength) { best_common = shorttext.substring(j - suffixLength, j) + shorttext.substring(j, j + prefixLength); best_longtext_a = longtext.substring(0, i - suffixLength); best_longtext_b = longtext.substring(i + prefixLength); best_shorttext_a = shorttext.substring(0, j - suffixLength); best_shorttext_b = shorttext.substring(j + prefixLength); } } if (best_common.length() >= longtext.length() / 2) { return new String[] { best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common }; } else { return null; } } /** * Reduce the number of edits by eliminating semantically trivial equalities. * * @param diffs LinkedList of Diff objects. */ public void diff_cleanupSemantic(LinkedList<DiffAction> diffs) { if (diffs.isEmpty()) { return; } boolean changes = false; Stack<DiffAction> equalities = new Stack<DiffAction>(); // Stack of qualities. String lastequality = null; // Always equal to equalities.lastElement().text ListIterator<DiffAction> pointer = diffs.listIterator(); // Number of characters that changed prior to the equality. int length_changes1 = 0; // Number of characters that changed after the equality. int length_changes2 = 0; DiffAction thisDiff = pointer.next(); while (thisDiff != null) { if (thisDiff.operation == Operation.EQUAL) { // equality found equalities.push(thisDiff); length_changes1 = length_changes2; length_changes2 = 0; lastequality = thisDiff.text; } else { // an insertion or deletion length_changes2 += thisDiff.text.length(); if (lastequality != null && (lastequality.length() <= length_changes1) && (lastequality.length() <= length_changes2)) { // System.out.println("Splitting: '" + lastequality + "'"); // Walk back to offending equality. while (thisDiff != equalities.lastElement()) { thisDiff = pointer.previous(); } pointer.next(); // Replace equality with a delete. pointer.set(new DiffAction(Operation.DELETE, lastequality)); // Insert a corresponding an insert. pointer.add(new DiffAction(Operation.INSERT, lastequality)); equalities.pop(); // Throw away the equality we just deleted. if (!equalities.empty()) { // Throw away the previous equality (it needs to be reevaluated). equalities.pop(); } if (equalities.empty()) { // There are no previous equalities, walk back to the start. while (pointer.hasPrevious()) { pointer.previous(); } } else { // There is a safe equality we can fall back to. thisDiff = equalities.lastElement(); while (thisDiff != pointer.previous()) { // Intentionally empty loop. } } length_changes1 = 0; // Reset the counters. length_changes2 = 0; lastequality = null; changes = true; } } thisDiff = pointer.hasNext() ? pointer.next() : null; } if (changes) { diff_cleanupMerge(diffs); } diff_cleanupSemanticLossless(diffs); } /** * Look for single edits surrounded on both sides by equalities which can be shifted sideways to align the edit to a * word boundary. e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. * * @param diffs LinkedList of Diff objects. */ public void diff_cleanupSemanticLossless(LinkedList<DiffAction> diffs) { String equality1, edit, equality2; String commonString; int commonOffset; int score, bestScore; String bestEquality1, bestEdit, bestEquality2; // Create a new iterator at the start. ListIterator<DiffAction> pointer = diffs.listIterator(); DiffAction prevDiff = pointer.hasNext() ? pointer.next() : null; DiffAction thisDiff = pointer.hasNext() ? pointer.next() : null; DiffAction nextDiff = pointer.hasNext() ? pointer.next() : null; // Intentionally ignore the first and last element (don't need checking). while (nextDiff != null) { if (prevDiff.operation == Operation.EQUAL && nextDiff.operation == Operation.EQUAL) { // This is a single edit surrounded by equalities. equality1 = prevDiff.text; edit = thisDiff.text; equality2 = nextDiff.text; // First, shift the edit as far left as possible. commonOffset = diff_commonSuffix(equality1, edit); if (commonOffset != 0) { commonString = edit.substring(edit.length() - commonOffset); equality1 = equality1.substring(0, equality1.length() - commonOffset); edit = commonString + edit.substring(0, edit.length() - commonOffset); equality2 = commonString + equality2; } // Second, step character by character right, looking for the best fit. bestEquality1 = equality1; bestEdit = edit; bestEquality2 = equality2; bestScore = diff_cleanupSemanticScore(equality1, edit) + diff_cleanupSemanticScore(edit, equality2); while (edit.length() > 0 && equality2.length() > 0 && edit.charAt(0) == equality2.charAt(0)) { equality1 += edit.charAt(0); edit = edit.substring(1) + equality2.charAt(0); equality2 = equality2.substring(1); score = diff_cleanupSemanticScore(equality1, edit) + diff_cleanupSemanticScore(edit, equality2); // The >= encourages trailing rather than leading whitespace on edits. if (score >= bestScore) { bestScore = score; bestEquality1 = equality1; bestEdit = edit; bestEquality2 = equality2; } } if (!prevDiff.text.equals(bestEquality1)) { // We have an improvement, save it back to the diff. if (bestEquality1.length() > 0) { prevDiff.text = bestEquality1; } else { pointer.previous(); // Walk past nextDiff. pointer.previous(); // Walk past thisDiff. pointer.previous(); // Walk past prevDiff. pointer.remove(); // Delete prevDiff. pointer.next(); // Walk past thisDiff. pointer.next(); // Walk past nextDiff. } thisDiff.text = bestEdit; if (bestEquality2.length() > 0) { nextDiff.text = bestEquality2; } else { pointer.remove(); // Delete nextDiff. nextDiff = thisDiff; thisDiff = prevDiff; } } } prevDiff = thisDiff; thisDiff = nextDiff; nextDiff = pointer.hasNext() ? pointer.next() : null; } } /** * Given two strings, compute a score representing whether the internal boundary falls on logical boundaries. Scores * range from 5 (best) to 0 (worst). * * @param one First string. * @param two Second string. * @return The score. */ private int diff_cleanupSemanticScore(String one, String two) { if (one.length() == 0 || two.length() == 0) { // Edges are the best. return 5; } // Each port of this function behaves slightly differently due to // subtle differences in each language's definition of things like // 'whitespace'. Since this function's purpose is largely cosmetic, // the choice has been made to use each language's native features // rather than force total conformity. int score = 0; // One point for non-alphanumeric. if (!Character.isLetterOrDigit(one.charAt(one.length() - 1)) || !Character.isLetterOrDigit(two.charAt(0))) { score++; // Two points for whitespace. if (Character.isWhitespace(one.charAt(one.length() - 1)) || Character.isWhitespace(two.charAt(0))) { score++; // Three points for line breaks. if (Character.getType(one.charAt(one.length() - 1)) == Character.CONTROL || Character.getType(two.charAt(0)) == Character.CONTROL) { score++; // Four points for blank lines. if (BLANKLINEEND.matcher(one).find() || BLANKLINESTART.matcher(two).find()) { score++; } } } } return score; } private Pattern BLANKLINEEND = Pattern.compile("\\n\\r?\\n\\Z", Pattern.DOTALL); private Pattern BLANKLINESTART = Pattern.compile("\\A\\r?\\n\\r?\\n", Pattern.DOTALL); /** * Reorder and merge like edit sections. Merge equalities. Any edit section can move as long as it doesn't cross an * equality. * * @param diffs LinkedList of Diff objects. */ public void diff_cleanupMerge(LinkedList<DiffAction> diffs) { diffs.add(new DiffAction(Operation.EQUAL, "")); // Add a dummy entry at the end. ListIterator<DiffAction> pointer = diffs.listIterator(); int count_delete = 0; int count_insert = 0; String text_delete = ""; String text_insert = ""; DiffAction thisDiff = pointer.next(); DiffAction prevEqual = null; int commonlength; while (thisDiff != null) { switch (thisDiff.operation) { case INSERT: count_insert++; text_insert += thisDiff.text; prevEqual = null; break; case DELETE: count_delete++; text_delete += thisDiff.text; prevEqual = null; break; case EQUAL: if (count_delete != 0 || count_insert != 0) { // Delete the offending records. pointer.previous(); // Reverse direction. while (count_delete-- > 0) { pointer.previous(); pointer.remove(); } while (count_insert-- > 0) { pointer.previous(); pointer.remove(); } if (count_delete != 0 && count_insert != 0) { // Factor out any common prefixies. commonlength = diff_commonPrefix(text_insert, text_delete); if (commonlength != 0) { if (pointer.hasPrevious()) { thisDiff = pointer.previous(); assert thisDiff.operation == Operation.EQUAL : "Previous diff should have been an equality."; thisDiff.text += text_insert.substring(0, commonlength); pointer.next(); } else { pointer.add(new DiffAction(Operation.EQUAL, text_insert.substring(0, commonlength))); } text_insert = text_insert.substring(commonlength); text_delete = text_delete.substring(commonlength); } // Factor out any common suffixies. commonlength = diff_commonSuffix(text_insert, text_delete); if (commonlength != 0) { thisDiff = pointer.next(); thisDiff.text = text_insert.substring(text_insert.length() - commonlength) + thisDiff.text; text_insert = text_insert.substring(0, text_insert.length() - commonlength); text_delete = text_delete.substring(0, text_delete.length() - commonlength); pointer.previous(); } } // Insert the merged records. if (text_delete.length() > 0) { pointer.add(new DiffAction(Operation.DELETE, text_delete)); } if (text_insert.length() > 0) { pointer.add(new DiffAction(Operation.INSERT, text_insert)); } // Step forward to the equality. thisDiff = pointer.hasNext() ? pointer.next() : null; } else if (prevEqual != null) { // Merge this equality with the previous one. prevEqual.text += thisDiff.text; pointer.remove(); thisDiff = pointer.previous(); pointer.next(); // Forward direction } count_insert = 0; count_delete = 0; text_delete = ""; text_insert = ""; prevEqual = thisDiff; break; } thisDiff = pointer.hasNext() ? pointer.next() : null; } // System.out.println(diff); if (diffs.getLast().text.length() == 0) { diffs.removeLast(); // Remove the dummy entry at the end. } /* * Second pass: look for single edits surrounded on both sides by equalities which can be shifted sideways to * eliminate an equality. e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC */ boolean changes = false; // Create a new iterator at the start. // (As opposed to walking the current one back.) pointer = diffs.listIterator(); DiffAction prevDiff = pointer.hasNext() ? pointer.next() : null; thisDiff = pointer.hasNext() ? pointer.next() : null; DiffAction nextDiff = pointer.hasNext() ? pointer.next() : null; // Intentionally ignore the first and last element (don't need checking). while (nextDiff != null) { if (prevDiff.operation == Operation.EQUAL && nextDiff.operation == Operation.EQUAL) { // This is a single edit surrounded by equalities. if (thisDiff.text.endsWith(prevDiff.text)) { // Shift the edit over the previous equality. thisDiff.text = prevDiff.text + thisDiff.text.substring(0, thisDiff.text.length() - prevDiff.text.length()); nextDiff.text = prevDiff.text + nextDiff.text; pointer.previous(); // Walk past nextDiff. pointer.previous(); // Walk past thisDiff. pointer.previous(); // Walk past prevDiff. pointer.remove(); // Delete prevDiff. pointer.next(); // Walk past thisDiff. thisDiff = pointer.next(); // Walk past nextDiff. nextDiff = pointer.hasNext() ? pointer.next() : null; changes = true; } else if (thisDiff.text.startsWith(nextDiff.text)) { // Shift the edit over the next equality. prevDiff.text += nextDiff.text; thisDiff.text = thisDiff.text.substring(nextDiff.text.length()) + nextDiff.text; pointer.remove(); // Delete nextDiff. nextDiff = pointer.hasNext() ? pointer.next() : null; changes = true; } } prevDiff = thisDiff; thisDiff = nextDiff; nextDiff = pointer.hasNext() ? pointer.next() : null; } // If shifts were made, the diff needs reordering and another shift sweep. if (changes) { diff_cleanupMerge(diffs); } } /** * Convert a Diff list into a pretty HTML report. * * @param diffs LinkedList of Diff objects. * @return HTML representation. */ public String diff_prettyHtml(LinkedList<DiffAction> diffs) { StringBuilder html = new StringBuilder(); for (DiffAction aDiff : diffs) { String text = aDiff.text.replace("&", "&").replace("<", "<").replace(">", ">") .replace("\n", "<br>"); switch (aDiff.operation) { case INSERT: String spaces = ""; int l = text.length(); for (int j = 0; j < l; j++) { spaces += " "; } html.append("<span style=\"background:" + WRONG_COLOR + ";\">").append(spaces).append("</span>"); break; case DELETE: html.append("<span style=\"background:" + WRONG_COLOR + ";\">").append(text).append("</span>"); break; case EQUAL: html.append("<span style=\"background:" + RIGHT_COLOR + ";\">").append(text).append("</span>"); break; } if (aDiff.operation != Operation.DELETE) { aDiff.text.length(); } } return html.toString(); } /** * Class representing one diff operation. */ public static class DiffAction { /** * One of: INSERT, DELETE or EQUAL. */ public Operation operation; /** * The text associated with this diff operation. */ public String text; /** * Constructor. Initializes the diff with the provided values. * * @param operation One of INSERT, DELETE or EQUAL. * @param text The text being applied. */ public DiffAction(Operation operation, String text) { // Construct a diff with the specified operation and text. this.operation = operation; this.text = text; } /** * Display a human-readable version of this Diff. * * @return text version. */ @Override public String toString() { String prettyText = text.replace('\n', '\u00b6'); return "Diff(" + operation + ",\"" + prettyText + "\")"; } /** * Is this Diff equivalent to another Diff? * * @param d Another Diff to compare against. * @return true or false. */ @Override public boolean equals(Object d) { try { return (((DiffAction) d).operation == operation) && (((DiffAction) d).text.equals(text)); } catch (ClassCastException e) { return false; } } } }