/**
 * Copyright (c) 2009--2012 Red Hat, Inc.
 *
 * This software is licensed to you under the GNU General Public License,
 * version 2 (GPLv2). There is NO WARRANTY for this software, express or
 * implied, including the implied warranties of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
 * along with this software; if not, see
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
 *
 * Red Hat trademarks are not licensed under GPLv2. No permission is
 * granted to use or replicate Red Hat trademarks that are incorporated
 * in this software or its documentation.
 */
package com.redhat.rhn.common.filediff;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * File diff based on Eugene W. Myers's algorithm as described in
 * "An O(ND) Difference Algorithm and Its Variations".
 *
 * Candidate edit paths ("traces") are held in a singly linked list that
 * starts with a placeholder node, and are all advanced one step at a time
 * until one of them reports that it is done.
 * @version $Rev$
 */
public class Differ {

    /** Upper bound on the number of traces that survive pruning in a pass. */
    private static final int NUMBEROFTRACESTOKEEP = 1000;

    /** Placeholder node in front of the first real trace. */
    private final Trace head;

    /** Node whose successor is the trace currently being examined. */
    private Trace beforeCurrent;

    /** Highest match count observed on any stepped trace so far. */
    private int bestSoFar;

    /** Scratch list of match counts; filled and emptied within one pass. */
    private final List<Integer> matches = new ArrayList<Integer>();

    /**
     * @param oldLength The length of the old file
     * @param newLength The length of the new file
     */
    public Differ(int oldLength, int newLength) {
        // A placeholder in front of the list lets us unlink the first real
        // trace the same way as any other: through its predecessor.
        head = new Trace(0, 0);
        head.setNext(new Trace(oldLength, newLength));
        beforeCurrent = head;
        bestSoFar = 0;
    }

    /**
     * Keeps stepping the traces until one of them yields a result.
     *
     * @param oldFile The old(first, from) file
     * @param newFile The new(second, to) file
     * @return A list of Hunks representing the differences.
     */
    public List<Hunk> diff(String[] oldFile, String[] newFile) {
        List<Hunk> hunks;
        do {
            hunks = step(oldFile, newFile);
        } while (hunks == null);
        return hunks;
    }

    /**
     * Advances every live trace by one step, in parallel rather than
     * recursively, pruning traces that are unlikely to be the most optimal
     * along the way.
     *
     * Only the best {@value #NUMBEROFTRACESTOKEEP} traces are considered at
     * any given time, which keeps the memory required from growing
     * exponentially on large files. The resulting trace is always
     * *correct*, but maybe not *optimal*.
     *
     * @param oldFile
     *            The old(first, from) file
     * @param newFile
     *            The new(second, to) file
     * @return A list of Hunks representing the differences. null if we need
     *         to step again
     */
    private List<Hunk> step(String[] oldFile, String[] newFile) {
        int minimumMatchValue = findMinimumMatchValue();

        beforeCurrent = head;
        while (beforeCurrent.next() != null) {
            Trace current = beforeCurrent.next();

            // Unlink traces that fall below the cut-off or that can no
            // longer beat the best match count already seen.
            boolean belowCutoff = current.getMatches() < minimumMatchValue;
            if (belowCutoff || bestSoFar > current.bestPossible()) {
                beforeCurrent.setNext(current.next());
                continue;
            }

            boolean forked = current.step(oldFile, newFile);

            // The first trace to reach the end of both files wins.
            if (current.isDone()) {
                return current.createHunks(oldFile, newFile);
            }

            bestSoFar = Math.max(bestSoFar, current.getMatches());

            // A fork inserts a new, already handled trace right behind the
            // current one, so the cursor has to jump over it as well.
            beforeCurrent = forked ? current.next() : current;
        }

        // Nobody finished during this pass; the caller must step again.
        return null;
    }

    /**
     * Works out the match count a trace has to reach to survive this pass.
     *
     * @return 0 while there are at most {@value #NUMBEROFTRACESTOKEEP}
     *         traces, otherwise the match count found
     *         {@value #NUMBEROFTRACESTOKEEP} places from the top of the
     *         ascending list of all match counts
     */
    private int findMinimumMatchValue() {
        for (Trace trace = head.next(); trace != null; trace = trace.next()) {
            matches.add(trace.getMatches());
        }

        int threshold = 0;
        if (matches.size() > NUMBEROFTRACESTOKEEP) {
            Collections.sort(matches); // ascending
            threshold = matches.get(matches.size() - NUMBEROFTRACESTOKEEP);
        }

        // Leave the scratch list empty for the next pass.
        matches.clear();
        return threshold;
    }
}