/*
* This file is part of "SnipSnap Wiki/Weblog".
*
* Adapted from <a href="http://javacook.darwinsys.com/">Ian Darwin's Java Cookbook</a>
* See Diff.java for more information
*
* Result was printed, now the result is a list of ChangeInfo objects.
* Input was from two files, now diff takes two Strings.
* Changed to Java coding style.
*
* diff Text file difference utility.
* ---- Copyright 1987, 1989 by Donald C. Lindsay,
* School of Computer Science, Carnegie Mellon University.
* Copyright 1982 by Symbionics.
* Use without fee is permitted when not for direct commercial
* advantage, and when credit to the source is given. Other uses
* require specific permission.
*
* Adaption Copyright (c) 2002 Stephan J. Schmidt, Matthias L. Jugel
* All Rights Reserved.
*
* Please visit http://snipsnap.org/ for updates and contact.
*
* --LICENSE NOTICE--
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
* --LICENSE NOTICE--
*/
package org.olat.modules.wiki.versioning.diff;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.olat.modules.wiki.versioning.ChangeInfo;
/**
 * Computes the line-by-line differences between two text strings and returns
 * them as a list of {@link ChangeInfo} objects (deletes, inserts, changes and
 * moved blocks).
 *
 * <p>Lines are obtained with a {@link StringTokenizer} using the delimiters
 * {@code "\n\r"}; because a tokenizer never returns empty tokens, blank lines
 * are not part of the comparison and line numbers count non-empty lines only.
 *
 * <p>This class is not thread-safe: every diff keeps its working state in
 * instance fields, and the symbol table root {@code Node.panchor} is a static
 * that is reset and rebuilt by each call to {@link #diff(String, String)}.
 *
 * @author Stephan J. Schmidt
 *
 * @author Ian F. Darwin, Java version
 * @author D. C. Lindsay, C version (1982-1987)
 */
public class CookbookDiff {
    /** The change entry currently being collected; flushed by {@code setLast}. */
    private ChangeInfo last;
    /** Text lines collected for {@link #last} since it was started. */
    private List<String> lines = new ArrayList<String>();
    /** block len > any possible real block len */
    private static final int UNREAL = Integer.MAX_VALUE;
    /** Keeps track of information about the old and the new text. */
    private SourceInfo oldInfo, newInfo;
    /**
     * blocklen is the info about found blocks. It is 0 everywhere except at
     * the line numbers where blocks start in the old text; there it holds the
     * number of lines in the block. During printout this number is reset to
     * -1 once the block has been reported as a MOVE block (the printout phase
     * encounters such a block twice, but must report it only once).
     * The array has max(old lines, new lines) + 2 entries so that the two
     * pseudolines at line# 0 and line# maxLine+1 fit as well.
     */
    private int blocklen[];

    /**
     * Does one string comparison.
     *
     * @param oldText the original text; {@code null} is treated as empty
     * @param newText the revised text; {@code null} is treated as empty
     * @return the changes that turn {@code oldText} into {@code newText}, in
     *         the order they were found; empty when no differences were found
     * @throws IllegalArgumentException if either text has more than
     *         {@code SourceInfo.MAXLINECOUNT} non-empty lines
     */
    public List<ChangeInfo> diff(String oldText, String newText) {
        // The symbol table is shared static state: start every diff with an empty tree.
        Node.panchor = null;
        oldInfo = new SourceInfo();
        newInfo = new SourceInfo();

        inputScan(oldText, oldInfo);
        inputScan(newText, newInfo);

        /* Now that we've read all the lines, allocate some arrays. */
        oldInfo.alloc();
        newInfo.alloc();
        blocklen = new int[Math.max(oldInfo.maxLine, newInfo.maxLine) + 2];

        /* Now do the work, and collect the results. */
        transform();
        return printOut();
    }

    /**
     * inputscan Splits the given text into lines.
     * --------- Places the lines of that text in the symbol table.
     * Sets pinfo.maxLine to the number of lines found.
     *
     * @param input the text to split; {@code null} yields zero lines
     * @param pinfo the per-source record to fill (oldInfo or newInfo)
     */
    void inputScan(String input, SourceInfo pinfo) {
        pinfo.maxLine = 0;
        if (input == null) {
            return; // nothing to store; behaves like an empty text
        }
        StringTokenizer tokenizer = new StringTokenizer(input, "\n\r");
        while (tokenizer.hasMoreTokens()) {
            storeLine(tokenizer.nextToken(), pinfo);
        }
    }

    /**
     * storeline Places line into symbol table.
     * --------- Expects pinfo.maxLine initted: increments.
     * Places symbol table handle in pinfo.symbol.
     * Expects pinfo is either oldInfo or newInfo.
     *
     * @throws IllegalArgumentException if the line would be line number
     *         {@code SourceInfo.MAXLINECOUNT + 1} or higher
     */
    void storeLine(String linebuffer, SourceInfo pinfo) {
        // Fail with a clear message instead of running on into an
        // ArrayIndexOutOfBoundsException on pinfo.symbol.
        if (pinfo.maxLine >= SourceInfo.MAXLINECOUNT) {
            throw new IllegalArgumentException(
                "MAXLINECOUNT exceeded, must stop. (limit is "
                    + SourceInfo.MAXLINECOUNT + " lines)");
        }
        int linenum = ++pinfo.maxLine; /* note, no line zero */
        pinfo.symbol[linenum] =
            Node.addSymbol(linebuffer, pinfo == oldInfo, linenum);
    }

    /**
     * transform
     * Analyzes the differences and leaves its findings in
     * oldInfo.other, newInfo.other, and blocklen.
     * Expects both texts in the symbol table.
     * Expects valid "maxLine" and "symbol" in oldInfo and newInfo.
     */
    void transform() {
        int oldline, newline;
        int oldmax = oldInfo.maxLine + 2; /* Count pseudolines at */
        int newmax = newInfo.maxLine + 2; /* ..front and rear of text */

        // -1 means "no counterpart known yet".
        for (oldline = 0; oldline < oldmax; oldline++) {
            oldInfo.other[oldline] = -1;
        }
        for (newline = 0; newline < newmax; newline++) {
            newInfo.other[newline] = -1;
        }

        scanUnique(); /* scan for lines used once in both texts */
        scanAfter();  /* scan past sure-matches for non-unique blocks */
        scanBefore(); /* scan backwards from sure-matches */
        scanBlocks(); /* find the fronts and lengths of blocks */
    }

    /**
     * scanunique
     * Scans for lines which are used exactly once in each text.
     * Expects both texts in the symbol table, and oldInfo and newInfo valid.
     * The appropriate "other" array entries are set to the line# in
     * the other text.
     * Claims pseudo-lines at 0 and XXXinfo.maxLine+1 are unique.
     */
    void scanUnique() {
        int oldline, newline;
        Node psymbol;

        for (newline = 1; newline <= newInfo.maxLine; newline++) {
            psymbol = newInfo.symbol[newline];
            if (psymbol.symbolIsUnique()) { // 1 use in each text
                oldline = psymbol.linenum;
                newInfo.other[newline] = oldline; // record 1-1 map
                oldInfo.other[oldline] = newline;
            }
        }
        // The pseudolines in front of and behind the texts always match each other.
        newInfo.other[0] = 0;
        oldInfo.other[0] = 0;
        newInfo.other[newInfo.maxLine + 1] = oldInfo.maxLine + 1;
        oldInfo.other[oldInfo.maxLine + 1] = newInfo.maxLine + 1;
    }

    /**
     * scanafter
     * Expects both texts in the symbol table, and oldInfo and newInfo valid.
     * Expects the "other" arrays contain non-negative #s to indicate
     * lines that are unique in both texts.
     * For each such pair of places, scans past in each text.
     * Contiguous groups of lines that match non-uniquely are
     * taken to be good-enough matches, and so marked in "other".
     * Assumes each other[0] is 0.
     */
    void scanAfter() {
        int oldline, newline;

        for (newline = 0; newline <= newInfo.maxLine; newline++) {
            oldline = newInfo.other[newline];
            if (oldline >= 0) { /* is unique in old & new */
                for (;;) { /* scan after there in both texts */
                    if (++oldline > oldInfo.maxLine) break;
                    if (oldInfo.other[oldline] >= 0) break;
                    if (++newline > newInfo.maxLine) break;
                    if (newInfo.other[newline] >= 0) break;

                    /* oldline & newline exist, and aren't already matched.
                       Equal lines share one symbol table node, so an
                       identity comparison is enough here. */
                    if (newInfo.symbol[newline] != oldInfo.symbol[oldline]) {
                        break; // not same
                    }

                    newInfo.other[newline] = oldline; // record a match
                    oldInfo.other[oldline] = newline;
                }
            }
        }
    }

    /**
     * scanbefore
     * As scanafter, except scans towards the fronts of the texts.
     * Assumes the off-end lines have been marked as a match.
     */
    void scanBefore() {
        int oldline, newline;

        for (newline = newInfo.maxLine + 1; newline > 0; newline--) {
            oldline = newInfo.other[newline];
            if (oldline >= 0) { /* unique in each */
                for (;;) {
                    if (--oldline <= 0) break;
                    if (oldInfo.other[oldline] >= 0) break;
                    if (--newline <= 0) break;
                    if (newInfo.other[newline] >= 0) break;

                    /* oldline and newline exist, and aren't marked yet */
                    if (newInfo.symbol[newline] != oldInfo.symbol[oldline]) {
                        break; // not same
                    }

                    newInfo.other[newline] = oldline; // record a match
                    oldInfo.other[oldline] = newline;
                }
            }
        }
    }

    /**
     * scanblocks - Finds the beginnings and lengths of blocks of matches.
     * Sets the blocklen array (see definition).
     * Expects oldInfo valid.
     */
    void scanBlocks() {
        int oldline, newline;
        int oldfront = 0; // line# of front of a block in old, or 0
        int newlast = -1; // newline's value during prev. iteration

        for (oldline = 1; oldline <= oldInfo.maxLine; oldline++) {
            blocklen[oldline] = 0;
        }
        blocklen[oldInfo.maxLine + 1] = UNREAL; // starts a mythical blk

        for (oldline = 1; oldline <= oldInfo.maxLine; oldline++) {
            newline = oldInfo.other[oldline];
            if (newline < 0) {
                oldfront = 0; /* no match: not in block */
            } else { /* match. */
                if (oldfront == 0) oldfront = oldline;
                // A jump in the new line numbers starts a new block.
                if (newline != (newlast + 1)) oldfront = oldline;
                ++blocklen[oldfront];
            }
            newlast = newline;
        }
    }

    /* The following are global to printout's subsidiary routines */
    /** States of the printout phase, stored in {@link #printstatus}. */
    public static final int
        idle = 0, delete = 1, insert = 2, movenew = 3, moveold = 4,
        same = 5, change = 6;
    /** Kind of entry reported most recently; one of the constants above. */
    int printstatus;
    /** Set to true as soon as any difference has been reported. */
    boolean anyprinted;
    /** Current line numbers in the old and the new text. */
    int printoldline, printnewline;

    /**
     * printout - Collects the summary of all differences.
     * Expects all data structures have been filled out.
     *
     * @return the list of changes found, in reporting order
     */
    private List<ChangeInfo> printOut() {
        List<ChangeInfo> result = new ArrayList<ChangeInfo>();
        // Drop whatever an earlier diff() on this instance left behind.
        // Otherwise its final ChangeInfo would have its lines overwritten
        // and would be appended to this result as well.
        last = null;
        lines = new ArrayList<String>();
        printstatus = idle;
        anyprinted = false;

        for (printoldline = printnewline = 1; ;) {
            if (printoldline > oldInfo.maxLine) {
                newConsume(result);
                break;
            }
            if (printnewline > newInfo.maxLine) {
                oldConsume(result);
                break;
            }
            if (newInfo.other[printnewline] < 0) {
                if (oldInfo.other[printoldline] < 0) {
                    showChange(result);
                } else {
                    showInsert(result);
                }
            } else if (oldInfo.other[printoldline] < 0) {
                showDelete(result);
            } else if (blocklen[printoldline] < 0) {
                skipOld();
            } else if (oldInfo.other[printoldline] == printnewline) {
                showSame();
            } else {
                showMove(result);
            }
        }

        setLast(result); // flush the entry that is still being collected
        last = null;     // the flushed entry now belongs to the caller
        return result;
    }

    /**
     * Flushes the entry collected so far (see {@link #setLast(List)}) and
     * makes {@code info} the entry that subsequent lines are collected for.
     */
    private void setLast(ChangeInfo info, List<ChangeInfo> result) {
        setLast(result);
        last = info;
    }

    /**
     * Hands all lines collected so far to the current entry, appends that
     * entry to {@code result} and starts a fresh line buffer. Only the line
     * buffer is reset when there is no current entry.
     */
    private void setLast(List<ChangeInfo> result) {
        if (null != last) {
            last.setLines(lines.toArray(new String[0]));
            result.add(last);
        }
        lines = new ArrayList<String>();
    }

    /**
     * newconsume Part of printout. Have run out of old text.
     * Reports the rest of the new text, as inserts and/or moves.
     */
    private void newConsume(List<ChangeInfo> result) {
        for (;;) {
            if (printnewline > newInfo.maxLine) {
                break; /* end of text */
            }
            if (newInfo.other[printnewline] < 0) {
                showInsert(result);
            } else {
                showMove(result);
            }
        }
    }

    /**
     * oldconsume Part of printout. Have run out of new text.
     * Processes the rest of the old text, reporting any
     * parts which were deletes or moves.
     */
    private void oldConsume(List<ChangeInfo> result) {
        for (;;) {
            if (printoldline > oldInfo.maxLine) {
                break; /* end of text */
            }
            printnewline = oldInfo.other[printoldline];
            if (printnewline < 0) {
                showDelete(result);
            } else if (blocklen[printoldline] < 0) {
                skipOld();
            } else {
                showMove(result);
            }
        }
    }

    /**
     * showdelete Part of printout.
     * Expects printoldline is at a deletion.
     * Starts a new DELETE entry unless one is already being collected, then
     * adds the old line to it.
     */
    private void showDelete(List<ChangeInfo> result) {
        if (printstatus != delete) {
            ChangeInfo info = new ChangeInfo(ChangeInfo.DELETE, printoldline, printoldline);
            setLast(info, result);
        }
        printstatus = delete;
        lines.add(oldInfo.symbol[printoldline].getSymbol());
        anyprinted = true;
        printoldline++;
    }

    /**
     * showinsert Part of printout.
     * Expects printnewline is at an insertion.
     * Inserted lines that directly follow a change are appended to that
     * CHANGE entry; otherwise a new INSERT entry is started unless one is
     * already being collected.
     */
    private void showInsert(List<ChangeInfo> result) {
        if (printstatus == change) {
            // "changed to" part: keep collecting into the current CHANGE entry
        } else if (printstatus != insert) {
            ChangeInfo info = new ChangeInfo(ChangeInfo.INSERT, printoldline, printoldline);
            setLast(info, result);
        }
        printstatus = insert;
        lines.add(newInfo.symbol[printnewline].getSymbol());
        anyprinted = true;
        printnewline++;
    }

    /**
     * showchange Part of printout.
     * Expects printnewline is an insertion.
     * Expects printoldline is a deletion.
     * Adds the old line to the CHANGE entry and advances in the old text
     * only; the new lines are picked up by showInsert afterwards.
     */
    private void showChange(List<ChangeInfo> result) {
        if (printstatus != change) {
            ChangeInfo info = new ChangeInfo(ChangeInfo.CHANGE, printoldline, printoldline);
            setLast(info, result);
        }
        printstatus = change;
        lines.add(oldInfo.symbol[printoldline].getSymbol());
        anyprinted = true;
        printoldline++;
    }

    /**
     * skipold Part of printout.
     * Expects printoldline at start of an old block that has
     * already been announced as a move.
     * Skips over the old block.
     */
    private void skipOld() {
        printstatus = idle;
        for (;;) {
            if (++printoldline > oldInfo.maxLine) {
                break; /* end of text */
            }
            if (oldInfo.other[printoldline] < 0) {
                break; /* end of block */
            }
            if (blocklen[printoldline] != 0) {
                break; /* start of another */
            }
        }
    }

    /**
     * skipnew Part of printout.
     * Expects printnewline is at start of a new block that has
     * already been announced as a move.
     * Skips over the new block.
     */
    private void skipNew() {
        int oldline;
        printstatus = idle;
        for (;;) {
            if (++printnewline > newInfo.maxLine) {
                break; /* end of text */
            }
            oldline = newInfo.other[printnewline];
            if (oldline < 0) {
                break; /* end of block */
            }
            if (blocklen[oldline] != 0) {
                break; /* start of another */
            }
        }
    }

    /**
     * showsame Part of printout.
     * Expects printnewline and printoldline at start of
     * two blocks that aren't to be reported.
     * Advances both line counters past the block.
     */
    private void showSame() {
        int count;
        printstatus = idle;
        if (newInfo.other[printnewline] != printoldline) {
            System.err.println("BUG IN LINE REFERENCING");
        }
        count = blocklen[printoldline];
        printoldline += count;
        printnewline += count;
    }

    /**
     * showmove Part of printout.
     * Expects printoldline, printnewline at start of
     * two different blocks (a move was done).
     */
    private void showMove(List<ChangeInfo> result) {
        int oldblock = blocklen[printoldline];
        int newother = newInfo.other[printnewline];
        int newblock = blocklen[newother];

        if (newblock < 0) {
            skipNew(); // already reported.
        } else if (oldblock >= newblock) { // assume new's blk moved.
            blocklen[newother] = -1; // stamp block as "printed".
            ChangeInfo info = new ChangeInfo(ChangeInfo.MOVE, newother, printoldline);
            setLast(info, result);
            for (; newblock > 0; newblock--, printnewline++) {
                lines.add(newInfo.symbol[printnewline].getSymbol());
            }
            anyprinted = true;
            printstatus = idle;
        } else { /* assume old's block moved */
            skipOld(); /* target line# not known, report later */
        }
    }
}
/**
* Class "node". The symbol table routines in this class all
* understand the symbol table format, which is a binary tree.
* The methods are: addSymbol, symbolIsUnique, showSymbol.
*/
class Node { /* the tree is made up of these nodes */
Node pleft, pright;
int linenum;
static final int freshnode = 0,
oldonce = 1, newonce = 2, bothonce = 3, other = 4;
int /* enum linestates */ linestate;
String line;
static Node panchor = null; /* symtab is a tree hung from this */
Node(String pline) {
pleft = pright = null;
linestate = freshnode;
/* linenum field is not always valid */
line = pline;
}
/**
* matchsymbol Searches tree for a match to the line.
* If node's linestate == freshnode, then created the node.
*/
static Node matchsymbol(String pline) {
int comparison;
Node pnode = panchor;
if (panchor == null) return panchor = new Node(pline);
for (; ;) {
comparison = pnode.line.compareTo(pline);
if (comparison == 0) return pnode; /* found */
if (comparison < 0) {
if (pnode.pleft == null) {
pnode.pleft = new Node(pline);
return pnode.pleft;
}
pnode = pnode.pleft;
}
if (comparison > 0) {
if (pnode.pright == null) {
pnode.pright = new Node(pline);
return pnode.pright;
}
pnode = pnode.pright;
}
}
/* NOTE: There are return stmts, so control does not get here. */
}
/**
* addSymbol(String pline) - Saves line into the symbol table.
* Returns a handle to the symtab entry for that unique line.
* If inoldfile nonzero, then linenum is remembered.
*/
static Node addSymbol(String pline, boolean inoldfile, int linenum) {
Node pnode;
pnode = matchsymbol(pline); /* find the node in the tree */
if (pnode.linestate == freshnode) {
pnode.linestate = inoldfile ? oldonce : newonce;
} else {
if ((pnode.linestate == oldonce && !inoldfile) ||
(pnode.linestate == newonce && inoldfile))
pnode.linestate = bothonce;
else
pnode.linestate = other;
}
if (inoldfile) pnode.linenum = linenum;
return pnode;
}
/**
* symbolIsUnique Arg is a ptr previously returned by addSymbol.
* -------------- Returns true if the line was added to the
* symbol table exactly once with inoldfile true,
* and exactly once with inoldfile false.
*/
public boolean symbolIsUnique() {
return (linestate == bothonce);
}
/**
* showSymbol Prints the line to stdout.
*/
public String getSymbol() {
return line;
}
}
/**
 * This is the info kept per-source (one instance for the old text and one
 * for the new text).
 */
class SourceInfo {
    /** Capacity limit: the symbol array has room for this many lines plus two extra slots. */
    static final int MAXLINECOUNT = 20000;

    /** After input done, # lines in the source. Line numbers start at 1. */
    public int maxLine;

    /** The symtab handle of each line, indexed by line number. */
    Node symbol[];

    /**
     * Map of line# to line# in the other source ( -1 means don't-know ).
     * Allocated AFTER the lines are read, see {@link #alloc()}.
     */
    int other[];

    /**
     * Normal constructor. Reserves the symbol array; the "other" map is
     * left unallocated until {@link #alloc()} is called.
     */
    SourceInfo() {
        symbol = new Node[MAXLINECOUNT + 2];
        other = null; // allocated later!
    }

    /**
     * Allocates the "other" map. This is done late, so that it can be sized
     * by the number of lines actually read: one entry per line plus the two
     * pseudolines at line# 0 and line# maxLine+1. Expects maxLine to be final.
     */
    void alloc() {
        // Previously sized from symbol.length, i.e. always MAXLINECOUNT + 4
        // entries regardless of the input size.
        other = new int[maxLine + 2];
    }
}