package org.gbif.checklistbank.authorship;
/**
* **********************************************************************
* Computes the longest common substring of two given strings.
* Assumes that the character '\1' does not appear in either text, because it is used as the
* separator when the two texts are concatenated. A possible improvement is to search for a
* character that does not appear in either text
* (and make sure SuffixArray.java doesn't choose the same one).
* % java LongestCommonSubstring tale.txt mobydick.txt
* ' seemed on the point of being '
* ***********************************************************************
*/
public class LongestCommonSubstring {

  /**
   * Returns the longest substring that occurs in both given texts.
   * <p>
   * The two texts are joined with a single '\1' separator character, a suffix array is built
   * over the joined text, and neighbouring suffixes that do not both start in the same text
   * are inspected for their common prefix. The first strictly longer match found while
   * walking the suffix array is kept, so later matches of equal length never replace it.
   * <p>
   * Precondition: neither text may contain the character '\1'. If one does, a reported match
   * may span the separator and therefore not be a real common substring.
   *
   * @param text1 the first text; {@code null} is treated as sharing nothing with the other text
   * @param text2 the second text; {@code null} is treated as sharing nothing with the other text
   * @return the longest common substring, or the empty string when there is none or when
   *         either text is {@code null} or empty
   */
  public static String lcs(String text1, String text2) {
    // A missing or empty text cannot share anything with the other one. Checking this up
    // front also keeps string concatenation from turning a null text2 into the literal
    // "null", and avoids a NullPointerException for a null text1.
    if (text1 == null || text2 == null || text1.isEmpty() || text2.isEmpty()) {
      return "";
    }

    int firstLength = text1.length();

    // concatenate the two strings with an intervening '\1'; the separator sits at offset
    // firstLength, so smaller offsets lie in text1 and larger offsets lie in text2
    String text = text1 + '\1' + text2;
    int totalLength = text.length();

    // compute the suffix array of the concatenated text
    // NOTE(review): presumably index(i) is the start offset of the i-th suffix in sorted
    // order and lcp(i) the common prefix length of suffixes i and i - 1 -- confirm against
    // SuffixArray.
    SuffixArray suffix = new SuffixArray(text);

    // search for the longest common substring
    String lcs = "";
    for (int i = 1; i < totalLength; i++) {
      int current = suffix.index(i);
      int previous = suffix.index(i - 1);

      // adjacent suffixes both from the first text string
      if (current < firstLength && previous < firstLength) {
        continue;
      }

      // adjacent suffixes both from the second text string
      if (current > firstLength && previous > firstLength) {
        continue;
      }

      // check whether the adjacent suffixes share a longer common substring
      int length = suffix.lcp(i);
      if (length > lcs.length()) {
        lcs = text.substring(current, current + length);
      }
    }
    return lcs;
  }
}