package edu.princeton.cs.algs4.ch60; import edu.princeton.cs.algs4.SuffixArray; import edu.princeton.cs.introcs.*; /************************************************************************* * Compilation: javac LongestCommonSubstring.java * Execution: java LongestCommonSubstring file1.txt file2.txt * Dependencies: SuffixArray.java StdOut.java In.java * * Reads in two text strings, replaces all consecutive blocks of * whitespace with a single space, and then computes the longest * common substring. * * Assumes that the character '\1' does not appear in either text. * Perhaps, search for a character that does not appear in either text * (and make sure SuffixArray.java doesn't choose the same one). * * % java LongestCommonSubstring tale.txt mobydick.txt * ' seemed on the point of being ' * *************************************************************************/ public class LongestCommonSubstring { public static void main(String[] args) { // read in two string from two files In in1 = new In(args[0]); In in2 = new In(args[1]); String text1 = in1.readAll().trim().replaceAll("\\s+", " "); String text2 = in2.readAll().trim().replaceAll("\\s+", " "); int N1 = text1.length(); // int N2 = text2.length(); // concatenate two string with intervening '\1' String text = text1 + '\1' + text2; int N = text.length(); // compute suffix array of concatenated text SuffixArray suffix = new SuffixArray(text); // search for longest common substring String lcs = ""; for (int i = 1; i < N; i++) { // adjacent suffixes both from first text string if (suffix.index(i) < N1 && suffix.index(i-1) < N1) continue; // adjacent suffixes both from secondt text string if (suffix.index(i) > N1 && suffix.index(i-1) > N1) continue; // check if adjacent suffixes longer common substring int length = suffix.lcp(i); if (length > lcs.length()) { lcs = text.substring(suffix.index(i), suffix.index(i) + length); } } // print out longest common substring StdOut.println(lcs.length()); StdOut.println("'" + lcs + "'"); } } /************************************************************************* * Copyright 2002-2012, Robert Sedgewick and Kevin Wayne. * * This file is part of algs4-package.jar, which accompanies the textbook * * Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne, * Addison-Wesley Professional, 2011, ISBN 0-321-57351-X. * http://algs4.cs.princeton.edu * * * algs4-package.jar is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * algs4-package.jar is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with algs4-package.jar. If not, see http://www.gnu.org/licenses. *************************************************************************/