package com.interview.algorithms.string;
import com.interview.basics.sort.QuickSorter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Created_By: stefanie
 * Date: 14-9-11
 * Time: 9:25 PM
 *
 * Given a string, find the longest substring that occurs more than once
 * (the occurrences are allowed to overlap).
 *
 * 1. The brute-force search tries every candidate length from N-1 down to 1: O(N^3).
 * 2. The enhanced solution uses a suffix array: O(N^2 * logN).
 *    A suffix array holds every suffix of the given string,
 *      for example: abcdab -> [abcdab, bcdab, cdab, dab, ab, b]
 *    Sort the suffix array, O(N * logN * N); the trailing N is the cost of one
 *    string comparison:
 *      [ab, abcdab, b, bcdab, cdab, dab]
 *    Then, in the sorted array, take the common prefix of every entry with its
 *    successor, O(N * N):
 *      [ab, "", b, "", ""]
 *    so the longest repeated substring is "ab".
 *    The total is O(N^2 * logN).
 */
public class C11_26_LongestRepeatSubstring {

    /**
     * Finds the longest substring of {@code str} that appears at least twice.
     *
     * <p>When several repeated substrings share the maximum length, the one that
     * comes first in lexicographic order of the suffixes is returned.
     *
     * @param str the text to search; {@code null} is treated like an empty string
     * @return the longest repeated substring, or {@code ""} when nothing repeats
     */
    public static String find(String str) {
        // Fewer than two characters can never contain a repeated substring.
        if (str == null || str.length() < 2) {
            return "";
        }

        int n = str.length();
        // Every suffix must take part, including the final one-character suffix;
        // otherwise a repeat whose second occurrence ends the string is missed.
        String[] suffix = new String[n];
        for (int i = 0; i < n; i++) {
            suffix[i] = str.substring(i);
        }
        Arrays.sort(suffix);

        // After sorting, suffixes sharing the longest prefix sit next to each other,
        // so comparing neighbours is enough.
        String max = "";
        for (int i = 0; i + 1 < n; i++) {
            int len = comlen(suffix[i], suffix[i + 1]);
            if (len > max.length()) {
                max = suffix[i].substring(0, len);
            }
        }
        return max;
    }

    /**
     * Returns the length of the longest common prefix of {@code p} and {@code q}.
     */
    private static int comlen(String p, String q) {
        // Bound by the shorter string so the scan never reads past either end.
        int limit = Math.min(p.length(), q.length());
        int i = 0;
        while (i < limit && p.charAt(i) == q.charAt(i)) {
            i++;
        }
        return i;
    }
}