package com.interview.leetcode.strings;
/**
 * Substring search utilities: a brute-force scan and a Knuth-Morris-Pratt (KMP) scan.
 *
 * <p>Both public methods return the index of the first occurrence of {@code pattern}
 * in {@code str}, or {@code -1} when there is none. An empty pattern matches at
 * index 0, including when {@code str} is itself empty.
 *
 * <p>Created by: stefanie, 14-11-17, 4:49 PM
 */
public class SubstringLocator {

    /**
     * Brute-force search: at every candidate start position {@code i} in {@code str},
     * compare the pattern character by character; on the first mismatch move on to
     * {@code i + 1} and restart the pattern from its beginning.
     *
     * <p>Time: O(M*N) in the worst case, e.g. "aaaaaaaaab" and "aaaab".
     *
     * @param str     the text to search in
     * @param pattern the text to search for
     * @return index of the first occurrence of {@code pattern} in {@code str},
     *         {@code 0} if {@code pattern} is empty, or {@code -1} if not found
     * @throws NullPointerException if {@code str} or {@code pattern} is null
     */
    public static int match(String str, String pattern) {
        int textLength = str.length();
        int patternLength = pattern.length();
        // Only start positions that leave room for the whole pattern can match.
        // This bound also keeps str.charAt(start + offset) inside the text.
        for (int start = 0; start <= textLength - patternLength; start++) {
            int offset = 0;
            while (offset < patternLength
                    && pattern.charAt(offset) == str.charAt(start + offset)) {
                offset++;
            }
            if (offset == patternLength) {
                return start;
            }
        }
        return -1;
    }

    /**
     * KMP search.
     *
     * <p>The brute-force scan restarts the pattern from its first character after every
     * mismatch, re-comparing text characters it has already seen. KMP pre-processes the
     * pattern into a {@code next} table so that, on a mismatch at pattern index
     * {@code j}, the pattern pointer falls back to {@code next[j]} while the text
     * pointer {@code i} never moves backwards. The scan is therefore O(N), plus O(M)
     * to build the table.
     *
     * <p>{@code next[j]} is the largest length {@code k < j} such that the first
     * {@code k} characters of the pattern equal the {@code k} characters immediately
     * before index {@code j}; {@code next[0]} is the sentinel {@code -1}.
     *
     * <pre>
     * pattern: a  a  a  a  b  a  a  b
     * next:   -1  0  1  2  3  0  1  2
     *
     * pattern: a  b  a  b  a  b
     * next:   -1  0  0  1  2  3
     * </pre>
     *
     * <p>Intuition: if s1 s2 s3 s4 matched p1 p2 p3 p4 but s5 != p5, and p1 p2 == p3 p4,
     * then s3 s4 == p3 p4 == p1 p2, so the first two pattern characters are already
     * known to match and the comparison resumes with p3 against s5.
     *
     * <p>See {@link #calNext(String)} for the table construction.
     *
     * @param str     the text to search in
     * @param pattern the text to search for
     * @return index of the first occurrence of {@code pattern} in {@code str},
     *         {@code 0} if {@code pattern} is empty, or {@code -1} if not found
     * @throws NullPointerException if {@code str} or {@code pattern} is null
     */
    public static int kmpMatch(String str, String pattern) {
        int textLength = str.length();
        int patternLength = pattern.length();
        if (patternLength == 0) {
            return 0; // empty pattern matches at the start; also avoids an empty next table
        }
        int[] next = calNext(pattern);
        int i = 0; // position in str; never moves backwards
        int j = 0; // position in pattern
        while (i < textLength) {
            if (pattern.charAt(j) == str.charAt(i)) {
                i++;
                j++;
                if (j == patternLength) {
                    return i - j; // whole pattern consumed: match starts j characters back
                }
            } else if (j == 0) {
                i++; // mismatch on the first pattern character: advance in the text
            } else {
                j = next[j]; // mismatch later on: reuse the already-matched prefix
            }
        }
        return -1;
    }

    /**
     * Builds the KMP fallback table for {@code pattern}.
     *
     * @param pattern the pattern to pre-process
     * @return an array of {@code pattern.length()} entries where entry 0 is {@code -1}
     *         and entry {@code j > 0} is the length of the longest proper prefix of
     *         the first {@code j} characters that is also a suffix of them; an empty
     *         array for an empty pattern
     */
    private static int[] calNext(String pattern) {
        int[] next = new int[pattern.length()];
        if (next.length == 0) {
            return next; // nothing to fill in; next[0] would be out of bounds
        }
        next[0] = -1;
        int front = 0;  // index whose successor entry is being computed
        int back = -1;  // length of the currently matched prefix, -1 = restart
        while (front < next.length - 1) {
            if (back == -1 || pattern.charAt(front) == pattern.charAt(back)) {
                // The matched prefix grows by one (or restarts at 0); record it for front + 1.
                next[++front] = ++back;
            } else {
                // Mismatch: fall back to the next shorter candidate prefix, as kmpMatch does.
                back = next[back];
            }
        }
        return next;
    }
}