package com.interview.bits;
import java.util.*;
/**
* Date 03/08/2016
* @author Tushar Roy
*
* Find every sequence of length 10 that occurs more than once in a string consisting of A, C, G, T.
*
* Time complexity is O(n)
* Space complexity is O(n)
*
* https://leetcode.com/problems/repeated-dna-sequences/
*/
public class RepeatedDnaSequence {
    /** Length of the sequences being searched for. */
    private static final int SEQUENCE_LENGTH = 10;
    /** Each nucleotide is packed into two bits: A=0, C=1, G=2, T=3. */
    private static final int BITS_PER_BASE = 2;
    /** Keeps only the low 20 bits, i.e. the ten most recently added nucleotides (0xFFFFF). */
    private static final int WINDOW_MASK = (1 << (BITS_PER_BASE * SEQUENCE_LENGTH)) - 1;
    /** Extracts a single two-bit nucleotide code (0b11). */
    private static final int BASE_MASK = (1 << BITS_PER_BASE) - 1;

    /**
     * Finds every 10-letter sequence that appears more than once in {@code s}.
     *
     * <p>Each window of ten nucleotides is packed into a 20-bit integer that is
     * rolled forward one character at a time, so windows are compared as ints
     * rather than as substrings.
     *
     * @param s the DNA string; may be {@code null}
     * @return a new, mutable list holding each repeated sequence exactly once, in
     *         the order in which its second occurrence is encountered; empty when
     *         {@code s} is {@code null} or shorter than 10 characters
     * @throws IllegalArgumentException if {@code s} has at least 10 characters and
     *         contains a character other than A, C, G or T
     */
    public List<String> findRepeatedDnaSequences(String s) {
        List<String> result = new ArrayList<>();
        if (s == null || s.length() < SEQUENCE_LENGTH) {
            return result;
        }
        Set<Integer> seen = new HashSet<>();
        Set<Integer> reported = new HashSet<>();
        int window = 0;
        for (int i = 0; i < s.length(); i++) {
            window = add(window, s.charAt(i));
            if (i < SEQUENCE_LENGTH - 1) {
                // The window does not yet hold a full sequence.
                continue;
            }
            // Set.add returns false when the element was already present, so this
            // records the sequence only on its second occurrence and never again.
            if (!seen.add(window) && reported.add(window)) {
                result.add(createString(window));
            }
        }
        return result;
    }

    /**
     * Decodes a packed 20-bit window back into its 10-letter sequence.
     *
     * @param input the packed window, most significant pair first
     * @return the decoded sequence of A/C/G/T characters
     */
    private static String createString(int input) {
        StringBuilder sb = new StringBuilder(SEQUENCE_LENGTH);
        for (int i = SEQUENCE_LENGTH - 1; i >= 0; i--) {
            sb.append(getChar((input >> (i * BITS_PER_BASE)) & BASE_MASK));
        }
        return sb.toString();
    }

    /**
     * Rolls the window forward by one nucleotide: shifts the existing contents
     * left by two bits, drops anything beyond 20 bits, and appends the new code.
     *
     * @param input the current packed window
     * @param ch the nucleotide to append
     * @return the updated packed window
     * @throws IllegalArgumentException if {@code ch} is not A, C, G or T
     */
    private static int add(int input, char ch) {
        int code = getVal(ch);
        return ((input << BITS_PER_BASE) & WINDOW_MASK) | code;
    }

    /**
     * Maps a nucleotide letter to its two-bit code.
     *
     * @param ch one of A, C, G, T
     * @return 0, 1, 2 or 3 respectively
     * @throws IllegalArgumentException if {@code ch} is any other character
     */
    private static int getVal(char ch) {
        switch (ch) {
            case 'A':
                return 0;
            case 'C':
                return 1;
            case 'G':
                return 2;
            case 'T':
                return 3;
            default:
                throw new IllegalArgumentException(
                        "Invalid nucleotide '" + ch + "'; expected one of A, C, G, T");
        }
    }

    /**
     * Maps a two-bit code back to its nucleotide letter.
     *
     * @param val a code in the range 0..3
     * @return A, C, G or T respectively
     * @throws IllegalArgumentException if {@code val} is outside 0..3
     */
    private static char getChar(int val) {
        switch (val) {
            case 0:
                return 'A';
            case 1:
                return 'C';
            case 2:
                return 'G';
            case 3:
                return 'T';
            default:
                throw new IllegalArgumentException(
                        "Invalid nucleotide code " + val + "; expected 0..3");
        }
    }

    /** Small demo: prints the repeated sequences of a sample string. */
    public static void main(String[] args) {
        RepeatedDnaSequence rds = new RepeatedDnaSequence();
        List<String> result = rds.findRepeatedDnaSequences("AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT");
        System.out.print(result);
    }
}