package com.freetymekiyan.algorithms.level.medium;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When
 * studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
 * <p>
 * Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
 * <p>
 * For example,
 * <p>
 * Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT",
 * <p>
 * Return:
 * ["AAAAACCCCC", "CCCCCAAAAA"].
 * <p>
 * Company Tags: LinkedIn
 * Tags: Hash Table, Bit Manipulation
 */
public class RepeatedDNASequence {

    /** Length of the sequences being searched for. */
    private static final int SEQUENCE_LENGTH = 10;

    /** Number of low-order bits of each character that are packed into the rolling key. */
    private static final int BITS_PER_CHAR = 3;

    /** Selects the low {@link #BITS_PER_CHAR} bits of a character. */
    private static final int CHAR_MASK = (1 << BITS_PER_CHAR) - 1;

    /** Keeps only the 30 bits (10 characters x 3 bits) that describe the current window. */
    private static final int WINDOW_MASK = (1 << (SEQUENCE_LENGTH * BITS_PER_CHAR)) - 1;

    /**
     * Hash Table. Bit Manipulation.
     * <p>
     * To save space, each 10-letter window is mapped to an int key instead of being stored as a string.
     * The low 3 bits of the four nucleotide codes are all different
     * (A: 0x41 -> 1, C: 0x43 -> 3, G: 0x47 -> 7, T: 0x54 -> 4), so 10 characters x 3 bits = 30 bits
     * fit into one int and identify a window uniquely as long as the input only contains A, C, G and T.
     * Other characters are not rejected; they may share a 3-bit code and therefore be confused with one another.
     * <p>
     * The map value is {@code true} while a key has been seen exactly once and {@code false} once the
     * corresponding sequence has been added to the result, so each repeated sequence is reported only once.
     *
     * @param s the DNA string; may be {@code null}
     * @return the distinct 10-letter sequences that occur more than once, in the order in which their first
     * repetition is found; an empty list if {@code s} is {@code null} or shorter than 10 characters
     */
    public List<String> findRepeatedDnaSequences(String s) {
        if (s == null || s.length() < SEQUENCE_LENGTH) {
            return Collections.emptyList();
        }
        List<String> res = new ArrayList<>();
        Map<Integer, Boolean> seenOnce = new HashMap<>();
        int key = 0;
        for (int end = 0; end < s.length(); end++) {
            // Shift the oldest character out of the 30-bit window and append the newest one.
            key = ((key << BITS_PER_CHAR) & WINDOW_MASK) | (s.charAt(end) & CHAR_MASK);
            if (end < SEQUENCE_LENGTH - 1) {
                // The window is not full yet. Recording partial keys could match a later key and
                // lead to a substring call with a negative start index.
                continue;
            }
            Boolean pending = seenOnce.get(key);
            if (pending == null) {
                seenOnce.put(key, Boolean.TRUE);
            } else if (pending) {
                res.add(s.substring(end - SEQUENCE_LENGTH + 1, end + 1));
                seenOnce.put(key, Boolean.FALSE);
            }
        }
        return res;
    }

    /**
     * Hash Table. O(n) Time & Space.
     * <p>
     * Keeps a set of every 10-letter window seen so far; a window that is already in the set is a repeat.
     * A second set makes sure each repeated sequence is reported only once.
     *
     * @param s the DNA string; may be {@code null}
     * @return the distinct 10-letter sequences that occur more than once, in the order in which their first
     * repetition is found; an empty list if {@code s} is {@code null} or shorter than 10 characters
     */
    public List<String> findRepeatedDnaSequencesB(String s) {
        if (s == null || s.length() < SEQUENCE_LENGTH) {
            return Collections.emptyList();
        }
        List<String> res = new ArrayList<>();
        Set<String> seen = new HashSet<>();
        Set<String> reported = new HashSet<>();
        // "<=" so that the last window, starting at length - 10, is examined as well.
        for (int start = 0; start <= s.length() - SEQUENCE_LENGTH; start++) {
            String sub = s.substring(start, start + SEQUENCE_LENGTH);
            // Set.add returns false when the element was already present.
            if (!seen.add(sub) && reported.add(sub)) {
                res.add(sub);
            }
        }
        return res;
    }
}