/*
* Copyright 2015 MiLaboratory.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.milaboratory.core.tree;
import com.milaboratory.core.mutations.Mutations;
import com.milaboratory.core.sequence.*;
import org.apache.commons.math3.random.RandomDataGenerator;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937a;
import org.junit.Test;
import java.util.*;
import static com.milaboratory.core.sequence.SequencesUtils.concatenate;
import static com.milaboratory.test.TestUtil.its;
import static com.milaboratory.test.TestUtil.randomSequence;
import static org.hamcrest.CoreMatchers.not;
import static org.junit.Assert.*;
public class SequenceTreeMapTest {
/**
 * Number of repetitions used by the randomized tests. Defaults to 1 and may be overridden with the
 * {@code repeats} system property (e.g. {@code -Drepeats=10}).
 */
private int repeats = 1;

/**
 * Reads the optional {@code repeats} system property. A missing property keeps the default of 1; a value
 * that is not a valid decimal integer also keeps the default, but is reported on standard error so that
 * a mistyped setting does not go unnoticed.
 */
public SequenceTreeMapTest() {
    String val = System.getProperty("repeats");
    if (val != null) {
        try {
            this.repeats = Integer.parseInt(val);
        } catch (NumberFormatException ignored) {
            // Best effort: a malformed value must not break the test run, so the default is kept.
            System.err.println("Ignoring non-numeric value of system property \"repeats\": " + val);
        }
    }
}
/*
* Exact tests
*/
/**
 * Checks exact {@code put}/{@code get} behaviour on two keys and verifies that the node iterator
 * visits each stored key/value pair exactly once.
 */
@Test
public void testExact1() throws Exception {
    SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);

    // A first insertion returns null; overwriting an existing key returns the value it replaced.
    assertNull(tree.put(new NucleotideSequence("attagaca"), 1));
    assertEquals((Integer) 1, tree.put(new NucleotideSequence("attagaca"), 2));
    assertNull(tree.put(new NucleotideSequence("attacaca"), 3));
    assertEquals((Integer) 3, tree.get(new NucleotideSequence("attacaca")));

    // Everything the iterator is expected to report; entries are ticked off as they are seen.
    Set<NucleotideSequence> pendingKeys = new HashSet<>(Arrays.asList(
            new NucleotideSequence("attacaca"),
            new NucleotideSequence("attagaca")));
    Set<Integer> pendingValues = new HashSet<>(Arrays.asList(2, 3));

    SequenceTreeMap.NodeIterator walker = tree.nodeIterator();
    while (walker.hasNext()) {
        SequenceTreeMap.Node current = walker.next();
        assertTrue(pendingValues.remove(current.object));
        assertTrue(pendingKeys.remove(walker.getSequence()));
    }

    assertTrue(pendingKeys.isEmpty());
    assertTrue(pendingValues.isEmpty());
}
/**
 * Stores four longer sequences (sharing prefixes of different lengths) and verifies that the node
 * iterator reports every key/value pair exactly once.
 */
@Test
public void testExact2() throws Exception {
    SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);

    final String[] keys = {
            "attacacaattaattacacacacaattacaca",
            "attacacaattacacaattacgacacttacaca",
            "atattattacacaacacatacattacacaaca",
            "attacacaattacacaattacacaattacacaattacacaattacaca"
    };
    final int[] values = {3, 4, 5, 19};

    Set<NucleotideSequence> pendingKeys = new HashSet<>();
    Set<Integer> pendingValues = new HashSet<>();
    for (int k = 0; k < keys.length; ++k) {
        // Every key is new, so put must report that nothing was replaced.
        assertNull(tree.put(new NucleotideSequence(keys[k]), values[k]));
        pendingKeys.add(new NucleotideSequence(keys[k]));
        pendingValues.add(values[k]);
    }

    SequenceTreeMap.NodeIterator walker = tree.nodeIterator();
    while (walker.hasNext()) {
        SequenceTreeMap.Node current = walker.next();
        assertTrue(pendingValues.remove(current.object));
        assertTrue(pendingKeys.remove(walker.getSequence()));
    }

    assertTrue(pendingKeys.isEmpty());
    assertTrue(pendingValues.isEmpty());
}
/*
* Branching enumerator tests
*/
/**
 * Runs {@link BranchingEnumerator} in each of its three modes against the reference "attacaca" and checks
 * that, after following the rest of the reference from every branch point, exactly the expected stored
 * values are reached.
 */
@Test
public void testBranchingEnumerator1() {
    SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);

    // The value stored for each key is its 1-based position in this list.
    final String[] keys = {
            "attagaca",  //  1: 1 mm
            "attacaca",  //  2: match
            "ataacaca",  //  3: 1 mm
            "attcgtca",  //  4: many mm
            "atttacaca", //  5: 1 insertion in stretch
            "atacaca",   //  6: 1 deletion in the "t" stretch
            "attacacta", //  7: 1 insertion
            "attcaca",   //  8: 1 deletion
            "attacac",   //  9: 1 deletion in the end
            "ttacaca",   // 10: 1 deletion in the beginning
            "tattacaca", // 11: 1 insertion in the beginning
            "attacacat", // 12: 1 insertion in the end
            "attacact",  // 13: 1 mm end
            "tttacaca"   // 14: 1 mm begin
    };
    for (int k = 0; k < keys.length; ++k)
        assertNull(tree.put(new NucleotideSequence(keys[k]), k + 1));

    // Expected values per enumerator mode (index = mode passed to setup).
    HashSet<Integer>[] expectedByMode = new HashSet[3];
    expectedByMode[0] = new HashSet<>(Arrays.asList(1, 3, 13, 14));
    expectedByMode[1] = new HashSet<>(Arrays.asList(6, 8, 9, 10));
    expectedByMode[2] = new HashSet<>(Arrays.asList(5, 7, 11, 12));

    NucleotideSequence reference = new NucleotideSequence("attacaca");
    for (byte mode = 0; mode < 3; ++mode) {
        BranchingEnumerator<NucleotideSequence, Integer> enumerator =
                new BranchingEnumerator<>(reference, null);
        enumerator.setup(mode, false);
        enumerator.reset(0, tree.root);

        HashSet<Integer> reached = new HashSet<>();
        for (SequenceTreeMap.Node<Integer> branch = enumerator.next();
             branch != null;
             branch = enumerator.next()) {
            // Follow the remainder of the reference below the branch point, stopping on a missing link.
            SequenceTreeMap.Node<Integer> cursor = branch;
            for (int pos = enumerator.getNextPositionAfterBranching();
                 pos < reference.size() && cursor != null;
                 ++pos)
                cursor = cursor.links[reference.codeAt(pos)];

            if (cursor != null && cursor.object != null)
                reached.add(cursor.object);
        }
        assertEquals(expectedByMode[mode], reached);
    }
}
/*
* Non-randomised tests for NeighborhoodIterator
*/
/**
 * For every neighbour reported around the reference "attacaca", applying the mutations reported by the
 * iterator to the reference must reproduce the stored sequence.
 */
@Test
public void testGetMutations() throws Exception {
    SequenceTreeMap<NucleotideSequence, NucleotideSequence> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);

    // "attagaca" is listed twice on purpose (as in the original data); the second put overwrites the first.
    final String[] keys = {
            "attagaca", "attagaca", "attacaca", "ataacaca", "attcgtca",
            "atttacaca", "atacaca", "attacacta", "attcaca", "attacac",
            "ttacaca", "tattacaca", "attacacat", "attacact", "tttacaca"
    };
    for (String key : keys) {
        NucleotideSequence stored = new NucleotideSequence(key);
        tree.put(stored, stored);
    }

    final NucleotideSequence reference = new NucleotideSequence("attacaca");
    final double[] penalties = {0.31, 0.301, 0.3001};
    final int[] maxErrors = {2, 2, 2};
    NeighborhoodIterator<NucleotideSequence, NucleotideSequence> neighbours =
            tree.getNeighborhoodIterator(reference, 2.0, penalties, maxErrors, null);

    for (NucleotideSequence found : neighbours.it()) {
        Mutations<NucleotideSequence> path = neighbours.getCurrentMutations();
        assertEquals(found, path.mutate(reference));
    }
}
/**
 * Exercises the neighborhood iterator for all 8 combinations of "at most one mismatch / deletion /
 * insertion". For every combination it checks that penalties are reported in non-decreasing order, that
 * only expected values are returned and that every expected value is returned.
 */
@Test
public void testNIterator() throws Exception {
    SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);

    // The value stored for each key is its 1-based position in this list.
    final String[] keys = {
            "attagaca",  //  1: 1 mm
            "attacaca",  //  2: match
            "ataacaca",  //  3: 1 mm
            "attcgtca",  //  4: many mm
            "atttacaca", //  5: 1 insertion in stretch
            "atacaca",   //  6: 1 deletion in the "t" stretch
            "attacacta", //  7: 1 insertion
            "attcaca",   //  8: 1 deletion
            "attacac",   //  9: 1 deletion in the end
            "ttacaca",   // 10: 1 deletion in the beginning
            "tattacaca", // 11: 1 insertion in the beginning
            "attacacat", // 12: 1 insertion in the end
            "attacact",  // 13: 1 mm end
            "tttacaca"   // 14: 1 mm begin
    };
    for (int k = 0; k < keys.length; ++k)
        assertNull(tree.put(new NucleotideSequence(keys[k]), k + 1));

    final NucleotideSequence reference = new NucleotideSequence("attacaca");

    // Values reachable with a single error of each type (index: 0 mismatch, 1 deletion, 2 insertion).
    HashSet<Integer>[] singleErrorHits = new HashSet[3];
    singleErrorHits[0] = new HashSet<>(Arrays.asList(1, 3, 13, 14));
    singleErrorHits[1] = new HashSet<>(Arrays.asList(6, 8, 9, 10));
    singleErrorHits[2] = new HashSet<>(Arrays.asList(5, 7, 11, 12));

    for (int mask = 0; mask < 8; ++mask) {
        // Bit "type" of the mask enables one error of that type.
        final int[] maxErrors = new int[3];
        HashSet<Integer> pending = new HashSet<>();
        pending.add(2); // the exact match is always expected
        for (int type = 0; type < 3; ++type) {
            if (((mask >> type) & 1) == 0)
                continue;
            maxErrors[type] = 1;
            pending.addAll(singleErrorHits[type]);
        }
        final HashSet<Integer> allowed = new HashSet<>(pending);

        NeighborhoodIterator<NucleotideSequence, Integer> neighbours = tree.getNeighborhoodIterator(
                reference, 0.5, new double[]{0.31, 0.301, 0.3001}, maxErrors, null);

        double previousPenalty = -1.0;
        for (SequenceTreeMap.Node<Integer> hit = neighbours.nextNode();
             hit != null;
             hit = neighbours.nextNode()) {
            assertTrue(previousPenalty <= neighbours.getPenalty());
            previousPenalty = neighbours.getPenalty();
            pending.remove(hit.object);
            assertTrue(allowed.contains(hit.object));
        }
        assertTrue(pending.isEmpty());
    }
}
/**
 * Edge case: query "CTG" against a map holding only "C", with limits {0, 2, 0}.
 * The test only verifies that the neighborhood iterator can be created without an exception; the
 * iterator is never advanced (the original iteration line was left disabled).
 */
@Test
public void testEdge1() throws Exception {
    final NucleotideSequence query = new NucleotideSequence("CTG");
    final NucleotideSequence stored = new NucleotideSequence("C");

    final SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);
    tree.put(stored, 2);

    final double[] penalties = {0.1, 0.1, Double.MAX_VALUE};
    final int[] maxErrors = {0, 2, 0};
    NeighborhoodIterator<NucleotideSequence, Integer> neighbours =
            tree.getNeighborhoodIterator(query, 1.0, penalties, maxErrors, null);
}
/**
 * Edge case: query "CTG" against a map holding only "CGT", with limits {2, 0, 0}.
 * The test only verifies that the neighborhood iterator can be created without an exception; the
 * iterator is never advanced (the original iteration line was left disabled).
 */
@Test
public void testEdge2() throws Exception {
    final NucleotideSequence query = new NucleotideSequence("CTG");
    final NucleotideSequence stored = new NucleotideSequence("CGT");

    final SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);
    tree.put(stored, 2);

    final double[] penalties = {0.1, 0.1, Double.MAX_VALUE};
    final int[] maxErrors = {2, 0, 0};
    NeighborhoodIterator<NucleotideSequence, Integer> neighbours =
            tree.getNeighborhoodIterator(query, 1.0, penalties, maxErrors, null);
}
/**
 * Edge case: query "C" against a map holding only "CTG", with limits {0, 0, 2}.
 * The test only verifies that the neighborhood iterator can be created without an exception; the
 * iterator is never advanced (the original iteration line was left disabled).
 */
@Test
public void testEdge3() throws Exception {
    final NucleotideSequence query = new NucleotideSequence("C");
    final NucleotideSequence stored = new NucleotideSequence("CTG");

    final SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);
    tree.put(stored, 2);

    final double[] penalties = {0.1, 0.1, 0.1};
    final int[] maxErrors = {0, 0, 2};
    NeighborhoodIterator<NucleotideSequence, Integer> neighbours =
            tree.getNeighborhoodIterator(query, 1.0, penalties, maxErrors, null);
}
/*
* Non-randomised tests for NeighborhoodIterator in guided mode
*/
/**
 * Guided search with a single type-1 mutation (deletion): the query carries one extra "t" compared with
 * the stored sequence. A guide that permits a type-1 mutation only at position 15 must find the stored
 * entry; a guide that permits it only at position 16 must find nothing.
 */
@Test
public void testGuideDel() throws Exception {
    SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);
    tree.put(new NucleotideSequence("attacacaattaattacacacacaattacaca"), 3);
    final NucleotideSequence query = new NucleotideSequence("attacacaattaatttacacacacaattacaca");

    final MutationGuide<NucleotideSequence> deletionAt15 = new MutationGuide<NucleotideSequence>() {
        @Override
        public boolean allowMutation(NucleotideSequence ref, int position, byte type, byte code) {
            return type == 1 && position == 15;
        }
    };
    final MutationGuide<NucleotideSequence> deletionAt16 = new MutationGuide<NucleotideSequence>() {
        @Override
        public boolean allowMutation(NucleotideSequence ref, int position, byte type, byte code) {
            return type == 1 && position == 16;
        }
    };

    // First search goes through the TreeSearchParameters overload.
    final TreeSearchParameters parameters = new TreeSearchParameters(
            new int[]{1, 1, 0}, new double[]{0.1, 0.1, Double.MAX_VALUE}, 0.2);
    NeighborhoodIterator<NucleotideSequence, Integer> neighbours =
            tree.getNeighborhoodIterator(query, parameters, deletionAt15);
    assertNotNull(neighbours.nextNode());

    // Second search uses the explicit-argument overload with the same limits.
    neighbours = tree.getNeighborhoodIterator(query, 0.2,
            new double[]{0.1, 0.1, Double.MAX_VALUE}, new int[]{1, 1, 0}, deletionAt16);
    assertNull(neighbours.nextNode());
}
/**
 * Guided search with a single type-0 mutation (mismatch): the query differs from the stored sequence in
 * one letter. A guide that permits a type-0 mutation only at position 14 must find the stored entry; a
 * guide that permits it only at position 15 must find nothing.
 */
@Test
public void testGuideMM() throws Exception {
    SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);
    tree.put(new NucleotideSequence("attacacaattaattacacacacaattacaca"), 3);
    final NucleotideSequence query = new NucleotideSequence("attacacaattaataacacacacaattacaca");

    final MutationGuide<NucleotideSequence> mismatchAt14 = new MutationGuide<NucleotideSequence>() {
        @Override
        public boolean allowMutation(NucleotideSequence ref, int position, byte type, byte code) {
            return type == 0 && position == 14;
        }
    };
    final MutationGuide<NucleotideSequence> mismatchAt15 = new MutationGuide<NucleotideSequence>() {
        @Override
        public boolean allowMutation(NucleotideSequence ref, int position, byte type, byte code) {
            return type == 0 && position == 15;
        }
    };

    NeighborhoodIterator<NucleotideSequence, Integer> neighbours = tree.getNeighborhoodIterator(query, 0.2,
            new double[]{0.1, 0.1, Double.MAX_VALUE}, new int[]{1, 1, 0}, mismatchAt14);
    assertNotNull(neighbours.nextNode());

    neighbours = tree.getNeighborhoodIterator(query, 0.2,
            new double[]{0.1, 0.1, Double.MAX_VALUE}, new int[]{1, 1, 0}, mismatchAt15);
    assertNull(neighbours.nextNode());
}
/**
 * Guided search with a single type-2 mutation (insertion): the query is one letter shorter than the
 * stored sequence. A guide that permits a type-2 mutation only at position 14 must find the stored
 * entry; a guide that permits it only at position 15 must find nothing.
 */
@Test
public void testGuideIns() throws Exception {
    SequenceTreeMap<NucleotideSequence, Integer> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);
    tree.put(new NucleotideSequence("attacacaattaattacacacacaattacaca"), 3);
    final NucleotideSequence query = new NucleotideSequence("attacacaattaatacacacacaattacaca");

    final MutationGuide<NucleotideSequence> insertionAt14 = new MutationGuide<NucleotideSequence>() {
        @Override
        public boolean allowMutation(NucleotideSequence ref, int position, byte type, byte code) {
            return type == 2 && position == 14;
        }
    };
    final MutationGuide<NucleotideSequence> insertionAt15 = new MutationGuide<NucleotideSequence>() {
        @Override
        public boolean allowMutation(NucleotideSequence ref, int position, byte type, byte code) {
            return type == 2 && position == 15;
        }
    };

    NeighborhoodIterator<NucleotideSequence, Integer> neighbours = tree.getNeighborhoodIterator(query, 0.2,
            new double[]{0.1, 0.1, 0.1}, new int[]{1, 1, 1}, insertionAt14);
    assertNotNull(neighbours.nextNode());

    neighbours = tree.getNeighborhoodIterator(query, 0.2,
            new double[]{0.1, 0.1, 0.1}, new int[]{1, 1, 1}, insertionAt15);
    assertNull(neighbours.nextNode());
}
/**
 * Guided searches over an amino-acid map holding "AAXSFD" (value 3) and "AAXFD" (value 4). Three searches
 * with different guides are run and, after each, value 3 must have been collected and value 4 must not.
 *
 * NOTE(review): the collecting set is shared by all three searches and never cleared, so once the first
 * search has found value 3 the later {@code contains(3)} checks cannot fail. Only the absence of value 4
 * is effectively verified by searches two and three.
 */
@Test
public void testGuideNew() throws Exception {
    SequenceTreeMap<AminoAcidSequence, Integer> tree = new SequenceTreeMap<>(AminoAcidSequence.ALPHABET);
    tree.put(new AminoAcidSequence("AAXSFD"), 3);
    tree.put(new AminoAcidSequence("AAXFD"), 4);
    Set<Integer> collected = new HashSet<>();

    // Search 1: any type-2 mutation is allowed.
    MutationGuide anyInsertion = new MutationGuide() {
        @Override
        public boolean allowMutation(Sequence reference, int position, byte type, byte to) {
            //TODO fix!!!!
            return type == 2 ;//&& to == IncompleteAminoAcidSequence.UNKNOWN_LETTER_CODE;
        }
    };
    NeighborhoodIterator<AminoAcidSequence, Integer> neighbours = tree.getNeighborhoodIterator(
            new AminoAcidSequence("AASFD"), 1, 1, 1, 1, anyInsertion);
    for (Integer value = neighbours.next(); value != null; value = neighbours.next())
        collected.add(value);
    assertTrue(collected.contains(3));
    assertFalse(collected.contains(4));

    // Search 2: only a type-2 mutation introducing the letter F is allowed.
    MutationGuide insertionOfF = new MutationGuide() {
        @Override
        public boolean allowMutation(Sequence reference, int position, byte type, byte to) {
            return type == 2 && to == AminoAcidAlphabet.F;
        }
    };
    neighbours = tree.getNeighborhoodIterator(new AminoAcidSequence("AAXSD"), 1, 1, 1, 1, insertionOfF);
    for (Integer value = neighbours.next(); value != null; value = neighbours.next())
        collected.add(value);
    assertTrue(collected.contains(3));
    assertFalse(collected.contains(4));

    // Search 3: only a type-1 mutation at a reference position holding the letter G is allowed.
    MutationGuide deletionOfG = new MutationGuide() {
        @Override
        public boolean allowMutation(Sequence reference, int position, byte type, byte to) {
            return type == 1 && reference.codeAt(position) == AminoAcidAlphabet.G;
        }
    };
    neighbours = tree.getNeighborhoodIterator(new AminoAcidSequence("AA_SGFD"), 1, 1, 1, 1, deletionOfG);
    for (Integer value = neighbours.next(); value != null; value = neighbours.next())
        collected.add(value);
    assertTrue(collected.contains(3));
    assertFalse(collected.contains(4));
}
/*
* Randomized tests
*/
// Random source shared by the mutation helpers and randomized tests of this class. It is created
// without an explicit seed (NOTE(review): runs are therefore presumably not reproducible - confirm
// against the Well19937a default-constructor documentation).
final RandomGenerator random = new Well19937a();
/*
* Utility functions and their tests
*/
// private Sequence getRandomSequence(Alphabet alphabet, int length) {
// SequenceBuilder builder = alphabet.getBuilderFactory().create(length);
// for (int i = 0; i < length; ++i)
// builder.setCode(i, (byte) random.nextInt(alphabet.size()));
// return builder.create();
// }
/**
 * Returns a copy of the given sequence with one random mutation of the requested kind.
 *
 * @param sequence sequence to mutate; for types 0 and 1 it must be non-empty because a random position
 *                 inside the sequence is drawn
 * @param type     -1 to return the sequence itself unchanged, 0 for a mismatch, 1 for a deletion,
 *                 2 for an insertion
 * @return the mutated sequence (or {@code sequence} itself for type -1)
 * @throws IllegalArgumentException for any other {@code type}
 */
private Sequence introduceMutation(Sequence sequence, int type) {
SequenceBuilder builder;
int position, i;
switch (type) {
case -1: //Copy
return sequence;
case 0: //Mismatch
// Nucleotide sequences are handled by the dedicated helper.
if (sequence.getAlphabet() == NucleotideSequence.ALPHABET)
return introduceNucleotideMismatch((NucleotideSequence) sequence);
builder = sequence.getAlphabet().createBuilder().ensureCapacity(sequence.size());
builder.append(sequence);
position = random.nextInt(sequence.size());
// Adding an offset in [1, alphabetSize - 1] modulo the alphabet size always yields a different code.
builder.set(position,
(byte) ((sequence.codeAt(position) + 1 + random.nextInt(sequence.getAlphabet().size() - 1)) %
sequence.getAlphabet().size()));
return (Sequence) builder.createAndDestroy();
case 1: //Deletion
builder = sequence.getAlphabet().createBuilder().ensureCapacity(sequence.size() - 1);
position = random.nextInt(sequence.size());
// Copy everything before the chosen position...
for (i = 0; i < position; ++i)
builder.append(sequence.codeAt(i));
// ...skip the letter at the chosen position...
++i;
// ...and copy the rest.
for (; i < sequence.size(); ++i)
builder.append(sequence.codeAt(i));
return (Sequence) builder.createAndDestroy();
case 2: //Insertion
builder = sequence.getAlphabet().createBuilder().ensureCapacity(sequence.size() + 1);
// The insertion point may also be the very end of the sequence, hence size() + 1.
position = random.nextInt(sequence.size() + 1);
for (i = 0; i < position; ++i)
builder.append(sequence.codeAt(i));
// Inserted letter is drawn uniformly from all codes of the alphabet.
builder.append((byte) random.nextInt(sequence.getAlphabet().size()));
for (; i < sequence.size(); ++i)
builder.append(sequence.codeAt(i));
return (Sequence) builder.createAndDestroy();
default:
throw new IllegalArgumentException();
}
}
/**
 * Returns a nucleotide sequence that differs from the given one in exactly one randomly chosen position.
 * The code at that position is shifted by 1..3 modulo 4, so it always changes.
 * (Presumably {@code asArray()} returns a fresh array so the input is left untouched - TODO confirm.)
 */
private NucleotideSequence introduceNucleotideMismatch(NucleotideSequence sequence) {
    final byte[] codes = sequence.asArray();
    final int index = random.nextInt(codes.length);
    final int shift = 1 + random.nextInt(3);
    codes[index] = (byte) ((codes[index] + shift) & 0x3);
    return new NucleotideSequence(codes);
}
// Alphabets the randomized tests choose from.
static final Alphabet[] alphabets = new Alphabet[]{NucleotideSequence.ALPHABET, AminoAcidSequence.ALPHABET};
/** Picks one of the known alphabets at random. */
private Alphabet getRandomAlphabet() {
    final int index = random.nextInt(alphabets.length);
    return alphabets[index];
}
/** Cycles through the known alphabets: returns the alphabet at index {@code id} modulo their count. */
private Alphabet getAlphabetSequence(int id) {
    final int slot = id % alphabets.length;
    return alphabets[slot];
}
/** A nucleotide mismatch introduced by the helper must always produce a sequence unequal to the input. */
@Test
public void testIntroduceMutation1() throws Exception {
    final int sequences = its(100, 500);
    for (int s = 0; s < sequences; ++s) {
        NucleotideSequence original = (NucleotideSequence) randomSequence(NucleotideSequence.ALPHABET, 100, 100);
        final int attempts = its(100, 500);
        for (int a = 0; a < attempts; ++a)
            assertThat(original, not(introduceNucleotideMismatch(original)));
    }
}
/**
 * For random sequences over a random alphabet: the "copy" mutation must yield an equal sequence, and a
 * mismatch, deletion or insertion must each yield a sequence unequal to the input.
 */
@Test
public void testIntroduceMutation2() throws Exception {
    final int sequences = its(100, 500);
    for (int s = 0; s < sequences; ++s) {
        Alphabet alphabet = getRandomAlphabet();
        Sequence original = randomSequence(alphabet, 100, 100);

        //Testing correct equals implementation
        assertEquals(original, introduceMutation(original, -1));

        final int attempts = its(100, 500);
        for (int a = 0; a < attempts; ++a) {
            for (int type = 0; type <= 2; ++type)
                assertThat(original, not(introduceMutation(original, type)));
        }
    }
}
/*
* More utility functions for randomized testing
*/
/**
 * Builds a cluster around a random core sequence (length drawn by {@code randomSequence(alphabet, 200, 400)}).
 * Between 1 and {@code maxInCluster - 1} candidate variants are generated from the core by applying a
 * random number of deletions, then mismatches, then insertions; candidates equal to the core are skipped.
 *
 * @param alphabet     alphabet of the generated sequences
 * @param maxInCluster exclusive upper bound for the number of generated candidates
 * @param maxMutations per-type limits, indexed 0 = mismatches, 1 = deletions, 2 = insertions; each count
 *                     is drawn via {@code randomInt(limit + 1)}
 */
private <S extends Sequence<S>> SequenceCluster<S> generateCluster(Alphabet<S> alphabet, int maxInCluster, int... maxMutations) {
    final SequenceCluster cluster = new SequenceCluster<>(randomSequence(alphabet, 200, 400));
    int remaining = 1 + random.nextInt(maxInCluster - 1);
    while (remaining-- > 0) {
        Sequence variant = cluster.sequence;
        final int[] counts = new int[3];

        // Deletions first, then mismatches, then insertions.
        counts[1] = randomInt(maxMutations[1] + 1);
        for (int k = 0; k < counts[1]; ++k)
            variant = introduceMutation(variant, 1);

        counts[0] = randomInt(maxMutations[0] + 1);
        for (int k = 0; k < counts[0]; ++k)
            variant = introduceMutation(variant, 0);

        counts[2] = randomInt(maxMutations[2] + 1);
        for (int k = 0; k < counts[2]; ++k)
            variant = introduceMutation(variant, 2);

        // Keep only variants that really differ from the core.
        if (!variant.equals(cluster.sequence))
            cluster.add(new MutatedSequence(variant, counts));
    }
    return cluster;
}
/**
 * Returns a randomly mutated variant of {@code seq} that is guaranteed to differ from it. A random
 * number of deletions, then mismatches, then insertions is applied; if the result happens to equal the
 * input the whole procedure is repeated.
 *
 * @param seq          sequence to mutate
 * @param maxMutations per-type limits, indexed 0 = mismatches, 1 = deletions, 2 = insertions; each count
 *                     is drawn via {@code randomInt(limit + 1)}
 * @return a sequence not equal to {@code seq}
 * @throws IllegalArgumentException if no mutation type is permitted (all limits are zero or negative);
 *                                  without this check the retry loop could never terminate
 */
private Sequence introduceErrors(Sequence seq, int[] maxMutations) {
    if (maxMutations[0] <= 0 && maxMutations[1] <= 0 && maxMutations[2] <= 0)
        throw new IllegalArgumentException(
                "At least one mutation type must be allowed: " + Arrays.toString(maxMutations));
    while (true) {
        int j;
        Sequence sequence = seq;
        for (j = randomInt(maxMutations[1] + 1); j > 0; --j)
            sequence = introduceMutation(sequence, 1);
        for (j = randomInt(maxMutations[0] + 1); j > 0; --j)
            sequence = introduceMutation(sequence, 0);
        for (j = randomInt(maxMutations[2] + 1); j > 0; --j)
            sequence = introduceMutation(sequence, 2);
        // All drawn counts may be zero (or mutations may cancel out); retry until the result differs.
        if (seq.equals(sequence))
            continue;
        return sequence;
    }
}
/**
 * Template for randomized test. See {@link #testRandomizedTest4Clusters()}
 *
 * Generates {@code clusterCount} clusters, stores every core sequence under the value 0 and every
 * variant under its hash code, then searches the neighborhood of each core sequence and asserts that the
 * core value and the hashes of all of that cluster's variants were returned.
 *
 * @param alphabet     alphabet of the generated sequences
 * @param clusterCount number of clusters to generate
 * @param inCluster    passed to {@code generateCluster} as its {@code maxInCluster} argument
 * @param errors       per-type mutation limits, used both for generation and for the search
 */
public <S extends Sequence<S>> void clusterTest(Alphabet<S> alphabet, int clusterCount, int inCluster, int[] errors) {
    SequenceCluster<S>[] clusters = new SequenceCluster[clusterCount];
    SequenceTreeMap<S, Integer> sequenceTreeMap = new SequenceTreeMap<>(alphabet);
    for (int i = 0; i < clusters.length; ++i) {
        clusters[i] = generateCluster(alphabet, inCluster, errors);
        sequenceTreeMap.put(clusters[i].sequence, 0);
        for (MutatedSequence<S> s : clusters[i].mutatedSequences)
            sequenceTreeMap.put(s.sequence, s.hashCode());
    }
    for (int i = 0; i < clusters.length; ++i) {
        NeighborhoodIterator<S, Integer> neighborhoodIterator =
                sequenceTreeMap.getNeighborhoodIterator(clusters[i].sequence, 1.0,
                        new double[]{0.1, 0.1, 0.1},
                        errors, null);
        // Values that must be reported for this cluster; each one is ticked off when it is returned.
        Set<Integer> missing = new HashSet<>(clusters[i].hashes);
        missing.add(0);
        SequenceTreeMap.Node<Integer> n;
        while ((n = neighborhoodIterator.nextNode()) != null)
            missing.remove(n.object);
        // The set is left untouched after the search so that nothing can hide a value that was not found.
        assertTrue("Cluster " + i + ": values not returned by the neighborhood search: " + missing,
                missing.isEmpty());
    }
}
/**
 * Returns 0 for a bound of 0, otherwise the result of {@code random.nextInt(i)} for the given bound.
 */
int randomInt(int i) {
    return i == 0 ? 0 : random.nextInt(i);
}
/*
* Randomized tests
*/
/**
 * Repeated 100 times: fills a tree with up to 1000 distinct random sequences (each mapped to itself),
 * checks that {@code values()} reports exactly those sequences, removes them all again and checks that
 * the first four root links are null and {@code values()} is empty.
 */
@Test
public void testRemoveTest() throws Exception {
    final RandomDataGenerator data = new RandomDataGenerator(new Well19937a());

    for (int round = 0; round < 100; ++round) {
        Set<NucleotideSequence> expected = new HashSet<>();
        for (int draw = 0; draw < 1000; ++draw)
            expected.add(randomSequence(NucleotideSequence.ALPHABET, data, 4, 30));

        SequenceTreeMap<NucleotideSequence, NucleotideSequence> tree = new SequenceTreeMap<>(NucleotideSequence.ALPHABET);
        for (NucleotideSequence key : expected)
            tree.put(key, key);

        // Every stored value must be one of the inserted sequences, and none may be missing.
        int visited = 0;
        for (NucleotideSequence stored : tree.values()) {
            assertTrue(expected.contains(stored));
            ++visited;
        }
        assertEquals(expected.size(), visited);

        // remove returns the value that was stored under the key.
        for (NucleotideSequence key : expected)
            assertEquals(key, tree.remove(key));

        for (int code = 0; code < 4; ++code)
            assertNull(tree.root.links[code]);

        int leftovers = 0;
        for (NucleotideSequence stored : tree.values())
            ++leftovers;
        assertEquals(0, leftovers);
    }
}
/**
 * Randomized completeness test (repeated {@code repeats} times): builds 300 nucleotide clusters whose
 * variants carry up to 2 mismatches and up to 2 deletions, stores each core sequence under the value 0
 * and each variant under its hash code, then checks that a neighborhood search around every core
 * sequence returns the core value and the hashes of all of that cluster's variants.
 */
@Test
public void testRandomizedTest1() throws Exception {
    for (int f = 0; f < repeats; ++f) {
        Alphabet<NucleotideSequence> alphabet = NucleotideSequence.ALPHABET;
        SequenceCluster<NucleotideSequence>[] clusters = new SequenceCluster[300];
        SequenceTreeMap<NucleotideSequence, Integer> sequenceTreeMap = new SequenceTreeMap<>(alphabet);
        for (int i = 0; i < clusters.length; ++i) {
            clusters[i] = generateCluster(alphabet, 70, 2, 2, 0);
            sequenceTreeMap.put(clusters[i].sequence, 0);
            for (MutatedSequence<NucleotideSequence> s : clusters[i].mutatedSequences)
                sequenceTreeMap.put(s.sequence, s.hashCode());
        }
        for (int i = 0; i < clusters.length; ++i) {
            NeighborhoodIterator<NucleotideSequence, Integer> neighborhoodIterator =
                    sequenceTreeMap.getNeighborhoodIterator(clusters[i].sequence, 1.0,
                            new double[]{0.1, 0.1, Double.MAX_VALUE},
                            new int[]{2, 2, 0}, null);
            // Values that must be reported for this cluster; each one is ticked off when it is returned.
            Set<Integer> missing = new HashSet<>(clusters[i].hashes);
            missing.add(0);
            SequenceTreeMap.Node<Integer> n;
            while ((n = neighborhoodIterator.nextNode()) != null)
                missing.remove(n.object);
            // The set is left untouched after the search so that nothing can hide a value that was not found.
            assertTrue("Repeat " + f + ", cluster " + i
                            + ": values not returned by the neighborhood search: " + missing,
                    missing.isEmpty());
        }
    }
}
/**
 * Randomized consistency test (repeated {@code repeats} times): builds 300 nucleotide clusters, maps
 * every sequence to itself and checks, for each neighbour found around a core sequence, that applying
 * the iterator's reported mutations to the core sequence reproduces the neighbour.
 */
@Test
public void testRandomizedTest2() throws Exception {
    for (int run = 0; run < repeats; ++run) {
        final Alphabet<NucleotideSequence> alphabet = NucleotideSequence.ALPHABET;
        SequenceCluster<NucleotideSequence>[] clusters = new SequenceCluster[300];
        SequenceTreeMap<NucleotideSequence, NucleotideSequence> tree = new SequenceTreeMap<>(alphabet);

        for (int c = 0; c < clusters.length; ++c) {
            SequenceCluster<NucleotideSequence> cluster = generateCluster(alphabet, 70, 2, 2, 0);
            clusters[c] = cluster;
            tree.put(cluster.sequence, cluster.sequence);
            for (MutatedSequence<NucleotideSequence> variant : cluster.mutatedSequences)
                tree.put(variant.sequence, variant.sequence);
        }

        for (SequenceCluster<NucleotideSequence> cluster : clusters) {
            final NucleotideSequence core = cluster.sequence;
            NeighborhoodIterator<NucleotideSequence, NucleotideSequence> neighbours =
                    tree.getNeighborhoodIterator(core, 1.0,
                            new double[]{0.1, 0.1, Double.MAX_VALUE},
                            new int[]{2, 2, 0}, null);
            for (SequenceTreeMap.Node<NucleotideSequence> hit = neighbours.nextNode();
                 hit != null;
                 hit = neighbours.nextNode()) {
                Mutations<NucleotideSequence> path = neighbours.getCurrentMutations();
                assertEquals(hit.getObject(), path.mutate(core));
            }
        }
    }
}
/**
 * Runs {@code clusterTest} for both alphabets with up to 2 errors of a single type at a time
 * (limit index 0, then 1, then 2).
 */
@Test
public void testRandomizedTest4Clusters() {
    for (int type = 0; type < 3; ++type) {
        final int[] limits = {0, 0, 0};
        limits[type] = 2;
        clusterTest(NucleotideSequence.ALPHABET, 100, 30, limits);
        clusterTest(AminoAcidSequence.ALPHABET, 100, 30, limits);
    }
}
/*
* Randomized tests for guided search
*/
/**
 * Randomized test of guided search, run {@code repeats * 6} times with the alphabet alternating by
 * iteration and, within each iteration, once per mutation type (up to 3 errors of that type).
 * A reference is built as left part + 200-letter spacer + right part. Variants with errors only in the
 * left part, only in the right part, or in both parts are stored (each mapped to itself). A search whose
 * guide only permits mutations in the left half of the reference must return exactly the left-error
 * variants, and a search whose guide only permits mutations in the right half must return exactly the
 * right-error variants.
 */
@Test
public void testRandomizedTest3() throws Exception {
for (int f = 0; f < repeats * 6; ++f) {
//System.out.println(f);
Alphabet alphabet = getAlphabetSequence(f);
for (byte t = 0; t < 3; ++t) {
// Reference layout: seqLeft (50-100 letters) + spacer (200 letters) + seqRight (50-100 letters).
final Sequence seqRight = randomSequence(alphabet, 50, 100),
seqLeft = randomSequence(alphabet, 50, 100),
spacer = randomSequence(alphabet, 200, 200),
goodSequence = concatenate(seqLeft, spacer, seqRight);
SequenceTreeMap map = new SequenceTreeMap(alphabet);
// Only mutation type t is allowed, with at most 3 occurrences.
int[] mut = new int[3];
mut[t] = 3;
// Stored variants grouped by where their errors are: left part, right part, or both.
HashSet<Sequence> lErr = new HashSet<>(),
rErr = new HashSet<>(), lrErr = new HashSet<>();
Sequence seq1, seq2, mseq;
for (int i = 0; i < 100; ++i) {
//Left Error
seq1 = introduceErrors(seqLeft, mut);
mseq = concatenate(seq1, spacer, seqRight);
lErr.add(mseq);
map.put(mseq, mseq);
//Right Error
seq1 = introduceErrors(seqRight, mut);
mseq = concatenate(seqLeft, spacer, seq1);
rErr.add(mseq);
map.put(mseq, mseq);
//LR Error
seq1 = introduceErrors(seqLeft, mut);
seq2 = introduceErrors(seqRight, mut);
mseq = concatenate(seq1, spacer, seq2);
lrErr.add(mseq);
map.put(mseq, mseq);
}
SequenceTreeMap.Node<Sequence> n;
//Left run
// The guide's boundary (seqLeft.size() + 100) lies in the middle of the spacer.
NeighborhoodIterator neighborhoodIterator =
map.getNeighborhoodIterator(goodSequence, 1.3,
new double[]{0.1, 0.1, 0.1},
mut, new MutationGuide() {
@Override
public boolean allowMutation(Sequence ref, int position, byte type, byte code) {
return position < seqLeft.size() + 100;
}
}
);
// acc tracks left-error variants not yet returned; it must be empty at the end.
HashSet<Sequence> acc = new HashSet<>(lErr);
while ((n = neighborhoodIterator.nextNode()) != null) {
assertTrue(lErr.contains(n.object));
assertFalse(rErr.contains(n.object));
assertFalse(lrErr.contains(n.object));
acc.remove(n.object);
}
assertTrue(acc.isEmpty());
//Right run
// Mirror image of the left run: only positions beyond the middle of the spacer may mutate.
neighborhoodIterator =
map.getNeighborhoodIterator(goodSequence, 1.3,
new double[]{0.1, 0.1, 0.1},
mut, new MutationGuide() {
@Override
public boolean allowMutation(Sequence ref, int position, byte type, byte code) {
return position > seqLeft.size() + 100;
}
}
);
acc = new HashSet<>(rErr);
while ((n = neighborhoodIterator.nextNode()) != null) {
assertTrue(rErr.contains(n.object));
assertFalse(lErr.contains(n.object));
assertFalse(lrErr.contains(n.object));
acc.remove(n.object);
}
assertTrue(acc.isEmpty());
}
}
}
/*
* Classes for randomized tests
*/
/**
 * A core sequence together with the mutated variants generated from it.
 * Besides the variants themselves, the hash code of every added variant is recorded in {@link #hashes}.
 */
public static final class SequenceCluster<S extends Sequence<S>> {
    /** Core (unmutated) sequence of the cluster. */
    final S sequence;
    /** Variants added so far, in insertion order. */
    final List<MutatedSequence<S>> mutatedSequences = new ArrayList<>();
    /** Hash codes of all added variants. */
    final Set<Integer> hashes = new HashSet<>();

    public SequenceCluster(S sequence) {
        this.sequence = sequence;
    }

    /**
     * Adds a variant to the cluster and records its hash code.
     *
     * @param s variant to add; typed with the cluster's sequence type so that the list insert is checked
     *          by the compiler (raw callers still compile, with an unchecked warning)
     */
    public void add(MutatedSequence<S> s) {
        mutatedSequences.add(s);
        hashes.add(s.hashCode());
    }
}
/**
 * A sequence paired with the per-type mutation counts that were used to produce it.
 * Equality considers both the counts and the sequence; the hash code is derived from the sequence only.
 */
public static final class MutatedSequence<S extends Sequence<S>> {
    final int[] mutations;
    final S sequence;

    public MutatedSequence(S sequence, int... mutations) {
        this.sequence = sequence;
        this.mutations = mutations;
    }

    @Override
    public boolean equals(Object o) {
        if (o == this)
            return true;
        if (o == null || o.getClass() != getClass())
            return false;
        final MutatedSequence<?> other = (MutatedSequence<?>) o;
        return Arrays.equals(mutations, other.mutations) && sequence.equals(other.sequence);
    }

    @Override
    public int hashCode() {
        // Only the sequence contributes; the mutation counts are deliberately left out.
        return sequence.hashCode();
    }
}
}