/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.feature.genome;
import org.broad.igv.AbstractHeadlessTest;
import org.broad.igv.exceptions.DataLoadException;
import org.broad.igv.util.TestUtils;
import org.junit.Test;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertNotNull;
/**
 * Tests for building FASTA index ({@code .fai}) files with
 * {@code FastaUtils.createIndexFile} and reading them back through
 * {@code FastaIndex} and {@code GenomeManager}.
 * <p>
 * Index files are written next to the input FASTA (or to an explicit path) and
 * are registered for deletion when the JVM exits.
 * <p>
 * User: jacob
 * Date: 2013-Mar-01
 */
public class FastaUtilsTest extends AbstractHeadlessTest {

    /**
     * Expected sequence size per contig, shared by the tests that index
     * {@code trailing_line.fas} and {@code blank_lines_betweencontigs.fas}.
     */
    static Map<String, Integer> testFastaBlanks = new HashMap<String, Integer>(3);

    static {
        testFastaBlanks.put("Chrom1", 632);
        testFastaBlanks.put("Chrom2", 284);
        testFastaBlanks.put("Chrom3", 287);
    }

    /**
     * Indexes {@code ecoli_out.padded2.fasta} and checks that the index holds
     * exactly one sequence, named {@code NC_000913_bb}.
     */
    @Test
    public void testCreateIndex_01() throws Exception {
        String indexPath = tstCreateIndex(TestUtils.DATA_DIR + "fasta/ecoli_out.padded2.fasta");
        FastaIndex fai = new FastaIndex(indexPath);

        assertEquals(1, fai.getSequenceNames().size());
        assertNotNull(fai.getIndexEntry("NC_000913_bb"));
    }

    /**
     * Shared body for tests on a two-contig FASTA whose contigs are
     * {@code my:testA} and {@code my:testG}. Verifies every field of both index
     * entries and then loads the file as a genome to check the sequence content.
     *
     * @param inPath path of the FASTA file to index and load
     * @throws Exception if indexing, index parsing or genome loading fails
     */
    public void tstCreateIndex_02(String inPath) throws Exception {
        final String contigA = "my:testA";
        final String contigG = "my:testG";

        FastaIndex fai = new FastaIndex(tstCreateIndex(inPath));
        assertEquals(2, fai.getSequenceNames().size());
        for (String name : new String[]{contigA, contigG}) {
            assertNotNull(fai.getIndexEntry(name));
        }

        FastaIndex.FastaSequenceIndexEntry entryA = fai.getIndexEntry(contigA);
        int basesPerLineA = 58;

        // The per-line terminator width is taken from the file itself (presumably
        // 1 for LF, 2 for CRLF) instead of being hard-coded, because a default
        // git checkout may rewrite line endings on some platforms.
        int eolBytes = TestUtils.getBytesAtEnd(inPath);

        assertEquals(basesPerLineA, entryA.getBasesPerLine());
        assertEquals(basesPerLineA + eolBytes, entryA.getBytesPerLine());
        // The first contig starts right after its 9-character header line.
        assertEquals(9 + eolBytes, entryA.getPosition());
        int sizeA = 7 * basesPerLineA + 29;
        assertEquals(sizeA, entryA.getSize());
        assertEquals(contigA, entryA.getContig());

        FastaIndex.FastaSequenceIndexEntry entryG = fai.getIndexEntry(contigG);
        int basesPerLineG = 56;
        assertEquals(basesPerLineG, entryG.getBasesPerLine());
        assertEquals(basesPerLineG + eolBytes, entryG.getBytesPerLine());

        // Expected byte offset of the second contig: the bases of the first contig,
        // one terminator for each of its 8 lines (7 full + 1 partial), the first
        // contig's own start offset, and finally the second contig's header line.
        // "Size" counts bases while "position" counts bytes, hence the mixed units.
        long startG = sizeA + 8 * eolBytes + fai.getIndexEntry(contigA).getPosition() + 9 + eolBytes;
        assertEquals(startG, entryG.getPosition());
        int sizeG = 5 * basesPerLineG + 26;
        assertEquals(sizeG, entryG.getSize());
        assertEquals(contigG, entryG.getContig());

        Genome genome = GenomeManager.getInstance().loadGenome(inPath, null);
        assertContigIsSingleBase(genome, contigA, sizeA, "A");
        assertContigIsSingleBase(genome, contigG, sizeG, "G");
    }

    /**
     * Asserts that the first {@code size} bases of {@code contig} can be read
     * from {@code genome} and consist solely of {@code base}.
     */
    private static void assertContigIsSingleBase(Genome genome, String contig, int size, String base)
            throws Exception {
        String bases = new String(genome.getSequence(contig, 0, size));
        assertEquals(size, bases.length());
        String leftover = bases.replaceAll(base, "");
        assertEquals(0, leftover.length());
    }

    /** Runs the two-contig checks against {@code fasta_2contigs.fa}. */
    @Test
    public void testCreateIndexUncompressed() throws Exception {
        tstCreateIndex_02(TestUtils.DATA_DIR + "fasta/fasta_2contigs.fa");
    }

    /** Indexing {@code fasta_uneven.fa} is expected to fail with a {@code DataLoadException}. */
    @Test(expected = DataLoadException.class)
    public void testCreateIndexUneven() throws Exception {
        tstCreateIndex(TestUtils.DATA_DIR + "fasta/fasta_uneven.fa");
    }

    /** Indexing {@code blank_lines.fas} is expected to fail with a {@code DataLoadException}. */
    @Test(expected = DataLoadException.class)
    public void testCreateIndexBlankLines() throws Exception {
        tstCreateIndex(TestUtils.DATA_DIR + "fasta/blank_lines.fas");
    }

    /**
     * Indexes the large E. coli FASTA, loads it as a genome, and checks the
     * chromosome name, the total length and a short stretch of sequence.
     */
    @Test
    public void testCreateIndexEcoli() throws Exception {
        String inPath = TestUtils.LARGE_DATA_DIR + "ecoli.fasta";
        tstCreateIndex(inPath);

        Genome genome = GenomeManager.getInstance().loadGenome(inPath, null);
        String chr = "gi|110640213|ref|NC_008253.1|";
        assertNotNull(genome.getChromosome(chr));

        //See http://www.ncbi.nlm.nih.gov/nuccore/110640213
        assertEquals(4938920, genome.getTotalLength());

        String expectedBases = "ATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAG";
        // NOTE(review): presumably position 30 (1-based) converted to a 0-based start.
        int start = 30 - 1;
        int end = start + expectedBases.length();
        byte[] actualBases = genome.getSequence(chr, start, end);
        assertEquals(expectedBases, new String(actualBases));
    }

    /** {@code trailing_line.fas} must index cleanly with the sizes in {@link #testFastaBlanks}. */
    @Test
    public void testCreateIndexTrailingBlank() throws Exception {
        tstCreateIndexGoodBlanks(TestUtils.DATA_DIR + "fasta/trailing_line.fas", testFastaBlanks);
    }

    /**
     * {@code blank_lines_betweencontigs.fas} must index cleanly with the sizes in
     * {@link #testFastaBlanks}.
     */
    @Test
    public void testCreateIndexBlankBetweenContigs() throws Exception {
        tstCreateIndexGoodBlanks(TestUtils.DATA_DIR + "fasta/blank_lines_betweencontigs.fas", testFastaBlanks);
    }

    /**
     * Builds a fresh index for {@code inPath} at {@code inPath + ".fai"}.
     *
     * @param inPath path of the FASTA file to index
     * @return path of the index file that was written
     * @throws Exception if index creation fails
     */
    private String tstCreateIndex(String inPath) throws Exception {
        String outPath = inPath + ".fai";
        createFreshIndex(inPath, outPath);
        return outPath;
    }

    /**
     * Deletes any existing file at {@code indexPath}, registers that path for
     * deletion on JVM exit, and then writes a new index for {@code fastaPath} there.
     */
    private static void createFreshIndex(String fastaPath, String indexPath) throws Exception {
        File indexFile = new File(indexPath);
        indexFile.delete();
        indexFile.deleteOnExit();
        FastaUtils.createIndexFile(fastaPath, indexPath);
    }

    /**
     * Shared body for FASTA files that are expected to index successfully: checks
     * the number of sequences in the index and the size recorded for each one.
     *
     * @param inPath        path of the FASTA file to index
     * @param expectedSizes expected sequence size keyed by contig name
     * @throws Exception if indexing or index parsing fails
     */
    public void tstCreateIndexGoodBlanks(String inPath, Map<String, Integer> expectedSizes) throws Exception {
        FastaIndex fai = new FastaIndex(tstCreateIndex(inPath));

        assertEquals(expectedSizes.size(), fai.getSequenceNames().size());
        for (Map.Entry<String, Integer> expected : expectedSizes.entrySet()) {
            assertEquals(expected.getValue().intValue(), fai.getSequenceSize(expected.getKey()));
        }
    }

    /**
     * Indexing {@code dup_contigs.fas} is expected to fail with a
     * {@code DataLoadException}. The index is written to {@code out/tmp.fai}
     * rather than next to the input file.
     */
    @Test(expected = DataLoadException.class)
    public void testCreateIndexDuplicateContigs() throws Exception {
        String inPath = TestUtils.DATA_DIR + "fasta/dup_contigs.fas";
        String outPath = TestUtils.DATA_DIR + "out/tmp.fai";
        createFreshIndex(inPath, outPath);
    }
}