package org.genedb.crawl;
import java.io.File;
import java.io.IOException;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordIterator;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import org.apache.log4j.Logger;
import uk.ac.sanger.artemis.components.variant.FTPSeekableStream;
import java.net.SocketException;
import java.net.URL;
import java.util.Map;
import junit.framework.TestCase;
/**
 * Test that opens BAM files over FTP through {@link FTPSeekableStream},
 * logs the header attributes and sequence dictionary, and then queries a small
 * window on every reference sequence, checking the start coordinate of each
 * returned record.
 *
 * <p>The test needs network access to the FTP servers listed in {@link #urls}.
 */
public class FTPTest extends TestCase {

    private static final Logger logger = Logger.getLogger(FTPTest.class);

    /** Default lower bound of the window queried on each sequence. */
    private static final int WINDOW_MIN = 100;

    /** Default upper bound of the window queried on each sequence. */
    private static final int WINDOW_MAX = 100000;

    /**
     * BAM files to exercise. The commented-out entries are alternative files
     * that were used previously; only the uncommented entries are run.
     */
    private static final String[] urls = new String[] {
        //"ftp://ftp.sanger.ac.uk/pub/mouse_genomes/current_bams/129S1.bam",
        //"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data/NA19331/alignment/NA19331.chromX.LS454.ssaha2.LWK.exon_targetted.20100311.bam"
        //"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data/NA19331/alignment/NA19331.chrom20.ILLUMINA.bwa.LWK.low_coverage.20111114.bam"
        //"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data/NA19331/alignment/NA19331.chrom20.ILLUMINA.bwa.LWK.low_coverage.20120522.bam"
        "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00109/alignment/HG00109.unmapped.ILLUMINA.bwa.GBR.low_coverage.20130415.bam"
    };

    /**
     * Runs {@link #run(URL)} against every configured URL.
     *
     * @throws SocketException propagated from {@link #run(URL)}
     * @throws IOException propagated from {@link #run(URL)}, or if a URL is malformed
     */
    public void testURLs() throws SocketException, IOException {
        for (String url : urls) {
            run(new URL(url));
        }
    }

    /**
     * Opens the BAM file at {@code url}, logs its header, and queries a window
     * on each sequence in its sequence dictionary.
     *
     * <p>The reader is closed in a {@code finally} block so that it is released
     * even when an assertion fails or a query throws.
     *
     * @param url location of the BAM file
     * @throws SocketException declared for the FTP stream
     * @throws IOException declared for the FTP stream and index retrieval
     */
    public void run(URL url) throws SocketException, IOException {
        FTPSeekableStream fss = new FTPSeekableStream(url);
        File index = fss.getIndexFile();
        SAMFileReader reader = new SAMFileReader(fss, index, false);
        try {
            // Result is discarded.
            // NOTE(review): presumably kept to touch the header before any query — confirm.
            reader.getFileHeader();
            reader.setValidationStringency(ValidationStringency.SILENT);

            logHeader(reader);

            logger.info("sequences");
            for (SAMSequenceRecord ssr : reader.getFileHeader().getSequenceDictionary().getSequences()) {
                querySequence(reader, ssr);
            }
            logger.info("Done");
        } finally {
            // NOTE(review): assumes closing the reader also releases the underlying
            // FTP stream — confirm against the samtools version in use.
            reader.close();
        }
    }

    /** Logs the header attributes and the name and length of every sequence. */
    private void logHeader(SAMFileReader reader) {
        logger.info("attributes");
        for (Map.Entry<String, String> entry : reader.getFileHeader().getAttributes()) {
            logger.info(String.format("%s : %s", entry.getKey(), entry.getValue()));
        }
        logger.info("sequences");
        for (SAMSequenceRecord ssr : reader.getFileHeader().getSequenceDictionary().getSequences()) {
            logger.info(String.format("%s : %s ", ssr.getSequenceName(), ssr.getSequenceLength() ));
        }
    }

    /**
     * Queries one sequence over the default window, clamped to the sequence
     * length, logging every record and asserting on its alignment start.
     */
    private void querySequence(SAMFileReader reader, SAMSequenceRecord ssr) {
        int length = ssr.getSequenceLength();
        int min = WINDOW_MIN;
        int max = WINDOW_MAX;

        // Clamp the window so that it fits inside short sequences.
        if (min >= length) {
            min = 0;
        }
        if (max >= length) {
            max = length;
        }
        if (min >= max) {
            min = 0;
        }

        logger.info(String.format("Sequence: %s (%s) %s-%s", ssr.getSequenceName(), length, min, max));

        SAMRecordIterator i = reader.query(ssr.getSequenceName(), min, max, false);
        try {
            while ( i.hasNext() ) {
                SAMRecord record = i.next();
                logger.info(String.format("Read: %s (%s (%s-%s) %s) / %s",
                        record.getReadName(),
                        min,
                        record.getAlignmentStart(),
                        record.getAlignmentEnd(),
                        max,
                        record.getFlags()));
                // NOTE(review): the query is made with contained=false; if that returns
                // reads merely overlapping the window, a read could start before min —
                // confirm this assertion is the intended check.
                assertTrue(record.getAlignmentStart() >= min);
            }
        } finally {
            // Close the iterator even when the assertion above fails.
            i.close();
        }
        logger.info("_________________________________________________");
    }
}