/*
The MIT License (MIT)
Copyright (c) 2014 Pierre Lindenbaum
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
History:
* 2014 creation
*/
package com.github.lindenb.jvarkit.util.picard;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.SAMSequenceRecord;
import com.github.lindenb.jvarkit.lang.AbstractCharSequence;
import com.github.lindenb.jvarkit.util.bio.ChromosomeSequence;
/**
*
* implementation of java.lang.CharSequence for a given
* chromosome of a picard IndexedFastaSequenceFile
*
*/
public class GenomicSequence
    extends AbstractCharSequence
    implements ChromosomeSequence
    {
    /**
     * Number of bases loaded on each side of the requested position when the
     * cached window has to be (re)filled; the window therefore holds at most
     * twice that many bases.
     */
    private static final int HALF_BUFFER_CAPACITY = 1000000;

    /** the indexed reference the bases are read from */
    private final IndexedFastaSequenceFile indexedFastaSequenceFile;
    /** dictionary entry of the contig exposed by this sequence */
    private final SAMSequenceRecord samSequenceRecord;
    /** cached window of bases, null until the first call to {@link #charAt(int)} */
    private byte[] buffer = null;
    /** 0-based position on the contig of <code>buffer[0]</code>, -1 while the buffer is empty */
    private int buffer_pos = -1;

    /** Base composition of an interval of a contig. */
    public static interface GCPercent extends Locatable
        {
        /** return the number of bases that were scanned, whatever the symbol */
        public int getAllCount();
        /** return the number of bases that were counted as G or C */
        public int getGCCount();
        /** return the number of bases that were counted as A or T */
        public int getATCount();
        /** return true if getAllCount==0 */
        public boolean isEmpty();
        /** return GC% as double between 0 and 1 . return -1 if interval isEmpty */
        public double getGCPercent();
        /** return GC% as int between 0 and 100 . return -1 if interval isEmpty */
        public int getGCPercentAsInteger();
        }

    /**
     * Default implementation of {@link GCPercent}. The counters are filled by
     * {@link GenomicSequence#getGCPercent(int, int)}.
     */
    private static class GCPercentImpl
        implements GCPercent
        {
        final String contig;
        /** 1-based start, as returned by {@link #getStart()} */
        final int start1;
        /** 1-based end, as returned by {@link #getEnd()} */
        final int end1;
        int count = 0;
        int count_gc = 0;
        int count_at = 0;

        GCPercentImpl(final String contig, final int s1, final int e1) {
            this.contig = contig;
            this.start1 = s1;
            this.end1 = e1;
            }

        @Override
        public int getAllCount() {
            return this.count;
            }

        @Override
        public int getGCCount() {
            return this.count_gc;
            }

        @Override
        public int getATCount() {
            return this.count_at;
            }

        @Override
        public boolean isEmpty() {
            return this.count == 0;
            }

        @Override
        public double getGCPercent() {
            if (this.count == 0) return -1.0;
            return this.count_gc / (double) this.count;
            }

        @Override
        public int getGCPercentAsInteger() {
            if (this.count == 0) return -1;
            // Exact integer arithmetic: going through the double ratio loses one
            // unit on exact percentages (e.g. 29/100 -> 0.29*100.0 ->
            // 28.999999999999996 -> 28). The long factor prevents any overflow.
            return (int) ((100L * this.count_gc) / this.count);
            }

        /** hash of the three counters only, consistent with {@link #equals(Object)} */
        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + count;
            result = prime * result + count_at;
            result = prime * result + count_gc;
            return result;
            }

        /**
         * Two instances are equal when their three counters are equal; the
         * contig and the coordinates are not part of the comparison.
         */
        @Override
        public boolean equals(final Object obj) {
            if (this == obj) {
                return true;
                }
            // instanceof is false for null, no separate null check is needed
            if (!(obj instanceof GCPercentImpl)) {
                return false;
                }
            final GCPercentImpl other = (GCPercentImpl) obj;
            return this.count == other.count &&
                this.count_at == other.count_at &&
                this.count_gc == other.count_gc;
            }

        @Override
        public String toString() {
            return "gc_percent("+this.count_gc+"/"+this.count+")="+this.getGCPercent();
            }

        @Override
        public String getContig() {
            return this.contig;
            }

        @Override
        public int getStart() {
            return this.start1;
            }

        @Override
        public int getEnd() {
            return this.end1;
            }
        }

    /**
     * Creates a view of one contig of an indexed reference.
     *
     * @param indexedFastaSequenceFile the indexed reference, must have a sequence dictionary
     * @param chrom the name of the contig, as it is spelled in the dictionary
     * @throws NullPointerException if indexedFastaSequenceFile is null
     * @throws IllegalArgumentException if the reference has no dictionary or
     *         if the dictionary has no sequence named chrom
     */
    public GenomicSequence(final IndexedFastaSequenceFile indexedFastaSequenceFile, final String chrom)
        {
        if (indexedFastaSequenceFile == null) throw new NullPointerException("IndexedFastaSequenceFile is null");
        this.indexedFastaSequenceFile = indexedFastaSequenceFile;
        if (this.indexedFastaSequenceFile.getSequenceDictionary() == null)
            {
            throw new IllegalArgumentException("No sequence dictionary in the reference. Use picard CreateSequenceDictionary to index the sequence https://broadinstitute.github.io/picard/command-line-overview.html.");
            }
        this.samSequenceRecord = this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(chrom);
        if (this.samSequenceRecord == null) throw new IllegalArgumentException("not chromosome "+chrom+" in reference.");
        }

    /** get the dictionary entry of the contig of that genomic sequence */
    public SAMSequenceRecord getSAMSequenceRecord()
        {
        return samSequenceRecord;
        }

    /** return the hash code of the underlying {@link SAMSequenceRecord} */
    @Override
    public int hashCode()
        {
        return getSAMSequenceRecord().hashCode();
        }

    /** get the chromosome name of that genomic sequence */
    @Override
    public String getChrom()
        {
        return getSAMSequenceRecord().getSequenceName();
        }

    /** return the length of the contig, as declared in the sequence dictionary */
    @Override
    public int length()
        {
        return getSAMSequenceRecord().getSequenceLength();
        }

    /**
     * Returns the base at a 0-based position of the contig.
     *
     * A window of bases around the last requested position is kept in memory;
     * it is reloaded from the reference when the requested position is
     * outside of it. The window is updated without any synchronization.
     *
     * @param index0 0-based position on the contig
     * @return the base, exactly as it is stored in the reference
     * @throws IndexOutOfBoundsException if index0 is negative or not smaller than {@link #length()}
     */
    @Override
    public char charAt(final int index0)
        {
        // reject invalid positions before touching the reference: a negative
        // index would otherwise reload the whole window and only then fail
        if (index0 < 0 || index0 >= length())
            {
            throw new IndexOutOfBoundsException("index:"+index0);
            }
        // hit in the cached window ?
        if (this.buffer != null && index0 >= this.buffer_pos && index0 - this.buffer_pos < this.buffer.length)
            {
            return (char) this.buffer[index0 - this.buffer_pos];
            }
        // miss: reload a window starting HALF_BUFFER_CAPACITY before index0
        final int minStart = Math.max(0, index0 - HALF_BUFFER_CAPACITY);
        // computed as long so that the sum cannot overflow near Integer.MAX_VALUE
        final int maxEnd = (int) Math.min((long) minStart + 2L * HALF_BUFFER_CAPACITY, (long) this.length());
        // getSubsequenceAt takes 1-based inclusive coordinates:
        // 0-based [minStart,maxEnd) is 1-based [minStart+1,maxEnd]
        this.buffer = this.indexedFastaSequenceFile.getSubsequenceAt(
            getChrom(),
            minStart + 1,
            maxEnd).getBases();
        this.buffer_pos = minStart;
        return (char) this.buffer[index0 - minStart];
        }

    /**
     * return GC% between start (inclusive, 0 based) and end (exclusive).
     *
     * The scan stops at the end of the contig. C, G and S (upper or lower
     * case) are counted as GC; A, T and W are counted as AT; any other symbol
     * is only added to the total count.
     *
     * @param start 0-based inclusive start
     * @param end 0-based exclusive end
     * @return the counts for the interval; the result is empty if no base was scanned
     * @throws IndexOutOfBoundsException if a negative position has to be read
     */
    public GCPercent getGCPercent(final int start, final int end) {
        final int contigLength = this.length();
        // last position to scan (exclusive), never beyond the end of the contig
        final int stop = Math.min(end, contigLength);
        final GCPercentImpl gcp = new GCPercentImpl(
            this.getChrom(),
            start + 1,
            stop
            );
        for (int i = start; i < stop; ++i) {
            gcp.count++;
            switch (this.charAt(i)) {
                case 'c': case 'C':
                case 'g': case 'G':
                case 's': case 'S':
                    gcp.count_gc++;
                    break;
                case 'a': case 'A':
                case 't': case 'T':
                case 'w': case 'W':
                    gcp.count_at++;
                    break;
                default:
                    // any other symbol only contributes to the total count
                    break;
                }
            }
        return gcp;
        }
    }