/*
* The MIT License
*
* Copyright (c) 2013 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.tribble.dbsnp;
import htsjdk.tribble.AsciiFeatureCodec;
import htsjdk.tribble.Feature;
import htsjdk.tribble.annotation.Strand;
import htsjdk.tribble.readers.LineIterator;
/**
* @author aaron
*
* Example format:
* 585 chr1 433 433 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1
* 585 chr1 491 492 rs55998931 0 + C C C/T genomic single unknown 0 0 unknown exact 1
*/
public class OldDbSNPCodec extends AsciiFeatureCodec<OldDbSNPFeature> {
// the number of tokens we expect to parse from a dbSNP line
static final int expectedTokenCount = 18;
public OldDbSNPCodec() {
super(OldDbSNPFeature.class);
}
public Feature decodeLoc(String line) {
return decode(line);
}
/**
* Decode a line as a db SNP Feature.
*
* @param line the line to decode
*
* @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is
* a comment)
*
* The ordering of db SNP fields from the UCSC track browser:
* 1 bin
* 2 chromosome
* 3 chromosome Start
* 4 chromosome End
* 5 name
* 6 score
* 7 strand
* 8 reference base NCBI
* 9 reference base UCSC
* 10 observed base
* 11 mol. Type
* 12 class
* 13 valid
* 14 avHet
* 15 avHetSE
* 16 functions
* 17 locType
* 18 weight
*/
public OldDbSNPFeature decode(String line) {
// we may be asked to process a header line; ignore it
if (line.startsWith("#")) return null;
// split the line
String[] tokens = line.split("\\t+");
return decode(tokens);
}
@Override
public Object readActualHeader(final LineIterator reader) {
return null;
}
public OldDbSNPFeature decode(String[] tokens){
// check to see if we've parsed the string into the right number of tokens (expectedTokenCount)
if (tokens.length != expectedTokenCount)
return null;
// throw new CodecLineParsingException("the dbSNP line didn't have the expected number of tokens " +
// "(expected = " + expectedTokenCount + ", saw = " + tokens.length + " on " +
//
// "line = " + line + ")");
// create a new feature from the line
int start = Integer.valueOf(tokens[2])+1;
int stop = Integer.valueOf(tokens[3]);
stop = (stop < start) ? start : stop; // Indels can be of length zero in dbSNP, we make them length one
OldDbSNPFeature feature = new OldDbSNPFeature(tokens[1],
start,
stop);
feature.setRsID(tokens[4]);
feature.setScore(Integer.valueOf(tokens[5]));
feature.setStrand(tokens[6].equals("+") ? Strand.POSITIVE : Strand.NEGATIVE);
feature.setNCBIRefBase(tokens[7]);
feature.setUCSCRefBase(tokens[8]);
// split the observed bases
feature.setObserved(tokens[9].split("/"));
feature.setMolType(tokens[10]);
feature.setVariantType(tokens[11]);
feature.setValidationStatus(tokens[12]);
feature.setAvHet(Double.valueOf(tokens[13]));
feature.setAvHetSE(Double.valueOf(tokens[14]));
feature.setFunction(tokens[15]);
feature.setLocationType(tokens[16]);
feature.setWeight(Integer.valueOf(tokens[17]));
// return the setup feature
return feature;
}
}