/*
* The MIT License
*
* Copyright (c) 2013 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.tribble.gelitext;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.tribble.AsciiFeatureCodec;
import htsjdk.tribble.Feature;
import htsjdk.tribble.exception.CodecLineParsingException;
import htsjdk.tribble.readers.LineIterator;
import java.util.Arrays;
/**
* <p/>
* A codec for parsing geli text files, which is the text version of the geli binary format.
* <p/>
* <p/>
* GELI text has the following tab-separated fields (the parser accepts any run of whitespace as a separator):
* contig the contig (string)
* position the position on the contig (long)
* refBase the reference base (char)
* depthOfCoverage the depth of coverage at this position (int)
* maximumMappingQual the maximum mapping quality of a read at this position (int)
* genotype the called genotype (string)
* LODBestToReference the LOD score of the best to the reference (double)
* LODBestToNext the LOD score of the best to the next best genotype (double)
* likelihoods the array of all genotype likelihoods, in ordinal ordering (array of 10 doubles, in ordinal order)
*
* @author aaron
*/
public class GeliTextCodec extends AsciiFeatureCodec<GeliTextFeature> {

    /** Index of the first genotype-likelihood column; columns before it hold the scalar fields. */
    private static final int FIRST_LIKELIHOOD_COLUMN = 8;

    /** Number of genotype-likelihood columns that close each row. */
    private static final int LIKELIHOOD_COUNT = 10;

    /** Total number of whitespace-separated columns a data row must contain (18). */
    private static final int EXPECTED_TOKEN_COUNT = FIRST_LIKELIHOOD_COLUMN + LIKELIHOOD_COUNT;

    /** Creates a codec that produces {@link GeliTextFeature} instances. */
    public GeliTextCodec() {
        super(GeliTextFeature.class);
    }

    /**
     * Decodes the location of a line. This codec does no cheaper location-only parse; it simply
     * delegates to {@link #decode(String)}.
     *
     * @param line one line of GELI text
     * @return the decoded feature, or {@code null} if the line starts with {@code #} or {@code @}
     * @throws RuntimeException if the line is a data row that cannot be parsed
     */
    public Feature decodeLoc(final String line) {
        return decode(line);
    }

    /**
     * Decodes one line of GELI text.
     * <p>
     * Lines whose first character is {@code #} or {@code @} are treated as header/comment lines and
     * skipped. Every other line is trimmed, split on runs of whitespace and handed to
     * {@link #decode(String[])}. Note that a blank line is not skipped: it yields a single empty
     * token and therefore fails the column-count check.
     *
     * @param line one line of GELI text
     * @return the decoded feature, or {@code null} for a header/comment line
     * @throws RuntimeException if the line is a data row that cannot be parsed
     */
    @Override
    public GeliTextFeature decode(final String line) {
        // clean out header lines and comments
        if (line.startsWith("#") || line.startsWith("@")) {
            return null;
        }

        // parse into tokens
        final String[] parts = line.trim().split("\\s+");
        return decode(parts);
    }

    /**
     * This codec does not parse a header.
     *
     * @param reader the line source; it is not read from
     * @return always {@code null}
     */
    @Override
    public Object readActualHeader(LineIterator reader) {
        return null;
    }

    /**
     * Builds a feature from the already-split columns of one data row.
     * <p>
     * Column layout: 0 contig, 1 position, 2 reference base, 3 depth of coverage, 4 maximum mapping
     * quality, 5 genotype, 6 LOD best-to-reference, 7 LOD best-to-next, 8-17 genotype likelihoods.
     * The reference base is upper-cased; the genotype is upper-cased and its characters sorted
     * before it is converted with {@link DiploidGenotype#toDiploidGenotype(String)}.
     *
     * @param tokens the columns of one row; exactly 18 are required
     * @return the decoded feature
     * @throws RuntimeException if the column count is wrong or a numeric column is not a valid
     *                          number; the underlying exception is attached as the cause
     */
    public GeliTextFeature decode(final String[] tokens) {
        try {
            // check that we got the correct number of tokens in the split
            if (tokens.length != EXPECTED_TOKEN_COUNT) {
                throw new CodecLineParsingException("Invalid GeliTextFeature row found -- incorrect element count. Expected " + EXPECTED_TOKEN_COUNT + ", got " + tokens.length + " line = " + CollectionUtil.join(Arrays.asList(tokens), " "));
            }

            // UPPER case and sort, so that e.g. "ga" and "AG" both become "AG"
            final char[] alleles = tokens[5].toUpperCase().toCharArray();
            Arrays.sort(alleles);
            final String bestGenotype = new String(alleles);

            final double[] genotypeLikelihoods = new double[LIKELIHOOD_COUNT];
            for (int i = 0; i < LIKELIHOOD_COUNT; i++) {
                genotypeLikelihoods[i] = Double.parseDouble(tokens[FIRST_LIKELIHOOD_COLUMN + i]);
            }

            return new GeliTextFeature(tokens[0],
                    Long.parseLong(tokens[1]),
                    Character.toUpperCase(tokens[2].charAt(0)),
                    Integer.parseInt(tokens[3]),
                    Integer.parseInt(tokens[4]),
                    DiploidGenotype.toDiploidGenotype(bestGenotype),
                    Double.parseDouble(tokens[6]),
                    Double.parseDouble(tokens[7]),
                    genotypeLikelihoods);
        } catch (CodecLineParsingException e) {
            // Rethrow only: the caller decides whether and where to report the failure.
            throw unparsableLine(tokens, e);
        } catch (NumberFormatException e) {
            throw unparsableLine(tokens, e);
        }
    }

    /** Wraps a parse failure in a RuntimeException whose message echoes the offending row. */
    private static RuntimeException unparsableLine(final String[] tokens, final RuntimeException cause) {
        return new RuntimeException("Unable to parse line " + CollectionUtil.join(Arrays.asList(tokens), " "), cause);
    }
}