// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.
package it.crs4.seal.common;
import java.util.Map;
import java.util.LinkedHashMap;
import java.util.Iterator;
import java.io.LineNumberReader;
import java.io.IOException;
import java.io.Reader;
/**
 * In-memory view of a reference annotation file (presumably the ".ann" file
 * written by BWA when indexing a reference -- the format is inferred from the
 * parser below).
 *
 * The expected layout is:
 * <ul>
 *   <li>a header line with three non-negative integers; the first is the
 *       total reference length and the second the number of contigs (the
 *       third is parsed but not used);</li>
 *   <li>for each contig, a "name" line whose second whitespace-separated
 *       field is the contig name, followed by a "coordinates" line with three
 *       non-negative integers; the first is the contig start and the second
 *       its length (the third is parsed but not used).</li>
 * </ul>
 * Empty lines after the header are ignored.
 *
 * Contigs are kept in file order and can be looked up by name or iterated.
 */
public class BwaRefAnnotation implements Iterable<BwaRefAnnotation.Contig>
{
	/** Thrown when a contig is requested by a name that has not been loaded. */
	public static class UnknownContigException extends java.lang.RuntimeException {
		private static final long serialVersionUID = 1L;

		public UnknownContigException(String msg) {
			super(msg);
		}
	}

	/**
	 * A single contig record: its name, its ordinal id and its position
	 * within the reference.
	 */
	public static class Contig
	{
		/** Ordinal of the contig in the annotation file; load() assigns ids starting from 1. */
		public int id;
		/** Start of the contig and its length, as read from the coordinates line. */
		public long start, length;
		/** Contig name, as read from the name line. */
		public String name;

		public Contig(String name, int id, long start, long length)
		{
			this.id = id;
			this.start = start;
			this.length = length;
			this.name = name;
		}

		public int getId() { return id; }
		public long getStart() { return start; }
		public long getLength() { return length; }
		public String getName() { return name; }
	}

	/** Which kind of line the parser expects next within a contig record. */
	private enum AnnScannerState {
		NameLine,
		CoordLine
	}

	private Map<String, Contig> contigMap;
	private long referenceLength;

	/**
	 * Creates an empty annotation. The reference length is -1 until
	 * {@link #load(Reader)} succeeds in reading a header.
	 */
	public BwaRefAnnotation()
	{
		// Use a LinkedHashMap to store the contig information.  This gives us O(1) look-ups
		// by name and preserves the contig order for iteration.  The initial capacity is 30.
		contigMap = new LinkedHashMap<String, Contig>(30);
		referenceLength = -1;
	}

	/**
	 * Creates an annotation and immediately loads it from the given reader.
	 *
	 * @param in source of the annotation text
	 * @throws IOException if reading from {@code in} fails
	 * @throws FormatException if the text does not follow the expected layout
	 */
	public BwaRefAnnotation(Reader in) throws IOException
	{
		this();
		this.load(in);
	}

	/**
	 * Parses annotation text from {@code in}.
	 *
	 * The reference length is overwritten with the value from the header.
	 * Contigs are added to the internal map with {@code put}, so an entry with
	 * the same name as an existing one replaces it.  The reader is not closed.
	 *
	 * @param in source of the annotation text
	 * @throws IOException if reading from {@code in} fails
	 * @throws FormatException if the input is empty, a line has the wrong
	 *         shape, a number is invalid or negative, or the number of contig
	 *         records does not match the header.  Except for the empty-input
	 *         case, the message is prefixed with the line number.
	 */
	public void load(Reader in) throws IOException, FormatException
	{
		LineNumberReader input = new LineNumberReader(in);

		String line = input.readLine();
		if (line == null)
			throw new FormatException("Empty annotations file");

		try
		{
			long[] row = scanPosLine(line);
			referenceLength = row[0];
			if (referenceLength <= 0)
				throw new FormatException("Invalid reference length " + referenceLength);

			// Validate the range before narrowing to int so that a huge value
			// cannot silently wrap around into a plausible-looking count.
			if (row[1] <= 0 || row[1] > Integer.MAX_VALUE)
				throw new FormatException("Invalid number of contigs " + row[1]);
			int nContigs = (int)row[1];

			AnnScannerState state = AnnScannerState.NameLine;
			int contigCount = 0;
			String lastContigName = null;

			// Reading in the loop condition guarantees that every path through
			// the body (including the blank-line 'continue') advances the input.
			while ((line = input.readLine()) != null)
			{
				if (line.length() == 0) // skip blank lines
					continue;

				if (state == AnnScannerState.NameLine)
				{
					String[] fields = scanNameLine(line);
					contigCount += 1;
					if (contigCount > nContigs)
						throw new FormatException("There are more contigs than expected (first line says we should have " + nContigs + ")");
					lastContigName = fields[1];
					state = AnnScannerState.CoordLine;
				}
				else // state is CoordLine
				{
					long[] fields = scanPosLine(line);
					contigMap.put(lastContigName, new Contig(lastContigName, contigCount, fields[0], fields[1]));
					state = AnnScannerState.NameLine;
				}
			}

			if (state != AnnScannerState.NameLine)
				throw new FormatException("last entry is incomplete (found the name line but not the coordinates)");

			if (contigCount < nContigs)
				throw new FormatException("Not enough contig records.  Header said we should have " + nContigs + ", but we only found " + contigCount);
		}
		catch (NumberFormatException e) {
			throw new FormatException("Line " + input.getLineNumber() + ": invalid number (" + e.getMessage() + "). Original line: " + line);
		}
		catch (FormatException e) {
			// add line number to message
			throw new FormatException("Line " + input.getLineNumber() + ": " + e.getMessage());
		}
	}

	/**
	 * Parses a line made of exactly three whitespace-separated non-negative
	 * integers (used for both the header and the coordinates lines).
	 *
	 * @param line the text to parse
	 * @return the three values, in the order they appear on the line
	 * @throws FormatException if the line does not have exactly three fields
	 * @throws NumberFormatException if a field is not a valid long or is negative
	 */
	private long[] scanPosLine(String line) throws NumberFormatException
	{
		String[] fields = line.split("\\s+");
		if (fields.length != 3)
			throw new FormatException("Wrong number of fields (" + fields.length + ").  Expected 3");

		long[] retval = new long[3];
		for (int i = 0; i <= 2; ++i)
		{
			retval[i] = Long.parseLong(fields[i]);
			if (retval[i] < 0)
				throw new NumberFormatException("negative value " + retval[i]);
		}
		return retval;
	}

	/**
	 * Splits a contig name line into at most three whitespace-separated
	 * fields; the caller uses the second one as the contig name.
	 *
	 * @param line the text to split
	 * @return the fields; guaranteed to contain at least two elements
	 * @throws FormatException if the line has fewer than two fields
	 */
	private String[] scanNameLine(String line)
	{
		String[] fields = line.split("\\s+", 3);
		// The caller reads fields[1]; report a format error instead of letting
		// an ArrayIndexOutOfBoundsException escape on a malformed line.
		if (fields.length < 2)
			throw new FormatException("Wrong number of fields in contig name line (" + fields.length + ").  Expected at least 2");
		return fields;
	}

	/**
	 * @return the reference length read from the header, or -1 if no header
	 *         has been read yet
	 */
	public long getReferenceLength()
	{
		return referenceLength;
	}

	/**
	 * @param name contig name
	 * @return the id of the named contig
	 * @throws UnknownContigException if no contig with that name is loaded
	 */
	public int getContigId(String name)
	{
		return getContig(name).id;
	}

	/**
	 * Translates a contig-relative coordinate into a reference-wide one by
	 * adding the contig's start.  No check is made that {@code localCoord}
	 * falls within the contig.
	 *
	 * @param contig_name contig name
	 * @param localCoord coordinate relative to the contig
	 * @return {@code contig.start + localCoord}
	 * @throws UnknownContigException if no contig with that name is loaded
	 */
	public long getAbsCoord(String contig_name, long localCoord)
	{
		Contig contig = getContig(contig_name);
		return contig.start + localCoord;
	}

	/**
	 * @param name contig name
	 * @return the contig registered under {@code name}
	 * @throws UnknownContigException if no contig with that name is loaded
	 */
	public Contig getContig(String name)
	{
		Contig c = contigMap.get(name);
		if (c == null)
			throw new UnknownContigException("Unknown contig name '" + name + "'");
		return c;
	}

	/**
	 * @return an iterator over the loaded contigs, in the order in which they
	 *         were inserted
	 */
	public Iterator<Contig> iterator()
	{
		return contigMap.values().iterator();
	}
}