// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.
package it.crs4.seal.common;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.*;
/**
 * One operation decoded from an MD tag: a run of matching bases, a run of
 * mismatched bases, or a run of bases deleted from the reference.
 *
 * Instances are immutable.  Use {@link #scanMdTag(String)} to turn the text
 * of a tag into a list of operations.
 */
public class MdOp
{
    /** A run of decimal digits: the length of a stretch of matching bases. */
    protected static final Pattern MatchPattern = Pattern.compile("\\d++");
    /** A run of base letters: a mismatch. */
    protected static final Pattern MismatchPattern = Pattern.compile("[AGCTN]++");
    /** A '^' followed by a run of base letters: a deletion. */
    protected static final Pattern DeletePattern = Pattern.compile("\\^[AGCTN]++");

    /** The kinds of operation an MD tag can contain. */
    public static enum Type {
        Match,
        Mismatch,
        Delete;
    }

    private final Type op;
    private final int len;
    // Empty string for match.
    // For mismatch, bases different from reference and for
    // deletion the bases deleted from reference.
    // note: does not include '^' character
    private final String seq;

    /**
     * Create an operation with an empty sequence.
     *
     * @param op  operation type
     * @param len number of bases covered by the operation
     */
    public MdOp(MdOp.Type op, int len) {
        this(op, len, "");
    }

    /**
     * Create an operation with an explicit base sequence.
     *
     * @param op  operation type
     * @param len number of bases covered by the operation
     * @param seq bases associated with the operation (without any '^'
     *            prefix); must be empty when {@code op} is {@link Type#Match}
     * @throws IllegalArgumentException if a non-empty sequence is given for a
     *                                  Match operation
     */
    public MdOp(MdOp.Type op, int len, String seq) {
        if (op == Type.Match && !seq.isEmpty())
            throw new IllegalArgumentException("non-empty sequence " + seq + " provided for Match operator");

        this.op = op;
        this.len = len;
        this.seq = seq;
    }

    /** @return the operation type */
    public MdOp.Type getType() { return op; }

    /** @return the number of bases covered by this operation */
    public int getLen() { return len; }

    /** @return the bases attached to this operation; empty for a Match built with the two-argument constructor */
    public String getSeq() { return seq; }

    /**
     * Two operations are equal when their type, length and sequence are equal.
     */
    @Override
    public boolean equals(Object other)
    {
        if (this == other)
            return true;
        if (!(other instanceof MdOp))
            return false;

        MdOp that = (MdOp) other;
        if (that.op != this.op || that.len != this.len)
            return false;
        return this.seq == null ? that.seq == null : this.seq.equals(that.seq);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: it combines the same
     * three fields that equals compares.
     */
    @Override
    public int hashCode()
    {
        // Hash the constant's name rather than the enum itself: Enum.hashCode()
        // is identity-based, so it can differ from one JVM run to the next.
        int hash = (op == null) ? 0 : op.name().hashCode();
        hash = 31 * hash + len;
        hash = 31 * hash + ((seq == null) ? 0 : seq.hashCode());
        return hash;
    }

    @Override
    public String toString() { return "(" + op + "," + len + "," + seq + ")"; }

    /**
     * Scan an MD tag into a list of MdOp elements.
     *
     * The accepted form is a number, followed by zero or more repetitions of
     * (a run of bases, or '^' plus a run of bases) and another number, e.g.
     * {@code 10A5^AC6}.  Numbers equal to zero are accepted but produce no
     * Match element.
     *
     * @param tag text of the MD tag
     * @return the operations in the order they appear in the tag
     * @throws FormatException if the tag does not have the form described
     *                         above, or a match length does not fit in an int
     */
    public static List<MdOp> scanMdTag(String tag) throws FormatException
    {
        List<MdOp> result = new ArrayList<MdOp>(5);
        final int end = tag.length();

        Matcher m = MatchPattern.matcher(tag);
        if (!m.lookingAt())
            throw new FormatException("Invalid MD tag '" + tag + "'. Tag doesn't start with a number.");
        addMatch(result, tag, m.group());
        // advance the scanner past the token just consumed
        m.region(m.end(), end);

        while (m.regionStart() < end)
        {
            // A number must be followed by a mismatch or a deletion.
            m.usePattern(MismatchPattern);
            if (m.lookingAt())
            {
                String bases = m.group();
                result.add(new MdOp(Type.Mismatch, bases.length(), bases));
            }
            else
            {
                m.usePattern(DeletePattern);
                if (!m.lookingAt())
                    throw new FormatException("Invalid MD tag '" + tag + "' (pos " + m.regionStart() + "). Match number not followed by a mismatch or delete.");

                String deleted = m.group().substring(1); // drop the leading '^'
                result.add(new MdOp(Type.Delete, deleted.length(), deleted));
            }
            m.region(m.end(), end);

            // A mismatch or deletion must be followed by a number.
            m.usePattern(MatchPattern);
            if (!m.lookingAt())
                throw new FormatException("Invalid MD tag '" + tag + "' (pos " + m.regionStart() + "). Mismatch or delete not followed by a match number.");
            addMatch(result, tag, m.group());
            m.region(m.end(), end);
        }
        return result;
    }

    /**
     * Parse a run of digits taken from tag and, when it is greater than zero,
     * append the corresponding Match operation to ops.
     */
    private static void addMatch(List<MdOp> ops, String tag, String digits) throws FormatException
    {
        int length;
        try {
            length = Integer.parseInt(digits);
        }
        catch (NumberFormatException e) {
            // MatchPattern places no limit on the number of digits, so the
            // value may overflow an int; report it as a malformed tag.
            throw new FormatException("Invalid MD tag '" + tag + "'. Match length '" + digits + "' is out of range.");
        }

        // don't add a 0-length op
        if (length > 0)
            ops.add(new MdOp(Type.Match, length));
    }
}