/*
* Created on Mar 9, 2006
*/
package org.seqcode.gsebricks.verbs.location;
import java.util.regex.*;
import org.seqcode.genome.Genome;
import org.seqcode.genome.location.NamedRegion;
import org.seqcode.genome.location.Region;
import org.seqcode.genome.location.StrandedRegion;
import org.seqcode.gsebricks.verbs.Mapper;
/**
* @author shaun
*/
public class StrandedRegionParser implements Mapper<String,Region> {
private static Pattern regPatt;
static {
regPatt = Pattern.compile("(\\w+):(\\d+)-(\\d+):([^:\\s]+)");
}
private Genome genome;
private int chromIndex, startIndex, endIndex, nameIndex, minLength;
public StrandedRegionParser(Genome g) {
genome = g;
chromIndex = 0;
startIndex = 1;
endIndex = 2;
minLength = (Math.max(chromIndex, Math.max(startIndex, endIndex))) + 1;
}
/* (non-Javadoc)
* @see org.seqcode.gsebricks.verbs.Filter#execute(null)
*/
public StrandedRegion execute(String input) {
String[] array = input.split("\\s+");
String chrom = array[chromIndex];
Matcher m = regPatt.matcher(chrom);
if(m.matches()) {
chrom = m.group(1);
chrom = chrom.replaceFirst("chr", "");
int start = Integer.parseInt(m.group(2));
int end = Integer.parseInt(m.group(3));
char strand = '?';
String strandstr = m.group(4);
if(strandstr.length() > 0) { strand = strandstr.charAt(0); }
return new StrandedRegion(genome, chrom, start, end, strand);
} else {
System.err.println("Line \"" + input + "\" is incorrectly formatted for a StrandedRegion");
return null;
}
}
}