package org.seqcode.gsebricks.verbs.location; import java.util.regex.*; import org.seqcode.genome.Genome; import org.seqcode.genome.location.Point; import org.seqcode.genome.location.StrandedPoint; import org.seqcode.gsebricks.verbs.Mapper; /** * @author shaun */ public class StrandedPointParser implements Mapper<String,Point> { private static Pattern regPatt; static { regPatt = Pattern.compile("(\\w+):(\\d+):([^:\\s]+)"); } private Genome genome; private int chromIndex, startIndex, nameIndex; public StrandedPointParser(Genome g) { genome = g; chromIndex = 0; startIndex = 1; } /* (non-Javadoc) * @see org.seqcode.gsebricks.verbs.Filter#execute(null) */ public StrandedPoint execute(String input) { String[] array = input.split("\\s+"); String chrom = array[chromIndex]; Matcher m = regPatt.matcher(chrom); if(m.matches()) { chrom = m.group(1); chrom = chrom.replaceFirst("chr", ""); int start = Integer.parseInt(m.group(2)); char strand = '?'; String strandstr = m.group(3); if(strandstr.length() > 0) { strand = strandstr.charAt(0); } return new StrandedPoint(genome, chrom, start, strand); } else { System.err.println("Line \"" + input + "\" is incorrectly formatted for a StrandedPoint"); return null; } } }