/* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ package abra; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; /** * Loads region info into memory. * * @author Lisle E. Mose (lmose at unc dot edu) */ public class RegionLoader { private static final int SEQNAME_IDX = 0; private static final int BED_START_IDX = 1; private static final int BED_END_IDX = 2; private static final int KMER_SIZE_IDX = 3; public List<Feature> load(String regionFile, boolean hasPresetKmers) throws FileNotFoundException, IOException { List<Feature> features = new ArrayList<Feature>(); int start = BED_START_IDX; int end = BED_END_IDX; BufferedReader reader = new BufferedReader(new FileReader(regionFile)); try { String lastChr = ""; long lastStart = -1; String line = reader.readLine(); int cnt = 0; while (line != null) { if(line.startsWith("#")) { line = reader.readLine(); continue; } String[] fields = line.split("\t"); String chromosome = fields[SEQNAME_IDX]; long startPos = Long.valueOf(fields[start]); long endPos = Long.valueOf(fields[end]); if (startPos > endPos) { throw new IllegalArgumentException("Region end must be greater than region start in target BED file: " + line); } if (lastChr.equals(chromosome) && startPos < lastStart) { throw new IllegalArgumentException("Target BED file must be sorted in increasing coordinate order (grouped by chromosome): " + line); } Feature feature = new Feature(chromosome, startPos, endPos); if (fields.length >= KMER_SIZE_IDX+1 && hasPresetKmers) { int kmerSize = Integer.parseInt(fields[KMER_SIZE_IDX]); feature.setKmer(kmerSize); } features.add(feature); line = reader.readLine(); cnt++; lastChr = chromosome; lastStart = startPos; if ((cnt % 100000) == 0) { System.err.println("Loaded " + cnt + " regions"); System.err.flush(); } } } finally { reader.close(); } return features; } public static List<Feature> collapseRegions(Collection<Feature> regions, int maxGap) { List<Feature> collapsedRegions = new ArrayList<Feature>(); Feature currentRegion = null; for (Feature region : regions) { if (currentRegion != null) { if ((currentRegion.getSeqname().equals(region.getSeqname())) && (currentRegion.getEnd() + (maxGap) >= region.getStart())) { currentRegion.setEnd(region.getEnd()); } else { collapsedRegions.add(currentRegion); currentRegion = region; } } else { currentRegion = region; } } if (currentRegion != null) { collapsedRegions.add(currentRegion); } System.err.println("Collapsed regions from " + regions.size() + " to " + collapsedRegions.size()); return collapsedRegions; } public static void main(String[] args) throws Exception { RegionLoader loader = new RegionLoader(); // List<Feature> regions = loader.load("/home/lmose/dev/abra/issue12/test.bed"); List<Feature> regions = loader.load("/home/lmose/dev/abra/issue12/test2.bed", false); regions = RegionLoader.collapseRegions(regions, 100); /* for (Feature region : regions) { if (region.getLength() <= 0) { System.out.println(region + " - " + region.getLength()); } } */ regions = ReAligner.splitRegions(regions); for (Feature region : regions) { if (region.getLength() <= 0) { System.err.println(region + " - " + region.getLength()); } } } }