/**
*
*/
package org.seqcode.data.io;
import java.io.File;
import java.io.IOException;
import java.util.Vector;
import org.apache.log4j.Logger;
import org.seqcode.genome.Genome;
import org.seqcode.genome.Species;
import org.seqcode.genome.location.Point;
import org.seqcode.genome.location.Region;
import org.seqcode.gseutils.ArgParser;
import org.seqcode.gseutils.Args;
import org.seqcode.gseutils.NotFoundException;
import org.seqcode.gseutils.Pair;
/**
 * Command-line tool that reads a file with one point per line, expands each
 * point into a region via {@code Point.expand(dist)}, and writes the regions
 * to an output file, one region string per line.
 *
 * @author rca
 */
public class Points2RegionsConverter {

    private static final Logger logger = Logger.getLogger(Points2RegionsConverter.class);

    /**
     * Prints an example command line to standard error and logs it at error level.
     */
    private static void usage() {
        String usage = "java Points2RegionsConverter --inputfile \"foo.txt\" --outputfile \"bar.txt\" --dist 50 --species \"Mus musculus;mm8\" [--overwrite true]";
        System.err.println(usage);
        logger.error(usage);
    }

    /**
     * Entry point. Recognized arguments: {@code --inputfile}, {@code --outputfile},
     * {@code --dist}, a genome specification understood by {@code Args.parseGenome}
     * (or {@code --geninfo} as a fallback), and optionally {@code --overwrite true}.
     * Exits the JVM with a non-zero status on any invalid argument or I/O failure.
     *
     * @param args command-line arguments; they are used as given (no built-in defaults)
     */
    public static void main(String[] args) {
        Genome genome = resolveGenome(args);

        String inputFilename = Args.parseString(args, "inputfile", null);
        String outputFilename = Args.parseString(args, "outputfile", null);
        // Fail with a usage message instead of a NullPointerException from new File(null).
        if (inputFilename == null || outputFilename == null) {
            logger.fatal("Must specify both an input file (--inputfile) and an output file (--outputfile)");
            usage();
            System.exit(1);
        }

        boolean overwrite = Args.parseString(args, "overwrite", "false").equals("true");
        if (new File(outputFilename).exists() && !overwrite) {
            logger.fatal("Output File already exists. Specify a different output file or use the --overwrite flag to allow overwrite.");
            usage();
            System.exit(1);
        }

        int dist = Args.parseInteger(args, "dist", -1);
        if (dist < 1) {
            logger.fatal("Must specify a positive distance to expand point");
            usage();
            System.exit(-1);
        }

        logger.debug("Reading file " + inputFilename);
        Vector<Point> points = null;
        try {
            points = DatasetsGeneralIO.readPointsFromFile(genome, inputFilename);
        }
        catch (IOException ioex) {
            // Pass the exception as the throwable argument so the stack trace is logged.
            logger.fatal("Error reading file " + inputFilename, ioex);
            usage();
            System.exit(-1);
        }

        logger.debug(points.size() + " points read. Converting to regions...");
        // All points are expanded before the output file is opened, so a failure
        // during expansion does not leave a partially written output file.
        Vector<Region> regions = new Vector<Region>(points.size());
        for (Point point : points) {
            regions.add(point.expand(dist));
        }

        logger.debug("Writing file " + outputFilename);
        if (!writeRegions(regions, outputFilename)) {
            System.exit(-1);
        }
        logger.debug("done!");
    }

    /**
     * Determines the genome to use from the command-line arguments. Tries
     * {@code Args.parseGenome} first; if that yields nothing, falls back to
     * building a genome from the file named by {@code --geninfo}. Exits the
     * JVM if neither source is available or the genome lookup fails.
     *
     * @param args command-line arguments
     * @return the resolved genome
     */
    private static Genome resolveGenome(String[] args) {
        ArgParser ap = new ArgParser(args);
        Genome genome = null;
        try {
            Pair<Species, Genome> pair = Args.parseGenome(args);
            if (pair != null) {
                genome = pair.cdr();
            }
            else if (ap.hasKey("geninfo")) {
                // No genome resolved from the arguments: build one from the geninfo file.
                genome = new Genome("Genome", new File(ap.getKeyValue("geninfo")), true);
            }
            else {
                logger.fatal("No genome provided; provide a Gifford lab DB genome name or a file containing chromosome name/length pairs.");
                usage();
                System.exit(1);
            }
        }
        catch (NotFoundException nfex) {
            logger.fatal("", nfex);
            usage();
            System.exit(-1);
        }
        return genome;
    }

    /**
     * Writes the string form of each region to the given file, one per line.
     * The writer is closed before this method returns whenever it was
     * successfully opened, including when a write fails part-way through.
     *
     * @param regions regions to write
     * @param outputFilename path of the file to write
     * @return {@code true} if the file was written and closed without an I/O error
     */
    private static boolean writeRegions(Vector<Region> regions, String outputFilename) {
        LineByLineFileWriter writer = new LineByLineFileWriter();
        boolean opened = false;
        boolean succeeded = true;
        try {
            writer.openFile(outputFilename);
            opened = true;
            for (Region region : regions) {
                writer.writeLine(region.regionString());
            }
        }
        catch (IOException ioex) {
            logger.fatal("Error writing file " + outputFilename, ioex);
            succeeded = false;
        }
        finally {
            // Only close a writer that was actually opened. The caller exits
            // after this method returns, so the close is not skipped on failure.
            if (opened) {
                try {
                    writer.closeFile();
                }
                catch (IOException ioex2) {
                    logger.fatal("Error closing file " + outputFilename, ioex2);
                    succeeded = false;
                }
            }
        }
        return succeeded;
    }
}