package org.seqcode.data.readdb;
import org.apache.commons.cli.*;
import java.util.*;
import java.io.*;
/**
* <p>Imports hits to the db.
* Usage:
* <pre>cat hits.txt | ImportHits -H nanog.csail.mit.edu -P 5200 -a "Gcn4 ChipSeq" -u arolfe -p SECRET</pre>
*
* <p>Lines in the input must be of the form<br>
* <pre>chromosome\tstart\tstrand\tlength\tweight</pre><br>
* or
* <pre>chromosomeone\tstartone\tstrandone\tlengthone\tchromosometwo\tstarttwo\tstrandtwo\tlengthtwo\tweight\tpairCode</pre>
*
* <p>where start is the position of the 5' end of the read.
* <p>and where pairCode is 1 if the pair represents a link between two mated reads, and 0 otherwise (e.g. if the pair represents two sections of the same gapped read alignment).
*
* <p>For Mahony lab use:
* <ul>
* <li> The chromosome must be numeric and should be the chromosome id from core.
* <li> The alignment should be numeric and should be the alignment identifier from the seqdata schema
* </ul>
*
*/
public class ImportHits {
    /* Default number of input lines read between flushes to the server. */
    private static final int DEFAULT_CHUNK = 10000000;
    /* A progress message is written to stderr once per this many input lines. */
    private static final int PROGRESS_INTERVAL = 100000;

    /* Alignment name passed to every store call (--align). */
    String alignname;
    /* Connection settings; when any of them is unset, run() falls back to the no-argument Client. */
    String hostname;
    String username, password;
    int portnum;
    /* Created by run(), released by close(). */
    private Client client=null;
    /* Buffered hits are sent to the server every time this many input lines have been read. */
    private int chunk = DEFAULT_CHUNK;
    /* Forwarded to Client.storeSingle for single-end hits (--type2). */
    private boolean isType2=false;

    /**
     * Command-line entry point: parses the arguments, imports hits from standard input
     * and always closes the client afterwards. Any exception is printed to stderr.
     *
     * @param args command-line arguments, see {@link #printHelp()}
     */
    public static void main(String args[]) {
        // Constructed outside the try so the finally block can never see a null importer.
        ImportHits importer = new ImportHits();
        try {
            importer.parseArgs(args);
            importer.run(System.in);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            importer.close();
        }
    }

    /**
     * Creates an importer with explicit connection settings.
     *
     * @param hostname  server to connect to
     * @param port      port to connect to
     * @param alignname alignment name the hits are stored under
     * @param username  username for the connection
     * @param password  password for the connection
     */
    public ImportHits(String hostname,
                      int port,
                      String alignname,
                      String username, String password) {
        this.hostname = hostname;
        this.portnum = port;
        this.alignname = alignname;
        this.username = username;
        this.password = password;
    }

    /* use this constructor and then call parseArgs */
    public ImportHits () {
        username = null;
        password = null;
        hostname = null;
        portnum = -1;
    }

    /**
     * Populates this importer from command-line arguments.
     * Prints the help text and exits the JVM with status 0 when {@code --help} is given.
     *
     * @param args command-line arguments
     * @throws IllegalArgumentException if {@code --align} is missing, if {@code --chunk} is not
     *         a positive integer, or (as NumberFormatException) if a numeric option cannot be parsed
     * @throws ParseException if the arguments cannot be parsed by the option parser
     */
    public void parseArgs(String args[]) throws IllegalArgumentException, ParseException {
        Options options = new Options();
        options.addOption("H","hostname",true,"server to connect to");
        options.addOption("P","port",true,"port to connect to");
        options.addOption("a","align",true,"alignment name");
        options.addOption("u","user",true,"username");
        options.addOption("p","passwd",true,"password");
        options.addOption("h","help",false,"print help message");
        options.addOption("c","chunk",true,"send this many hits to the server at once");
        options.addOption("t2","type2",false,"type2 single-end hits (e.g. read 2 hits)");
        CommandLineParser parser = new GnuParser();
        CommandLine line = parser.parse( options, args, false );
        if (line.hasOption("help")) {
            printHelp();
            System.exit(0);
        }
        if (line.hasOption("port")) {
            portnum = Integer.parseInt(line.getOptionValue("port"));
        }
        if (line.hasOption("hostname")) {
            hostname = line.getOptionValue("hostname");
        }
        if (line.hasOption("align")) {
            alignname = line.getOptionValue("align");
        } else {
            System.err.println("Must supply alignment name as --align");
            throw new IllegalArgumentException("Must supply alignment name as --align");
        }
        if (line.hasOption("user")) {
            username = line.getOptionValue("user");
        }
        if (line.hasOption("passwd")) {
            password = line.getOptionValue("passwd");
        }
        if (line.hasOption("chunk")) {
            int requested = Integer.parseInt(line.getOptionValue("chunk"));
            // run() computes lineno % chunk; a zero chunk would fail there with ArithmeticException.
            if (requested <= 0) {
                throw new IllegalArgumentException("--chunk must be a positive integer, got " + requested);
            }
            chunk = requested;
        }
        if (line.hasOption("type2")) {
            isType2 = true;
        }
    }

    /**
     * Prints usage information and the expected input format to standard output.
     */
    public void printHelp() {
        System.out.println("ImportHits to ReadDB");
        System.out.println("usage: cat foo.sam | java org.seqcode.data.readdb.SAMToReadDB | java org.seqcode.data.readdb.ImportHits \\");
        System.out.println(" --align alignmentname");
        System.out.println(" [--hostname host] server to connect to");
        System.out.println(" [--port port] port to connect to");
        System.out.println(" [--user username] username");
        System.out.println(" [--passwd password] password");
        System.out.println(" [--chunk n] send this many hits to the server at once (default " + DEFAULT_CHUNK + ")");
        System.out.println(" --type2 the imported hits are type 2 single end hits (e.g. read 2 hits in certain circumstances)");
        System.out.println(" [--help] print usage");
        System.out.println("");
        System.out.println("Input format is tab delimited with either five or ten fields per line.");
        System.out.println("For single-ended reads, fields are ");
        System.out.println(" (1) chromosome (2) position of 5' end of read (3) strand (4) length (5) weight");
        System.out.println("For paired-end reads, fields are ");
        System.out.println(" (1) L chromosome (2) position of 5' end of L read (3) L strand (4) L length ");
        System.out.println(" (5) R chromosome (6) position of 5' end of R read (7) R strand (8) R length (9) weight (10) pair between mates indicator");
    }

    /**
     * Reads tab-delimited hits from the stream and stores them through the client.
     *
     * <p>Lines with five fields are treated as single hits, lines with ten fields as paired
     * hits; any other line is reported on stderr and skipped. Buffered hits are sent to the
     * server every {@code chunk} input lines and once more at end of input. A failed store is
     * logged and not rethrown; the affected hits stay buffered and are sent again with the
     * next flush. "Stored" is printed only when the final flush completed without error.
     *
     * @param instream stream to read hit lines from
     * @throws IOException if reading the stream fails
     * @throws ClientException declared for failures raised while setting up the client
     * @throws NumberFormatException if a numeric field cannot be parsed; the message names
     *         the offending line number and line
     */
    public void run(InputStream instream) throws IOException, ClientException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(instream));
        List<SingleHit> hits = new ArrayList<SingleHit>();
        List<PairedHit> paired = new ArrayList<PairedHit>();
        client = createClient();
        System.err.println("Created Client");
        String line;
        int lineno = 0;
        while ((line = reader.readLine()) != null) {
            lineno++;
            String pieces[] = line.split("\\t");
            try {
                if (pieces.length == 5) {
                    hits.add(parseSingle(pieces));
                } else if (pieces.length == 10) {
                    paired.add(parsePaired(pieces));
                } else {
                    System.err.println("Bad line size " + line);
                }
            } catch (NumberFormatException e) {
                // Same exception type as before, but now it says where the bad value was found.
                NumberFormatException withContext = new NumberFormatException(
                        "Unparseable numeric field on input line " + lineno + ": " + line);
                withContext.initCause(e);
                throw withContext;
            }
            // Progress is reported after lines 1, 100001, 200001, ... (unchanged cadence).
            if ((lineno - 1) % PROGRESS_INTERVAL == 0) {
                System.err.println("Read through line " + lineno);
            }
            if (lineno % chunk == 0) {
                flush(hits, paired);
            }
        }
        System.err.println("Read lines");
        if (flush(hits, paired)) {
            System.err.println("Stored");
        }
    }

    /**
     * Closes the client if one was created by {@link #run(InputStream)}.
     */
    public void close(){
        if (client != null) {
            client.close();
        }
    }

    /* Builds the client from the explicit connection settings when all are present, otherwise uses the no-argument Client. */
    private Client createClient() throws IOException, ClientException {
        if (hostname != null && portnum > 0 && username != null && password != null) {
            return new Client(hostname,
                              portnum,
                              username,
                              password);
        }
        return new Client();
    }

    /* Converts a five-column line (chromosome, start, strand, length, weight) into a SingleHit. */
    private static SingleHit parseSingle(String[] pieces) {
        return new SingleHit(Integer.parseInt(pieces[0]),
                             Integer.parseInt(pieces[1]),
                             Float.parseFloat(pieces[4]),
                             pieces[2].equals("+"),
                             Short.parseShort(pieces[3]));
    }

    /* Converts a ten-column line (two chromosome/start/strand/length groups, weight, pairCode) into a PairedHit. */
    private static PairedHit parsePaired(String[] pieces) {
        return new PairedHit(Integer.parseInt(pieces[0]),
                             Integer.parseInt(pieces[1]),
                             pieces[2].equals("+"),
                             Short.parseShort(pieces[3]),
                             Integer.parseInt(pieces[4]),
                             Integer.parseInt(pieces[5]),
                             pieces[6].equals("+"),
                             Short.parseShort(pieces[7]),
                             Float.parseFloat(pieces[8]),
                             Integer.parseInt(pieces[9]));
    }

    /*
     * Sends the buffered hits to the server. A list is cleared only after its store call
     * returned normally, so hits from a failed call remain buffered for the next flush.
     * Returns true when no store call failed.
     */
    private boolean flush(List<SingleHit> hits, List<PairedHit> paired) {
        boolean allStored = true;
        if (hits.size() > 0) {
            try {
                client.storeSingle(alignname, hits, isType2);
                hits.clear();
            } catch (Exception e) {
                allStored = false;
                System.err.println("Failed: " + e.toString());
                e.printStackTrace();
            }
        }
        if (paired.size() > 0) {
            try {
                client.storePaired(alignname, paired);
                paired.clear();
            } catch (Exception e) {
                allStored = false;
                System.err.println("Failed: " + e.toString());
                e.printStackTrace();
            }
        }
        return allStored;
    }
}