package uk.co.flax.biosolr.pdbe.fasta;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.rmi.RemoteException;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import uk.ac.ebi.webservices.axis1.stubs.fasta.InputParameters;
import uk.ac.ebi.webservices.axis1.stubs.fasta.JDispatcherService_PortType;
public class FastaJob implements Runnable {
private static final Logger LOG = Logger.getLogger(FastaJob.class.getName());
private JDispatcherService_PortType fasta;
private String email;
private InputParameters params;
private FastaJobResults results;
// job id created in run()
private String jobId;
// exception caught during run(), if any, and the run status
private IOException exception;
private String status;
private boolean interrupted;
// regexp patterns
private Pattern pattern1 = Pattern.compile("^PDB:(.*?_.*?)\\s+(.+?)\\s+([0-9.e-]+?)$|^PRE_PDB:(\\w{4} Entity)\\s+(.+?)\\s+([0-9.e-]+?)$");
private Pattern pattern2 = Pattern.compile("^>>PDB:(.*?_.*?)\\s+.*?$|^>>PRE_PDB:(\\w{4} Entity).*?$");
private Pattern pattern3 = Pattern.compile("^Smith-Waterman score:.*?\\;(.*?)\\% .*? overlap \\((.*?)\\)$");
private Pattern pattern4 = Pattern.compile("^EMBOS[S ] (\\s*.*?)$");
private Pattern pattern5 = Pattern.compile("^PDB:.*? (\\s*.*?)$|^PRE_PD.*? (\\s*.*?)$");
public IOException getException() {
return exception;
}
public String getStatus() {
return status;
}
public FastaJobResults getResults() throws IOException {
if (results == null) {
byte[] result = getRawResults();
InputStream in = new ByteArrayInputStream(result);
results = parseResults(new BufferedReader(new InputStreamReader(in)));
}
return results;
}
public byte[] getRawResults() throws RemoteException {
String id = fasta.getResultTypes(jobId)[0].getIdentifier();
return fasta.getResult(jobId, id, null);
}
public boolean isInterrupted() {
return interrupted;
}
public boolean resultsOk() {
return exception == null && !interrupted && status.equals(FastaStatus.DONE);
}
public FastaJob(JDispatcherService_PortType fasta, String email, InputParameters params) {
this.fasta = fasta;
this.email = email;
this.params = params;
jobId = null;
results = null;
exception = null;
status = null;
interrupted = false;
}
public String getEmail() {
return email;
}
public InputParameters getParams() {
return params;
}
private int firstGroup(Matcher m) {
for (int n = 1; n <= m.groupCount(); ++n) {
if (m.group(n) != null) {
return n;
}
}
return 0;
}
public void run() {
try {
jobId = fasta.run(email, "", params);
do {
Thread.sleep(200);
status = fasta.getStatus(jobId);
LOG.log(Level.FINE, status);
} while (status.equals(FastaStatus.RUNNING));
if (!status.equals(FastaStatus.DONE)) {
LOG.log(Level.SEVERE, "Error with job: " + jobId + " (" + status + ")");
}
} catch (InterruptedException e) {
interrupted = true;
} catch (IOException e) {
exception = e;
}
}
// create an Alignment from a matching line
private PDb.Alignment parseAlignment(Matcher matcher) {
int n = firstGroup(matcher);
String pdbIdChain = matcher.group(n);
if (pdbIdChain.contains("Entity")) {
pdbIdChain = pdbIdChain.replaceFirst(" ", "_");
}
String[] s = pdbIdChain.split("_");
double eValue = new Double(matcher.group(n + 2));
return new PDb.Alignment(new PDb.Id(s[0]), s[1], eValue);
}
private FastaJobResults parseResults(BufferedReader reader) throws IOException {
results = new FastaJobResults();
String line = "";
while (line != null) {
Matcher matcher1 = pattern1.matcher(line);
Matcher matcher2 = pattern2.matcher(line);
if (matcher1.find()) {
PDb.Alignment alignment = parseAlignment(matcher1);
results.addAlignment(alignment);
line = reader.readLine();
} else if (matcher2.find()) {
int n = firstGroup(matcher2);
String pdbIdChain = matcher2.group(n);
if (pdbIdChain.contains("Entity")) {
pdbIdChain = pdbIdChain.replaceFirst(" ", "_");
}
String[] bits = pdbIdChain.split("_");
PDb.Alignment a = results.getAlignment(bits[0], bits[1]);
if (a == null) {
throw new RuntimeException("Alignment not yet seen: " + pdbIdChain);
}
// sometimes an alignment appears twice in the results - need to ignore all
// but the first (but still need to consume lines)
boolean complete = a.isComplete();
while ((line = reader.readLine()) != null) {
Matcher m2 = pattern2.matcher(line);
Matcher m3 = pattern3.matcher(line);
Matcher m4 = pattern4.matcher(line);
Matcher m5 = pattern5.matcher(line);
if (m3.find()) {
double identity = new Double(m3.group(1));
String overLap = m3.group(2);
String[] o = overLap.split(":");
String[] oIn = o[0].split("-");
String[] oOut = o[1].split("-");
if (! complete) {
a.setPercentIdentity(identity);
try {
a.setQueryOverlapStart(Integer.valueOf(oIn[0]));
a.setQueryOverlapEnd(Integer.valueOf(oIn[1]));
a.setDbOverlapStart(Integer.valueOf(oOut[0]));
a.setDbOverlapEnd(Integer.valueOf(oOut[1]));
} catch (NumberFormatException e) {
throw new IOException("Error parsing line: " + line);
}
}
} else if (m2.find()) {
break;
} else if (m4.find()) {
if (! complete) {
a.addQuerySequence(m4.group(1));
}
} else if (m5.find()) {
int n4 = firstGroup(m5);
if (! complete) {
a.addReturnSequence(m5.group(n4));
}
}
}
} else {
line = reader.readLine();
}
}
return results;
}
}