package org.genedb.db.loading.auxiliary;
import org.gmod.schema.feature.HelixTurnHelix;
import org.gmod.schema.feature.Polypeptide;
import org.gmod.schema.mapped.Analysis;
import org.apache.log4j.Logger;
import org.hibernate.HibernateException;
import org.hibernate.Session;
import org.hibernate.jdbc.Work;
import org.springframework.orm.hibernate3.SessionFactoryUtils;
import org.springframework.transaction.annotation.Transactional;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Loads helix-turn-helix features from a results file into the database. Every hit in the file
 * is expected to follow the summary format defined by the pattern in {@link HTHFile}.
 *
 * <p>Recognised options:
 * <ul>
 *   <li>{@code hth-version} - version string recorded on the analysis; defaults to
 *       {@code "unknown"} when the option is not supplied.</li>
 *   <li>{@code not-found-not-fatal} - {@code "true"} or {@code "false"}; when true, a hit whose
 *       polypeptide cannot be found is logged and skipped instead of aborting the load.</li>
 * </ul>
 *
 * @author nds
 *
 */
public class HTHLoader extends Loader {
    private static final Logger logger = Logger.getLogger(HTHLoader.class);

    /** The Hibernate session is cleared each time this many hits have been processed. */
    private static final int SESSION_CLEAR_INTERVAL = 50;

    /** Version recorded on the analysis; initialised to a non-null placeholder. */
    private String analysisProgramVersion = "unknown";

    /** Analysis record created at the start of {@link #doLoad} and attached to every feature. */
    private Analysis analysis;

    /** When true, a missing polypeptide is logged and skipped rather than treated as fatal. */
    boolean notFoundNotFatal = false;

    /**
     * @return the names of the options this loader accepts
     */
    @Override
    protected Set<String> getOptionNames() {
        Set<String> options = new HashSet<String>();
        Collections.addAll(options, "hth-version", "not-found-not-fatal");
        return options;
    }

    /**
     * Applies a single option to this loader.
     *
     * @param optionName  the option name
     * @param optionValue the option value
     * @return {@code true} if the option was recognised and its value accepted; {@code false}
     *         for an unknown option, or for a {@code not-found-not-fatal} value other than
     *         {@code "true"} or {@code "false"} (including {@code null})
     */
    @Override
    protected boolean processOption(String optionName, String optionValue) {
        // Constant-first comparisons so that a null name or value is rejected, not an NPE.
        if ("hth-version".equals(optionName)) {
            analysisProgramVersion = optionValue;
            return true;
        }
        if ("not-found-not-fatal".equals(optionName)) {
            if (!"true".equals(optionValue) && !"false".equals(optionValue)) {
                return false;
            }
            notFoundNotFatal = Boolean.parseBoolean(optionValue);
            return true;
        }
        return false;
    }

    /**
     * Persists a new analysis record (program {@code "helixturnhelix"}), parses the input and
     * loads every hit found in it.
     *
     * @param inputStream the helix-turn-helix results to parse
     * @param session     the Hibernate session, cleared after every
     *                    {@value #SESSION_CLEAR_INTERVAL} hits
     * @throws IOException if the input cannot be read
     */
    @Override
    public void doLoad(InputStream inputStream, Session session) throws IOException {
        // Add analysis
        analysis = new Analysis();
        analysis.setProgram("helixturnhelix");
        analysis.setProgramVersion(analysisProgramVersion);
        sequenceDao.persist(analysis);

        HTHFile file = new HTHFile(inputStream);
        Collection<HTHHit> hits = file.hits();
        int total = hits.size();
        int processed = 0;
        for (HTHHit hit : hits) {
            logger.info(String.format("[%d/%d] Processing helix-turn-helix for '%s'", processed + 1, total, hit.getName()));
            loadHit(hit);
            processed++;
            if (processed % SESSION_CLEAR_INTERVAL == 0) {
                // NOTE(review): Session.clear() also cancels pending, unflushed saves; this
                // presumably relies on sequenceDao.persist having flushed already - confirm.
                logger.info("Clearing session");
                session.clear();
            }
        }
    }

    /**
     * Creates and persists the helix-turn-helix feature for a single hit.
     *
     * @param hit the parsed hit to load
     * @throws RuntimeException if no polypeptide is found for the hit's name and
     *                          {@link #notFoundNotFatal} is false
     */
    private void loadHit(HTHHit hit) {
        Polypeptide polypeptide = getPolypeptideByMangledName(hit.getName());
        logger.debug(String.format("Processing feature of name '%s'", hit.getName()));
        if (polypeptide == null) {
            String message = String.format("Could not find polypeptide for key '%s'", hit.getName());
            if (notFoundNotFatal) {
                logger.error(message);
                return;
            }
            throw new RuntimeException(message);
        }

        // All hits should be of type helix-turn-helix at this stage.
        // createHelixTurnHelix takes the essential information from the hit and creates the
        // corresponding feature & featureloc.
        HelixTurnHelix helixTurnHelix = sequenceDao.createHelixTurnHelix(polypeptide, hit.getStart(), hit.getEnd(), hit.getScore(), hit.getMaxScoreAt(), hit.getStdDeviations(), analysis);
        sequenceDao.persist(helixTurnHelix);
    }

    /**
     * Runs {@link ClearHTH} for the given organism and analysis program on the JDBC connection
     * of the current Hibernate session.
     *
     * @param organismCommonName common name of the organism, passed to {@link ClearHTH}
     * @param analysisProgram    name of the analysis program, passed to {@link ClearHTH}
     * @throws HibernateException if the session work cannot be carried out
     * @throws SQLException       declared for callers; thrown by the underlying clear operation
     */
    @Transactional
    public void clear(final String organismCommonName, final String analysisProgram) throws HibernateException, SQLException {
        // 'false': use the session already bound to this context rather than creating one.
        Session session = SessionFactoryUtils.getSession(sessionFactory, false);
        session.doWork(new Work() {
            public void execute(Connection connection) throws SQLException {
                new ClearHTH(connection, organismCommonName, analysisProgram).clear();
            }
        });
    }
}
/**
 * Parsed contents of a helix-turn-helix (.hth) results file.
 *
 * <p>The file is scanned for lines starting with {@code "Feature: "}. The lines that follow,
 * up to the next empty line or the end of the file, form one summary paragraph. The line
 * immediately before the {@code "Feature: "} line is prepended to the paragraph without a
 * line terminator, and the result must match {@link #SUMMARY_PATTERN} in full; in practice
 * this means the preceding line has to be empty for the hit to parse.
 */
class HTHFile {
    private static final Logger logger = Logger.getLogger(HTHFile.class);

    /**
     * Expected layout of one summary paragraph: eight newline-terminated {@code "Key: value"}
     * lines in fixed order. Score and standard deviations must be written with a decimal point;
     * strand is a single non-whitespace character.
     */
    private static final Pattern SUMMARY_PATTERN = Pattern.compile(
        "Name: (\\S+)\n" +
        "Start: (\\d+)\n" +
        "End: (\\d+)\n" +
        "Length: (\\d+)\n" +
        "Score: (\\d+\\.\\d+)\n" +
        "Strand: (\\S)\n" +
        "Maximum_score_at: (\\d+)\n" +
        "Standard_deviations: (\\d+\\.\\d+)\n"
    );

    /** Hits in the order they were read from the file. */
    private final List<HTHHit> hits = new ArrayList<HTHHit>();

    /**
     * Reads the whole stream and collects every hit whose summary parses successfully.
     * Summaries that do not match the expected format are logged and skipped. The stream is
     * not closed here; it remains the caller's responsibility.
     *
     * @param inputStream the .hth file contents
     * @throws IOException           if reading from the stream fails
     * @throws IllegalStateException if the very first line of the input is a
     *                               {@code "Feature: "} line, so there is no preceding line
     */
    public HTHFile(InputStream inputStream) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        String previousLine = null;
        String line;
        while (null != (line = reader.readLine())) { // While not end of file
            if (line.startsWith("Feature: ")) {
                if (previousLine == null) {
                    throw new IllegalStateException("'Feature: ' line found on the first line of the input");
                }
                StringBuilder sb = new StringBuilder(previousLine);
                // The paragraph ends at the first empty line. readLine() returns null at end
                // of file, so a final paragraph with no trailing blank line ends there too.
                while (null != (line = reader.readLine()) && 0 < line.length()) {
                    sb.append(line);
                    sb.append('\n');
                }
                logger.trace(sb);
                parseSummary(sb);
                if (line == null) {
                    break; // End of file reached inside the paragraph
                }
            }
            previousLine = line;
        }
    }

    /**
     * @return the hits parsed from the file, in file order (the live internal collection)
     */
    public Collection<HTHHit> hits() {
        return hits;
    }

    /**
     * Parses one summary paragraph and, if it matches {@link #SUMMARY_PATTERN} in full, adds
     * the corresponding hit. A non-matching summary is logged as an error and ignored.
     *
     * @param summary the paragraph text, each line terminated by {@code '\n'}
     */
    private void parseSummary(CharSequence summary) {
        Matcher matcher = SUMMARY_PATTERN.matcher(summary);
        if (!matcher.matches()) {
            logger.error("Failed to parse summary:\n" + summary);
            return;
        }
        String name = matcher.group(1);
        int start = Integer.parseInt(matcher.group(2));
        int end = Integer.parseInt(matcher.group(3));
        int length = Integer.parseInt(matcher.group(4));
        String score = matcher.group(5);
        String strand = matcher.group(6);
        int maxScoreAt = Integer.parseInt(matcher.group(7));
        // Round-trip through double so the stored text is Double.toString's canonical form.
        String stdDeviations = Double.toString(Double.parseDouble(matcher.group(8)));
        hits.add(new HTHHit(name, start, end, length, score, strand, maxScoreAt, stdDeviations));
    }
}
/**
 * Immutable holder for one 'hit': the values parsed from a paragraph beginning with the word
 * 'Feature' in the .hth file. Score and standard deviations are kept as text; positions and
 * length are kept as integers.
 */
class HTHHit {
    private final String name;
    private final String strand;
    private final String score;
    private final String stdDeviations;
    private final int start;
    private final int end;
    private final int length;
    private final int maxScoreAt;

    /**
     * @param name          value of the {@code Name} field
     * @param start         value of the {@code Start} field
     * @param end           value of the {@code End} field
     * @param length        value of the {@code Length} field
     * @param score         value of the {@code Score} field, as text
     * @param strand        value of the {@code Strand} field
     * @param maxScoreAt    value of the {@code Maximum_score_at} field
     * @param stdDeviations value of the {@code Standard_deviations} field, as text
     */
    public HTHHit(String name, int start, int end, int length, String score, String strand, int maxScoreAt, String stdDeviations) {
        this.name = name;
        this.start = start;
        this.end = end;
        this.length = length;
        this.score = score;
        this.strand = strand;
        this.maxScoreAt = maxScoreAt;
        this.stdDeviations = stdDeviations;
    }

    /** @return the name given for this hit */
    public String getName() {
        return name;
    }

    /** @return the strand given for this hit */
    public String getStrand() {
        return strand;
    }

    /** @return the start position given for this hit */
    public int getStart() {
        return start;
    }

    /** @return the end position given for this hit */
    public int getEnd() {
        return end;
    }

    /** @return the length given for this hit */
    public int getLength() {
        return length;
    }

    /** @return the position given for the maximum score */
    public int getMaxScoreAt() {
        return maxScoreAt;
    }

    /** @return the standard deviations value, as text */
    public String getStdDeviations() {
        return stdDeviations;
    }

    /** @return the score, as text */
    public String getScore() {
        return score;
    }
}