package org.genedb.web.utils;
import org.apache.log4j.Logger;
import org.genedb.util.SequenceUtils;
import org.genedb.web.mvc.controller.download.SequenceType;
import org.gmod.schema.feature.AbstractExon;
import org.gmod.schema.feature.AbstractGene;
import org.gmod.schema.feature.Polypeptide;
import org.gmod.schema.feature.ProductiveTranscript;
import org.gmod.schema.feature.Transcript;
import org.gmod.schema.mapped.Feature;
import org.gmod.schema.mapped.FeatureLoc;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
public class DownloadUtils {
private static Logger logger = Logger.getLogger(DownloadUtils.class);
private static int FEATURE_PREFIX_WIDTH = 22;
private static int MAX_FEATURE_WIDTH = 18;
private static final String FEATURE_TABLE_PREFIX = String.format("%-"+FEATURE_PREFIX_WIDTH+"s", "FT");
private static final int FASTA_WIDTH = 60;
private static final int BASES_WIDTH = 10;
public static void writeFasta(PrintWriter out, String header, String sequence) {
out.print("> ");
out.println(header);
int startPos = 0;
int sequenceLen = sequence.length();
while (startPos < sequenceLen) {
int endPos = startPos + FASTA_WIDTH;
if (endPos > sequenceLen) {
endPos = sequenceLen;
}
out.println(sequence.substring(startPos, endPos));
startPos += FASTA_WIDTH;
}
}
public static String writeFasta(String header, String sequence) {
StringBuilder fasta = new StringBuilder();
fasta.append(">");
fasta.append(header);
fasta.append("\n");
int startPos = 0;
int sequenceLen = sequence.length();
while (startPos < sequenceLen) {
int endPos = startPos + BASES_WIDTH;
if (endPos > sequenceLen) {
endPos = sequenceLen;
}
fasta.append(sequence.substring(startPos, endPos));
//fasta.append(" ");
startPos += BASES_WIDTH;
if(startPos % 60 == 0) {
fasta.append("\n");
}
}
return fasta.toString();
}
public static void writeEmblEntry(PrintWriter out, String featureType,
boolean forwardStrand, int min, int max,
Map<String, String> qualifiers) {
if (featureType.length() > MAX_FEATURE_WIDTH) {
featureType = featureType.substring(0, MAX_FEATURE_WIDTH);
}
out.format("FT %-"+(FEATURE_PREFIX_WIDTH-3)+"s", featureType);
if (!forwardStrand) {
out.print("complement(");
}
out.print(min - 1 +".."+max); // Interbase conversion
if (!forwardStrand) {
out.print(")");
}
out.println();
for (Map.Entry<String, String> qualifier: qualifiers.entrySet()) {
out.println(FEATURE_TABLE_PREFIX+"/"+qualifier.getKey()+"=\""+qualifier.getValue()+"\"");
}
}
public static String writeEmblEntry(String featureType,
boolean forwardStrand, int min, int max,
Map<String, String> qualifiers) {
StringBuilder embl = new StringBuilder();
if (featureType.length() > MAX_FEATURE_WIDTH) {
featureType = featureType.substring(0, MAX_FEATURE_WIDTH);
}
embl.append(String.format("FT %-"+(FEATURE_PREFIX_WIDTH-3)+"s", featureType));
if (!forwardStrand) {
embl.append("complement(");
}
embl.append(min - 1 +".."+max); // Interbase conversion
if (!forwardStrand) {
embl.append(")");
}
embl.append("\n");
for (Map.Entry<String, String> qualifier: qualifiers.entrySet()) {
embl.append(FEATURE_TABLE_PREFIX+"/"+qualifier.getKey()+"=\""+qualifier.getValue()+"\"");
embl.append("\n");
}
return embl.toString();
}
public static String getSequence(AbstractGene gene,SequenceType sequenceType, int prime3, int prime5) {
boolean reverseCompliment = false;
if (gene.getRankZeroFeatureLoc().getStrand() < 0) {
reverseCompliment = true;
}
// the following cases do not need transcripts
switch (sequenceType) {
case UNSPLICED_DNA:
String sequence = gene.getResidues();
if (reverseCompliment) {
sequence = SequenceUtils.reverseComplement(sequence);
}
return sequence;
case INTERGENIC_3:
return fetchParentSequence(gene, false, prime3, 0);
case INTERGENIC_5:
return fetchParentSequence(gene, false, 0, prime5);
case INTERGENIC_3and5:
return fetchParentSequence(gene, true, prime3, prime5);
}
StringBuffer sb = new StringBuffer();
for (Transcript transcript : gene.getTranscripts()) {
String seq = getSequence(transcript, sequenceType, prime3, prime5);
sb.append(seq);
}
return sb.toString();
}
public static String getSequence(Transcript transcript, SequenceType sequenceType, int prime3, int prime5) {
boolean reverseCompliment = false;
if (transcript.getRankZeroFeatureLoc().getStrand() < 0) {
reverseCompliment = true;
}
ProductiveTranscript productiveTranscript = null;
if (transcript instanceof ProductiveTranscript) {
productiveTranscript = (ProductiveTranscript) transcript;
}
switch (sequenceType) {
case SPLICED_DNA:
if (transcript.getResidues() != null) {
return new String(transcript.getResidues());
}
case UNSPLICED_DNA:
String sequence = transcript.getGene().getResidues();
if (reverseCompliment) {
sequence = SequenceUtils.reverseComplement(sequence);
}
return sequence;
case PROTEIN:
if (productiveTranscript == null) {
return null;
}
try {
return new String(productiveTranscript.getProtein().getResidues());
} catch (NullPointerException npe) {
logger.error(npe.getStackTrace());
return null;
}
case INTRON_AND_EXON:
if (productiveTranscript == null) {
return null;
}
return getIntronsAndExons(productiveTranscript);
case INTERGENIC_3:
return fetchParentSequence(transcript, false, prime3, 0);
case INTERGENIC_5:
return fetchParentSequence(transcript, false, 0, prime5);
case INTERGENIC_3and5:
return fetchParentSequence(transcript, true, prime3, prime5);
}
return null;
}
public static String getSequence(Polypeptide polypeptide, SequenceType sequenceType, int prime3, int prime5) {
Transcript transcript = polypeptide.getTranscript();
if (transcript != null) {
return getSequence(transcript, sequenceType, prime3, prime5);
}
return null;
}
private static class Position {
public int start;
public int stop;
public boolean upper = false;
}
/**
* Retrieves the entire sequence of the transcript, with the exons capitalized.
* @author gv1
* @param transcript
* @return
*/
private static String getIntronsAndExons(ProductiveTranscript transcript) {
String sequence = new String();
List<Position> positions = new ArrayList<Position>();
int lastPoint = -1;
for (AbstractExon exon : transcript.getExons()) {
int exonStart = exon.getStart();
if (lastPoint == -1) {
lastPoint = exonStart;
}
int intronStart = exonStart - lastPoint;
if (intronStart > 0) {
Position intronPosition = new Position();
intronPosition.start = lastPoint;
intronPosition.stop = exonStart;
positions.add(intronPosition);
//logger.debug(intronPosition.start + " ... " + intronPosition.stop);
}
//logger.debug(exon.getStart() + " <...> " + exon.getStop());
Position exonPosition = new Position();
exonPosition.start = exon.getStart();
exonPosition.stop = exon.getStop();
exonPosition.upper = true;
positions.add(exonPosition);
lastPoint = exon.getStop();
}
for (Position p : positions) {
String str;
if (p.upper) {
str = new String(transcript.getGene().getRankZeroFeatureLoc().getSourceFeature().getResidues(p.start, p.stop).toUpperCase() );
} else {
str = new String(transcript.getGene().getRankZeroFeatureLoc().getSourceFeature().getResidues(p.start, p.stop) );
}
sequence = sequence.concat(str);
}
return sequence;
}
// TODO Check off by one
private static String fetchParentSequence(Feature t, boolean includeTranscript, int prime3, int prime5) {
//logger.info(prime3 + " --- " + prime5);
//System.out.println(String.format("prime3:: %s prime5:: %s", prime3, prime5));
if (prime3>0 && prime5 > 0 && !includeTranscript) {
throw new IllegalArgumentException("Can't fetch sequence from both sides of a transcript but not include the transcript");
}
if (prime3<0 || prime5 < 0) {
throw new IllegalArgumentException("Can't use -ve sequence offsets");
}
FeatureLoc fl = t.getRankZeroFeatureLoc();
//logger.info(fl.getFmin() + " ,,, " + fl.getFmax());
int start;
int end;
if (includeTranscript) {
start = fl.getFmin() - prime3;
end = fl.getFmax() + prime5;
} else {
if (prime3 > 0) {
start = fl.getFmin() - prime3;
end = fl.getFmin();
} else {
// Prime 5 end
start = fl.getFmax();
end = fl.getFmax() + prime5;
}
}
Feature parent = fl.getSourceFeature();
if (start > end) {
int tmp = start;
start = end;
end = tmp;
}
int parentSequenceLength = parent.getSeqLen();
if (end > parentSequenceLength) {
logger.warn(String.format("Correcting end %s to %s (the parent length)", end, parentSequenceLength));
end = parentSequenceLength;
}
//System.out.println(String.format("START:: %s END:: %s", start, end));
return new String(parent.getResidues(start, end, fl.getStrand() == -1));
}
}
//public class FastaUtils {
//
// public static void exportFeatureFasta(Writer w, boolean spaces, Feature feat) throws IOException {
// exportFasta(w, feat.getType().getName()+":"+feat.getUniqueName(), feat.getResidues(), spaces);
// }
//
// public static void exportFastaRegion(Writer w, String header, boolean spaces,
// Feature feat, Strand strand, int min, int max) throws IOException {
// String seq = feat.getResidues(min, max, (strand == Strand.REVERSE));
// exportFasta(w, header, seq, spaces);
// }
//
//
// public static void exportFasta(Writer w, String header, String seq, boolean spaces) throws IOException {
// w.write('>' + header + '\n');
// int count = 0;
// for (char c : seq.toCharArray()) {
// if (count % 60 == 0) {
// w.write('\n');
// } else {
// if (spaces && count % 10 == 0) {
// w.write(' ');
// }
// count++;
// }
// w.write(c);
// count++;
// }
// }
//
//}
//public class EmblUtils {
//
// public static void exportEmbl(Writer w, Feature feat, int min, int max, boolean internal, boolean strict, boolean truncateEnds) throws IOException {
// exportHeader();
// exportTab();
// exportSequence(w, feat, min, max);
// }
//
// private static void exportSequence(Writer w, Feature feat, int min, int max) throws IOException {
// // TODO - ignores strand
// String seq = feat.getResidues(min, max);
// exportSequence(w, seq);
// }
//
// private static void exportSequence(Writer w, String seq) throws IOException {
// // XX
// // SQ Sequence 29663 BP; 9792 A; 5106 C; 5232 G; 9533 T; 0 other;
// // gatcacgtac atcaccttgt aagaatttat ctgcaatagt ccttcggtat tgtacattgt 60
// // ...
// // tggttctgat attgaacaaa tagaactaca aaatatgcct actcctgtga aaaaataatt 29640
// // ttctttatcg ttttcatgat ccc 29663
// // //
// pln(w, "XX");
//
//
// w.write("SQ Sequence ");
// w.write(seq.length());
// w.write(" BP;");
// // TODO stats
// w.write('\n');
//
// for (int i = 0; i < seq.length(); i++) {
// char c = seq.charAt(i);
// if (i % 60 == 0) {
// String count = Integer.toString(i);
// w.write(StringUtils.leftPad(" ", 10, count));
// w.write('\n');
// } else {
// if (i % 10 == 0) {
// w.write(' ');
// }
// }
// w.write(c);
// }
// if (seq.length() % 60 != 0) {
// // TODO cope with remainder on last line
// int used = seq.length() % 60;
// int toPad = 75; // 10 *6 + 1 *5 + 10
// String count = Integer.toString(seq.length());
// w.write(StringUtils.leftPad(" ", toPad, count));
// w.write('\n');
// }
// pln(w, "//");
// }
//
// private static void pln(Writer w, String line) throws IOException {
// w.write(line);
// w.write('\n');
// }
//
// private static void exportTab() {
// // TODO Auto-generated method stub
// }
//
// private static void exportHeader() {
// // TODO Auto-generated method stub
// }
//}