package aliview;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import org.apache.commons.lang.StringUtils;
import utils.nexus.NexusUtilities;
import aliview.alignment.Alignment;
import aliview.sequences.Sequence;
/*
*
* TODO maybe generalize this so it could be in general NexusUtilities
*
*/
public class AliViewExtraNexusUtilities {
private static final String LF = System.getProperty("line.separator");
public static final int DATATYPE_PROTEIN = 1;
public static final int DATATYPE_DNA = 7;
public static final void exportAlignmentAsNexus(Writer out, Alignment alignment, boolean simplified, int datatype) throws IOException{
//String nexus = "";
String dataTypeString = "DNA";
if(datatype == DATATYPE_PROTEIN){
dataTypeString = "PROTEIN";
}
//String sequentialParameter = " SEQUENTIAL=YES";
String sequentialParameter = "";
out.write("#NEXUS" + LF);
out.write(LF);
out.write("BEGIN DATA;" + LF);
out.write("DIMENSIONS NTAX=" + alignment.getSize() + " NCHAR=" + alignment.getMaximumSequenceLength() + ";" + LF);
out.write("FORMAT DATATYPE=" + dataTypeString + sequentialParameter + " GAP=- MISSING=?;" + LF);
out.write("MATRIX" + LF);
out.write(LF);
int longestSequenceNameLen = alignment.getLongestSequenceName();
int BLANK_SPACE_SIZE = 3;
int MAX_LEN = 99;
for(Sequence seq: alignment.getSequences()){
if(simplified){
String name = NexusUtilities.replaceProblematicChars(seq.getName());
int padSize = longestSequenceNameLen + BLANK_SPACE_SIZE;
if(padSize > MAX_LEN){
name = StringUtils.substring(name, 0, MAX_LEN - BLANK_SPACE_SIZE);
padSize = MAX_LEN;
}
out.write("" + StringUtils.rightPad(name,padSize) + "" + seq.getBasesAsString());
out.write(LF);
}else{
out.write("" + StringUtils.rightPad(seq.getName(),longestSequenceNameLen + BLANK_SPACE_SIZE) + "" + seq.getBasesAsString());
out.write(LF);
}
}
out.write(";" + LF);
out.write(LF);
out.write("END;" + LF);
out.write(LF);
if(alignment.getAlignmentMeta().getExcludes() != null){
out.write(NexusUtilities.getExcludesAsNexusBlock(alignment.getAlignmentMeta().getExcludes()));
out.write(LF);
out.write(LF);
}
if(alignment.getAlignmentMeta().getCodonPositions() != null){
out.write(NexusUtilities.getCodonPosAsNexusBlock(alignment.getAlignmentMeta().getCodonPositions(), 0, alignment.getMaximumSequenceLength()));
out.write(LF);
out.write(LF);
}
if(alignment.getAlignmentMeta().getCharsets() != null){
out.write(NexusUtilities.getCharsetsBlockAsNexus(alignment.getAlignmentMeta().getCharsets()));
out.write(LF);
out.write(LF);
}
out.flush();
out.close();
}
public static final void exportAlignmentAsNexusCodonpos(Writer out, Alignment alignment, int datatype) throws IOException{
//String nexus = "";
String dataTypeString = "DNA";
if(datatype == DATATYPE_PROTEIN){
dataTypeString = "PROTEIN";
}
int length = alignment.getMaximumSequenceLength();
// dump pos
boolean removeExcluded = true;
ArrayList<Integer> allPos0 = alignment.getAllCodonPositions(0,removeExcluded, 0, length - 1);
ArrayList<Integer> allPos1 = alignment.getAllCodonPositions(1,removeExcluded, 0, length - 1);
ArrayList<Integer> allPos2 = alignment.getAllCodonPositions(2,removeExcluded, 0, length - 1);
ArrayList<Integer> allPos3 = alignment.getAllCodonPositions(3,removeExcluded, 0, length - 1);
int nChar = allPos0.size() + allPos1.size() + allPos2.size() + allPos3.size();
out.write("#NEXUS" + LF);
out.write(LF);
out.write("BEGIN DATA;" + LF);
out.write("DIMENSIONS NTAX=" + alignment.getSize() + " NCHAR=" + nChar + ";" + LF);
out.write("FORMAT DATATYPE=" + dataTypeString + " INTERLEAVE=YES GAP=- MISSING=?;" + LF);
out.write("MATRIX" + LF);
out.write(LF);
int longestName = alignment.getLongestSequenceName();
for(Sequence seq: alignment.getSequences()){
// also replace some characters not always understood
String seqName = seq.getName();
seqName = seqName.replace(' ','_');
out.write("" + StringUtils.rightPad(seqName,longestName + 3));
out.write(seq.getBasesAtThesePosAsString(allPos0));
out.write(seq.getBasesAtThesePosAsString(allPos1));
out.write(seq.getBasesAtThesePosAsString(allPos2));
out.write(seq.getBasesAtThesePosAsString(allPos3));
out.write(LF);
}
out.write(";" + LF);
out.write("END;" + LF);
out.write(LF);
// TODO this is not safe if one pos should be 0-size
// Write charset (pos 1, 2 & 3)
out.write("BEGIN SETS;" + LF);
int start = 1;
int end = 0;
if(allPos0.size() > 0){
end = start + allPos0.size() - 1;
out.write("charset npos = " + start + "-" + end + ";" + LF);
start = end + 1;
}
if(allPos1.size() > 0){
end = start + allPos1.size() - 1;
out.write("charset 1st = " + start + "-" + end + ";" + LF);
start = end + 1;
}
if(allPos2.size() > 0){
end = start + allPos2.size() - 1;
out.write("charset 2nd = " + start + "-" + end + ";" + LF);
start = end + 1;
}
if(allPos3.size() > 0){
end = start + allPos3.size() - 1;
out.write("charset 3nd = " + start + "-" + end + ";" + LF);
start = end + 1;
}
out.write("END;" + LF);
out.write(LF);
// end write charsets
// out.write(getExcludesAsNexusBlock(alignment.getExcludes()));
// out.write(LF);
//
// out.write(getCodonPosAsNexusBlock(alignment.getCodonPositions()));
// out.write(LF);
out.flush();
out.close();
}
}