package aliview.importer;
import java.io.File;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.bitbucket.kienerj.io.OptimizedRandomAccessFile;
import utils.OSNativeUtils;
/**
 * Describes an alignment/sequence file format (display name plus preferred
 * file suffixes) and offers helpers for sniffing the format of a file from
 * its first line.
 *
 * <p>Instances are immutable. Several constants share the same name and
 * suffixes but are distinct objects, so they can still be told apart by
 * identity.
 */
public class FileFormat {
    private static final Logger logger = Logger.getLogger(FileFormat.class);

    /** Number of leading bytes inspected when sniffing a file's format. */
    private static final int SNIFF_BUFFER_SIZE = 200;

    private final String name;
    private final String suffix;
    private final String suffixWin;

    public static final FileFormat UNKNOWN = new FileFormat("Unknown", "", "");
    public static final FileFormat FILE_FASTA = new FileFormat("Fasta", "fasta", "fas");
    public static final FileFormat FASTA = new FileFormat("Fasta", "fasta", "fas");
    public static final FileFormat NEXUS = new FileFormat("Nexus", "nexus", "nex");
    public static final FileFormat NEXUS_CODONPOS_CHARSET = new FileFormat("NexusCodonposCharset", "codonpos.nexus", "codonpos.nex");
    public static final FileFormat NEXUS_SIMPLE = new FileFormat("NexusSimple", "nexus", "nex");
    public static final FileFormat PHYLIP = new FileFormat("Phylip", "phy", "phy");
    public static final FileFormat PHYLIP_RELAXED = new FileFormat("Phylip", "phy", "phy");
    public static final FileFormat PHYLIP_RELAXED_PADDED_AKA_LONG_NAME_SEQUENTIAL = new FileFormat("Phylip", "phy", "phy");
    public static final FileFormat PHYLIP_RELAXED_PADDED_INTERLEAVED_AKA_LONG_NAME_INTERLEAVED = new FileFormat("Phylip", "phy", "phy");
    public static final FileFormat PHYLIP_STRICT_SEQUENTIAL_AKA_SHORT_NAME_SEQUENTIAL = new FileFormat("Phylip", "phy", "phy");
    public static final FileFormat PHYLIP_SHORT_NAME_INTERLEAVED = new FileFormat("Phylip", "phy", "phy");
    public static final FileFormat MSF = new FileFormat("MSF", "msf", "msf");
    public static final FileFormat CLUSTAL = new FileFormat("Clustal", "aln", "aln");
    public static final FileFormat IMAGE_PNG = new FileFormat("png-image", "png", "png");
    // TODO should be different when not translated AminoAcid
    public static final FileFormat PHYLIP_TRANSLATED_AMINO_ACID = new FileFormat("PhylipAminoAcid", "translated.phy", "translated.phy");
    public static final FileFormat NEXUS_TRANSLATED_AMINO_ACID = new FileFormat("NexusTranslated", "translated.nexus", "translated.nex");
    public static final FileFormat FASTA_TRANSLATED_AMINO_ACID = new FileFormat("FastaTranslated", "translated.fasta", "translated.fas");

    /**
     * Entry point kept for manual experiments; intentionally does nothing.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // no-op
    }

    /**
     * Creates a format descriptor.
     *
     * @param name      display name, returned by {@link #toString()}
     * @param suffix    file suffix returned by {@link #getSuffix()} when not on Windows
     * @param suffixWin file suffix returned by {@link #getSuffix()} on Windows
     */
    public FileFormat(String name, String suffix, String suffixWin) {
        this.name = name;
        this.suffix = suffix;
        this.suffixWin = suffixWin;
    }

    /**
     * Returns everything in {@code name} before its last {@code '.'}, as
     * computed by {@code StringUtils.substringBeforeLast}.
     *
     * @param name file name to strip
     * @return the name without its last dot-separated suffix
     */
    public static final String stripFileSuffixFromName(String name){
        return StringUtils.substringBeforeLast(name, ".");
    }

    /**
     * Returns the suffix appropriate for the current platform.
     *
     * @return the Windows suffix when {@code OSNativeUtils.isWindows()} reports
     *         true, otherwise the default suffix
     */
    public String getSuffix(){
        return OSNativeUtils.isWindows() ? suffixWin : suffix;
    }

    /**
     * @return the display name given at construction
     */
    @Override
    public String toString() {
        return this.name;
    }

    /**
     * Tells whether the given text starts like a FASTA record.
     *
     * @param seq text to inspect, may be {@code null}
     * @return {@code true} if {@code seq} is non-null and starts with {@code '>'}
     */
    public static boolean isThisFasta(String seq){
        return seq != null && seq.startsWith(">");
    }

    /**
     * Sniffs the alignment format of a file by inspecting the first line found
     * within its leading {@value #SNIFF_BUFFER_SIZE} bytes.
     *
     * <p>Detection is best-effort: any exception while opening or reading the
     * file is logged and treated as "format not recognised".
     *
     * @param seqFile file to inspect, may be {@code null}
     * @return {@link #FASTA}, {@link #NEXUS}, {@link #CLUSTAL}, {@link #MSF} or
     *         {@link #PHYLIP}; {@code null} when the file is {@code null}, does
     *         not exist, is empty, cannot be read, or matches no known format
     */
    public static FileFormat isFileOfAlignmentFormat(File seqFile){
        if(seqFile == null || !seqFile.exists()){
            return null;
        }

        long startTime = System.currentTimeMillis();
        FileFormat foundFormat = null;
        OptimizedRandomAccessFile raf = null;
        try {
            raf = new OptimizedRandomAccessFile(seqFile, "r");
            byte[] buffer = new byte[SNIFF_BUFFER_SIZE];
            int bytesRead = raf.read(buffer);
            if (bytesRead > 0) {
                // Decode only the bytes actually read, not the zero-filled tail of the buffer.
                String filestart = new String(buffer, 0, bytesRead);
                // remove leading/trailing whitespace and control chars
                filestart = StringUtils.trim(filestart);
                // only the first line is needed for detection
                String firstLine = filestart.split("\n")[0];
                logger.info("firstLine" + firstLine);
                foundFormat = detectFormatFromFirstLine(firstLine);
            }
            long endTime = System.currentTimeMillis();
            logger.info("check fileformat took " + (endTime - startTime) + " milliseconds, found:" + foundFormat);
        }catch(Exception exc){
            // Best effort: an unreadable file is simply not a recognised format.
            logger.warn("Could not determine file format of " + seqFile, exc);
        }finally{
            // Always release the file handle, also when detection failed.
            if(raf != null){
                try {
                    raf.close();
                }catch(Exception closeExc){
                    logger.warn("Could not close " + seqFile, closeExc);
                }
            }
        }
        return foundFormat;
    }

    /**
     * Maps a file's first line to a format. Checks run in a fixed order:
     * FASTA, NEXUS, Clustal, MSF, Phylip.
     *
     * @param firstLine first line of the file
     * @return the matching format, or {@code null} if none matches
     */
    private static FileFormat detectFormatFromFirstLine(String firstLine){
        if(firstLine.startsWith(">")){
            return FileFormat.FASTA;
        }
        if(StringUtils.containsIgnoreCase(firstLine, "NEXUS")){
            return FileFormat.NEXUS;
        }
        if(ClustalImporter.isStringValidFirstLine(firstLine)){
            return FileFormat.CLUSTAL;
        }
        if(MSFImporter.isStringValidFirstLine(firstLine)){
            return FileFormat.MSF;
        }
        if(PhylipImporter.isStringValidFirstLine(firstLine)){
            return FileFormat.PHYLIP;
        }
        return null;
    }

    /**
     * Tells whether the named file is recognised as a sequence/alignment file.
     *
     * @param fileName path of the file to inspect, may be {@code null}
     * @return {@code true} if {@link #isFileOfAlignmentFormat(File)} recognises
     *         a format for the file; {@code false} otherwise, including when
     *         {@code fileName} is {@code null}
     */
    public static boolean isThisSequenceFile(String fileName) {
        if(fileName == null){
            return false;
        }
        return isFileOfAlignmentFormat(new File(fileName)) != null;
    }
}