package utils.nexus; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Scanner; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.math.IntRange; import org.apache.log4j.Logger; import utils.RangeUtils; public class NexusUtilities { private static final Logger logger = Logger.getLogger(NexusUtilities.class); private static final String LF = System.getProperty("line.separator"); public static void main(String[] args) { try { new NexusUtilities().createCharsetsFromNexusFile(new File("/home/anders/projekt/ormbunkar/analys/seqconcat_test/seqconcat_test.nexus"), 10000); } catch (NexusAlignmentImportException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static final boolean updateExcludesFromFile(File alignmentFile, Excludes excludes) throws NexusAlignmentImportException { logger.info("look for nexus EXSET block in file " + alignmentFile.toString()); try { String assumptionsBlock = extractBlockFromFile(alignmentFile, "BEGIN ASSUMPTIONS;","END"); if(assumptionsBlock == null || assumptionsBlock.length() == 0){ return false; } logger.info("exsetBlock" + assumptionsBlock); String exsetBlock = StringUtils.substringBetween(assumptionsBlock, "EXSET", ";"); if(exsetBlock == null){ return false; } logger.info("Found block"); String excludeString = StringUtils.substringAfter(exsetBlock, "="); logger.info("ecludestring" + excludeString); if(excludeString == null || excludeString.length() == 0){ return true; } ArrayList<NexusRange> allRanges = parseNexusRanges(excludeString, 0); for(NexusRange range: allRanges){ excludes.addNexusRange(range); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); throw new NexusAlignmentImportException("Could not parse NEXUS EXSET Block"); } return true; } public static final boolean updateCodonPositionsFromNexusFile(File alignmentFile, CodonPositions codonPositions) throws NexusAlignmentImportException { logger.info("look for nexus BEGIN CODONS; block in file " + alignmentFile.toString()); try { String codonsBlock = extractBlockFromFile(alignmentFile, "BEGIN CODONS;", "END;"); if(codonsBlock == null || codonsBlock.length() == 0){ return false; } // TODO there could be different codonposset - I just use first one String codonPositionsBlock = StringUtils.substringBetween(codonsBlock, "CODONPOSSET", ";"); if(codonPositionsBlock == null || codonPositionsBlock.length() == 0){ return false; } // add a "," to the end makes it easier to parse (then codon pos can be unordered codonPositionsBlock += ","; logger.info("Found block"); Scanner lineTokenizer = new Scanner(codonPositionsBlock).useDelimiter(","); logger.info("search" + lineTokenizer.findWithinHorizon("\\?:", codonPositionsBlock.length())); // while(lineTokenizer.hasNext()){ // logger.info(lineTokenizer.next()); // } // NexusRangesTranslator nexusRangesTranslator = new NexusRangesTranslator(); // Always set position n - 1 because program internally is working with first pos in alignment as 0 (and in codonpos block from 1) // TODO with n range does not ends with \3 String nPos = StringUtils.substringBetween(codonPositionsBlock, "N:",","); if(nPos != null){ ArrayList<NexusRange> allRanges = parseNexusRanges(nPos, 0); nexusRangesTranslator.addNexusRanges(allRanges); } // TODO questionmarkpos is treated as n // TODO with n range does not ends with \3 String questionmarkPos = StringUtils.substringBetween(codonPositionsBlock, "?:",","); if(questionmarkPos != null){ ArrayList<NexusRange> allRanges = parseNexusRanges(questionmarkPos, 0); nexusRangesTranslator.addNexusRanges(allRanges); } // TODO check that range ends with \3 String pos1 = StringUtils.substringBetween(codonPositionsBlock, "1:",","); if(pos1 != null){ ArrayList<NexusRange> allRanges = parseNexusRanges(pos1, 1); nexusRangesTranslator.addNexusRanges(allRanges); } String pos2 = StringUtils.substringBetween(codonPositionsBlock, "2:",","); if(pos2 != null){ ArrayList<NexusRange> allRanges = parseNexusRanges(pos2, 2); nexusRangesTranslator.addNexusRanges(allRanges); } String pos3 = StringUtils.substringBetween(codonPositionsBlock, "3:",","); if(pos3 != null){ ArrayList<NexusRange> allRanges = parseNexusRanges(pos3, 3); nexusRangesTranslator.addNexusRanges(allRanges); } Ranges allRanges = nexusRangesTranslator.convertToCodonRanges(); codonPositions.addRanges(allRanges); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); throw new NexusAlignmentImportException("Could not parse NEXUS CODONS Block"); } // logger.info(codonPositions.debug()); return true; } private static String getNexusRangesAsBlock(List <NexusRange> ranges){ String rangeBlock = ""; for(NexusRange range: ranges){ if(range.getMinimumInt() == range.getMaximumInt()){ rangeBlock += " " + range.getMinimumInt(); } else{ rangeBlock += " " + range.getMinimumInt() + "-" + range.getMaximumInt(); if(range.getSteps() != 1){ rangeBlock += "\\" + range.getSteps(); } } } return rangeBlock; } public static String getCharsetsBlockWithoutNexus(CharSets charsets) { if(charsets == null){ return ""; } StringBuffer charsetBlock = new StringBuffer(); for(CharSet aSet: charsets){ charsetBlock.append("charset " + aSet.getName() + "=" + getNexusRangesAsBlock(aSet.getAsContinousNexusRanges()) + ";" + LF); } return charsetBlock.toString(); } public static String getCharsetsBlockAsNexus(CharSets charsets) { if(charsets == null){ return ""; } StringBuffer charsetBlock = new StringBuffer("BEGIN SETS;" + LF); charsetBlock.append(getCharsetsBlockWithoutNexus(charsets)); // end line with a semicolon charsetBlock.append("END;"); return charsetBlock.toString(); } public static final CharSets createCharsetsFromNexusFile(File alignmentFile, int alignmentWidth) throws NexusAlignmentImportException { logger.info("look for nexus BEGIN SETS; block in file " + alignmentFile.toString()); CharSets allSets = new CharSets(); try { String setsBlock = extractBlockFromFile(alignmentFile, "BEGIN SETS;", "END;"); if(setsBlock != null && setsBlock.length() > 0){ allSets = createCharsetsFromNexusCharsetBlock(setsBlock); }else{ // No block } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); throw new NexusAlignmentImportException("Could not parse NEXUS CHARSET Block"); } return allSets; } public static CharSets createCharsetsFromNexusCharsetBlock(String setsBlock) throws Exception{ CharSets allSets = new CharSets(); setsBlock = setsBlock.toUpperCase(); String [] tokens = setsBlock.split(";"); for(String token: tokens){ try{ token = token.trim(); if(token.length() > 0){ String[] parts = token.split("="); String name = parts[0].replace("CHARSET", ""); // /i (insensitive name = name.trim(); String ranges = parts[1].trim(); logger.info("ranges" + ranges); CharSet charSet = new CharSet(name); ArrayList<NexusRange> allRanges; try { allRanges = parseNexusRanges(ranges, 0); } catch (Exception e) { throw new Exception(e.getMessage() + LF + "in line:" + LF + token); } boolean areAllContinous = true; for(NexusRange range: allRanges){ if(range.steps != 1){ areAllContinous = false; } } charSet.addNexusRanges(allRanges); allSets.add(charSet); } } catch (Exception e) { throw new Exception(token); } } return allSets; } public static ArrayList<NexusRange> parseNexusRanges(String input, int rangePositionVal) throws Exception{ ArrayList<NexusRange> allRanges = new ArrayList<NexusRange>(); // pad '-' with space to ' - ' to make parsing simpler input = input.replaceAll("-", " - "); NexusParser parser = new NexusParser(input); parser.split(" ", true); logger.info("input:" + input); logger.info(parser.countTokens()); parser.debug(); while(parser.hasMoreTokens()){ if(parser.isNextTokensIntRange()){ logger.info("TOKENiSiNTRANGE"); allRanges.add(parser.getNexusRange(rangePositionVal)); } else if(parser.isNextTokenNumeric()){ logger.info("tokenIsNumeric"); parser.getIntegerAsRange(); } else{ parser.next(); throw new Exception("Could not parse text: " + parser.getToken()); } } for(NexusRange range: allRanges){ // logger.info("" + range.toString()); } return allRanges; } // BEGIN ASSUMPTIONS; // OPTIONS DEFTYPE=unord PolyTcount=MINSTEPS ; // EXSET * UNTITLED = 1-613 701-833 946-1045 1124-1256 1313-1628 1651-1654 1756-1880 1952-2038 2133-2358 2498-2750 2913-2921 2940-2944 3007-3123 3164-3169 3239-3321 3343-3348 3396-3503 3556-3561 3580-3659 3735-3848 3894-3897 3928-3933 3951-4037 4108-4555; // END; // // // BEGIN CODONS; // CODONPOSSET * CodonPositions = // N: 1-76 717-720 946-1045 1124-1256 1313-1628 1651-1654 1756-1880 1952-2038 2133-2358 2498-2506 2659-2740 2940-2944 3007-3123 3239-3321 3396-3503 3580-3659 3735-3848 3894-3897 3951-4037 4108-4112 4152-4555, // 1: 77-716\3 721-943\3 1046-1121\3 1257-1311\3 1630-1648\3 1655-1754\3 1882-1951\3 2041-2131\3 2360-2495\3 2507-2657\3 2742-2937\3 2945-3005\3 3125-3236\3 3322-3394\3 3505-3577\3 3660-3732\3 3849-3891\3 3898-3949\3 4039-4105\3 4113-4149\3, // 2: 78-714\3 722-944\3 1047-1122\3 1258-1312\3 1631-1649\3 1656-1755\3 1883-1949\3 2039-2132\3 2361-2496\3 2508-2658\3 2743-2938\3 2946-3006\3 3126-3237\3 3323-3395\3 3506-3578\3 3661-3733\3 3850-3892\3 3899-3950\3 4040-4106\3 4114-4150\3, // 3: 79-715\3 723-945\3 1048-1123\3 1259-1310\3 1629-1650\3 1657-1753\3 1881-1950\3 2040-2130\3 2359-2497\3 2509-2656\3 2741-2939\3 2947-3004\3 3124-3238\3 3324-3393\3 3504-3579\3 3662-3734\3 3851-3893\3 3900-3948\3 4038-4107\3 4115-4151\3; // CODESET * UNTITLED = Universal: all ; // END; public static final String getExcludesAsNexusBlock(Excludes excludes) { ArrayList<NexusRange> allRanges = excludes.getAsContinousNexusRanges(); String exsetBlock = "BEGIN ASSUMPTIONS;" + LF; exsetBlock += "EXSET * UNTITLED = "; exsetBlock += getNexusRangesAsBlock(allRanges); // end line with a semicolon exsetBlock += ";"; exsetBlock += LF; exsetBlock += "END;"; return exsetBlock; } public static final String getPartialCodonPosAsNexusBlock(CodonPositions codonPositions, int startPos, int endPos) { String nexusBlock = "BEGIN CODONS;" + LF; nexusBlock += "CODONPOSSET * CodonPositions =" + LF; String posN = " N:"; // should not really be necessary to include this String pos1 = " 1:"; String pos2 = " 2:"; String pos3 = " 3:"; // add one to all since program internally is working with first pos in alignment as 0 ArrayList<IntRange> allPos = codonPositions.getAllNonCodingPositionsAsRanges(0, startPos, endPos); RangeUtils.sortIntRangeList(allPos); for(IntRange range: allPos){ posN += " " + (range.getMinimumInteger() + 1) + "-" + (range.getMaximumInteger() + 1) + ""; } allPos = codonPositions.getAllCodingPositionsAsIntRanges(1, startPos, endPos); RangeUtils.sortIntRangeList(allPos); for(IntRange range: allPos){ pos1 += " " + (range.getMinimumInteger() + 1) + "-" + (range.getMaximumInteger() + 1) + "\\3"; } allPos = codonPositions.getAllCodingPositionsAsIntRanges(2, startPos, endPos); RangeUtils.sortIntRangeList(allPos); for(IntRange range: allPos){ pos2 += " " + (range.getMinimumInteger() + 1) + "-" + (range.getMaximumInteger() + 1) + "\\3"; } allPos = codonPositions.getAllCodingPositionsAsIntRanges(3, startPos, endPos); RangeUtils.sortIntRangeList(allPos); for(IntRange range: allPos){ pos3 += " " + (range.getMinimumInteger() + 1) + "-" + (range.getMaximumInteger() + 1) + "\\3"; } // nexusBlock += posN + "," + LF; nexusBlock += posN + "," + LF; nexusBlock += pos1 + "," + LF; nexusBlock += pos2 + "," + LF; nexusBlock += pos3 + ";" + LF; nexusBlock += "CODESET * UNTITLED = Universal: all ;" + LF; nexusBlock += "END;"; return nexusBlock; } /* * * I.e., like this: 4218C_r1_N= 11-118 175-281 402-510; 4218C_r1_1= 2-8\3 119-173\3 283-400\3 512-548\3; 4218C_r1_2= 3-9\3 120-174\3 284-401\3 513-549\3; 4218C_r1_3= 1-10\3 121-172\3 282-399\3 511-550\3; 4218C_r1a_N= 561-862; 4218C_r1a_1= 552-558\3 863-881\3; * * * * */ public static final String getPartialCodonPosAsCharsetNexusBlock(String charsetPrefix, CodonPositions codonPositions, int startPos, int endPos) { String nexusBlock = ""; String posN = "charset " + charsetPrefix + "_N= "; String pos1 = "charset " + charsetPrefix + "_1= "; String pos2 = "charset " + charsetPrefix + "_2= "; String pos3 = "charset " + charsetPrefix + "_3= "; // add one to all since program internally is working with first pos in alignment as 0 ArrayList<IntRange> allPos = codonPositions.getAllNonCodingPositionsAsRanges(0, startPos, endPos); RangeUtils.sortIntRangeList(allPos); for(IntRange range: allPos){ posN += " " + (range.getMinimumInteger() + 1) + "-" + (range.getMaximumInteger() + 1) + ""; } allPos = codonPositions.getAllCodingPositionsAsIntRanges(1, startPos, endPos); RangeUtils.sortIntRangeList(allPos); for(IntRange range: allPos){ pos1 += " " + (range.getMinimumInteger() + 1) + "-" + (range.getMaximumInteger() + 1) + "\\3"; } allPos = codonPositions.getAllCodingPositionsAsIntRanges(2, startPos, endPos); RangeUtils.sortIntRangeList(allPos); for(IntRange range: allPos){ pos2 += " " + (range.getMinimumInteger() + 1) + "-" + (range.getMaximumInteger() + 1) + "\\3"; } allPos = codonPositions.getAllCodingPositionsAsIntRanges(3, startPos, endPos); RangeUtils.sortIntRangeList(allPos); for(IntRange range: allPos){ pos3 += " " + (range.getMinimumInteger() + 1) + "-" + (range.getMaximumInteger() + 1) + "\\3"; } // nexusBlock += posN + "," + LF; nexusBlock += posN + "," + LF; nexusBlock += pos1 + "," + LF; nexusBlock += pos2 + "," + LF; nexusBlock += pos3 + ";" + LF; return nexusBlock; } public static final String getCodonPosAsNexusBlock(CodonPositions codonPositions, int startPos, int endPosInclusive) { return getPartialCodonPosAsNexusBlock(codonPositions, startPos, endPosInclusive); } public static String replaceProblematicChars(String text){ text = text.replaceAll("[^A-Za-z0-9]", "_"); // text = text.replace(' ', '_'); // text = text.replace('-', '_'); // text = text.replace('\'', '_'); // text = text.replace( '?', '_'); // text = text.replace( '.', '_'); // text = text.replace( '/', '_'); // text = text.replace( '|', '_'); // text = text.replace( '\"', '_'); // text = text.replace( ',', '_'); // text = text.replace( '&', '_'); // text = text.replace( '\\', '_'); // text = text.replace( '(', '_'); // text = text.replace( ')', '_'); // text = text.replace( ':', '_'); // text = text.replace( ';', '_'); // text = text.replace( '>', '_'); return text; } private static String getPersonalNexusBlock(int seqLen){ StringBuilder block = new StringBuilder(); block.append("BEGIN MRBAYES;" + LF); block.append("charset aligned-WoodsiapgiC-mafft.fasta.nexus = 1-" + seqLen + ";" + LF); block.append("Partition ALLDNA = 1:aligned-WoodsiapgiC-mafft.fasta.nexus;" + LF); block.append("Set partition = ALLDNA;" + LF); block.append("[GTRG]" + LF); block.append("["+ LF); block.append("Lset applyto=(1) nst=6 rates=gamma;"+ LF); block.append("Prset applyto=(1) revmatpr=Dirichlet(1.0,1.0,1.0,1.0,1.0,1.0) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) shapepr=Uniform(0.1,50.0);"+ LF); block.append("]"+ LF); block.append("[GTRIG]Lset applyto=(1) nst=6 rates=invgamma;"+ LF); block.append("Prset applyto=(1) revmatpr=Dirichlet(1.0,1.0,1.0,1.0,1.0,1.0) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) shapepr=Uniform(0.1,50.0) pinvarpr=Uniform(0.0,1.0);"+ LF); block.append("[SYMG]"+ LF); block.append("["+ LF); block.append("Lset applyto=(1) nst=6 rates=gamma;"+ LF); block.append("Prset applyto=(1) revmatpr=Dirichlet(1.0,1.0,1.0,1.0,1.0,1.0) statefreqpr=Fixed(Equal);"+ LF); block.append("]"+ LF); block.append("mcmcp nruns=1 ngen=1000000 printfreq=1000 samplefreq=1000 nchains=1 diagnfreq=10000 burninfrac=0.25 stoprule=no stopval=0.002 temp=0.2 checkpoint=yes checkfreq=500000;"+ LF); block.append("mcmc;"+ LF); block.append("sumt burnin=0.7 nruns=1;"+ LF); block.append("END;"+ LF); return block.toString(); } public static boolean isNexusFile(File alignmentFile) { boolean isNexusFile = false; try { if(alignmentFile != null && alignmentFile.exists()){ BufferedReader r = new BufferedReader(new FileReader(alignmentFile)); String firstLine = r.readLine(); if(firstLine != null & firstLine.length() > 0 & firstLine.length() < 10){ if(firstLine.toLowerCase().indexOf("nexus") > 0){ isNexusFile = true; } } } } catch (Exception e) { e.printStackTrace(); } return isNexusFile; } public static String extractBlockFromFile(File alignmentFile, String start, String end) { StringBuilder block = new StringBuilder(); try { BufferedReader r = new BufferedReader(new FileReader(alignmentFile)); String line; String name = null; int nLine = 0; boolean startFound = false; boolean endFound = false; while ((line = r.readLine()) != null) { if(StringUtils.containsIgnoreCase(line, start)){ startFound = true; int startPos = line.toUpperCase().indexOf(start.toUpperCase()); line = line.substring(startPos + start.length()); } if(startFound && StringUtils.containsIgnoreCase(line, end)){ endFound = true; int startPos = line.toUpperCase().indexOf(end.toUpperCase()); line = line.substring(0,startPos); } if(startFound){ block.append(line); block.append(LF); } if(endFound){ break; } } } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return block.toString(); } }