/******************************************************************************* * Copyright 2014 Virginia Polytechnic Institute and State University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package edu.vt.vbi.patric.msa; import java.awt.Dimension; import java.awt.Graphics2D; import java.awt.image.BufferedImage; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import javax.imageio.ImageIO; import edu.vt.vbi.ci.util.CommandResults; import edu.vt.vbi.ci.util.ExecUtilities; import edu.vt.vbi.patric.proteinfamily.Newick; import edu.vt.vbi.patric.msa.SequenceData; public class Aligner { private final static String[] gBlocksDrops = { "</head>", "<h2>Gblocks", "<pre>", "<title>", "<body bgcolor", "Processed file:" }; /* * private final static int HEAD_WADE = 0; private final static int PRE_PRE = 1; private final static int IN_PRE = 2; private final static int * POST_PRE = 3; private final static int TAIL_WADE = 4; */ private String prefix; private char getHtml; private SequenceData[] sequences = null; private String groupID = null; private File trimAligned = null; private File rawAligned = null; private String[] treeLines = null; private int[] aaRange = { Integer.MAX_VALUE, 0 }; private int genomeCount = 1; public Aligner(String newickText, String locusNames, String genomeNames) { treeLines = new String[1]; treeLines[0] = newickText; String[] locusList = locusNames.split("\t"); String[] genomeList = genomeNames.split("\t"); int minLength = Math.min(locusList.length, genomeList.length); sequences = new SequenceData[minLength]; for (int i = 0; i < sequences.length; i++) { sequences[i] = new SequenceData(locusList[i], genomeList[i], null); (sequences[i]).fastaOrder = i; } File toDrop = setPrefix(); toDrop.delete(); } public Aligner(String groupId, String locusNames, String genomeNames, String alignedSequences) { this.groupID = groupId; String[] locusList = locusNames.split("\t"); String[] genomeList = genomeNames.split("\t"); String[] seqList = alignedSequences.split("\t"); int minLength = Math.min(locusList.length, genomeList.length); minLength = Math.min(seqList.length, minLength); sequences = new SequenceData[minLength]; for (int i = 0; i < sequences.length; i++) { sequences[i] = new SequenceData(locusList[i], genomeList[i], seqList[i]); (sequences[i]).fastaOrder = i; } File toDrop = setPrefix(); toDrop.delete(); } public Aligner(char needHtml, String groupId, SequenceData[] sequences) { // save flag for Gblocks run getHtml = needHtml; if ((groupId != null) && (groupId.length() == 0)) { groupId = null; } this.groupID = groupId; this.sequences = sequences; if (sequences != null) { try { File tmpFaa = setPrefix(); BufferedWriter faaWrite = new BufferedWriter(new FileWriter(tmpFaa)); String[] genomeNames = new String[sequences.length]; for (int i = 0; i < sequences.length; i++) { (sequences[i]).fastaOrder = i; genomeNames[i] = (sequences[i]).setFasta(50, faaWrite, aaRange); } faaWrite.close(); Arrays.sort(genomeNames); String checkName = genomeNames[0]; for (int i = 1; i < genomeNames.length; i++) { if (!checkName.equals(genomeNames[i])) { ++genomeCount; checkName = genomeNames[i]; } } Arrays.sort(sequences); ExecUtilities.exec("muscle -fasta -stable -in " + tmpFaa.getAbsolutePath() + " -out " + prefix + "aga"); ExecUtilities.exec("Gblocks " + prefix + "aga -b5=h -p=" + getHtml); rawAligned = new File(prefix + "aga"); trimAligned = new File(prefix + "aga-gb"); if (!trimAligned.exists()) { trimAligned = null; } else { BufferedReader checker = new BufferedReader(new FileReader(trimAligned)); boolean empty = true; String line = checker.readLine(); ArrayList<String> locusList = new ArrayList<String>(); while (line != null) { line = line.trim(); if (0 == line.length()) { line = checker.readLine(); } else if (line.startsWith(">")) { locusList.add(line.substring(1)); line = checker.readLine(); } else { empty = false; line = null; } } checker.close(); if (empty) { trimAligned.delete(); checker = new BufferedReader(new FileReader(rawAligned)); BufferedWriter writer = new BufferedWriter(new FileWriter(trimAligned)); line = checker.readLine(); while (line != null) { writer.write(line); writer.newLine(); line = checker.readLine(); } writer.close(); checker.close(); } } } catch (IOException e) { e.printStackTrace(); } } } private File setPrefix() { File tmpFaa = null; try { tmpFaa = File.createTempFile("msa", ".faa"); } catch (IOException e) { e.printStackTrace(); } prefix = tmpFaa.getAbsolutePath(); int pAt = prefix.lastIndexOf('.'); if (0 <= pAt) { prefix = prefix.substring(0, pAt + 1); } return tmpFaa; } public void runFastTree() { if (trimAligned != null) { CommandResults treeHold = ExecUtilities.exec("FastTree_LG -gamma -nosupport " + trimAligned.getAbsolutePath()); if (treeHold != null) { treeLines = treeHold.getStdout(); } System.out.println("runFastTree::" + treeLines.length); } } private SequenceData getDataForLocus(String toCheck) { SequenceData result = null; if (toCheck.startsWith(">")) { result = new SequenceData(toCheck.substring(1)); int at = Arrays.binarySearch(sequences, result); if (0 <= at) { result = sequences[at]; } } return result; } public void setAlignTree(PrintWriter writer) throws IOException { writer.write("" + sequences.length); writer.write("\t" + genomeCount); writer.write("\t" + aaRange[0] + "\t" + aaRange[1] + "\t"); for (int i = 0; i < treeLines.length; i++) { writer.write(treeLines[i]); // System.out.print(treeLines[i]); } BufferedReader msaRead = new BufferedReader(new FileReader(rawAligned)); String msaLine = msaRead.readLine(); while ((msaLine != null) && (!msaLine.startsWith(">"))) { msaLine = msaRead.readLine(); } if (msaLine != null) { SequenceData lastData = getDataForLocus(msaLine); StringBuffer sequence = new StringBuffer(); msaLine = msaRead.readLine(); while (msaLine != null) { SequenceData nextData = getDataForLocus(msaLine); if (nextData != null) { if (0 < sequence.length()) { writer.write("\t" + lastData.locus + "\t" + lastData.taxonName + "\t" + sequence.toString()); sequence = new StringBuffer(); lastData = nextData; } } else { String[] parts = msaLine.split("\\s"); for (int i = 0; i < parts.length; i++) { sequence.append(parts[i]); } } msaLine = msaRead.readLine(); } if (0 < sequence.length()) { writer.write("\t" + lastData.locus + "\t" + lastData.taxonName + "\t" + sequence.toString()); writer.write("\f" + rawAligned.getAbsolutePath()); } } msaRead.close(); // trimAligned.delete(); trimAligned = null; // rawAligned.delete(); rawAligned = null; } public void setTreePng(boolean genomeTips, boolean flushTips, PrintWriter writer) throws IOException { if (rawAligned != null) { rawAligned.delete(); rawAligned = null; } StringBuffer sb = new StringBuffer(); for (int i = 0; i < treeLines.length; i++) { sb.append(treeLines[i]); } Newick treeForm = new Newick(sb.toString()); treeForm.setGenomeNames(sequences); treeForm.setTreeType(genomeTips, flushTips); BufferedImage gGetter = new BufferedImage(100, 100, BufferedImage.TYPE_INT_RGB); // Create a graphics contents on the buffered image Graphics2D g2d = gGetter.createGraphics(); Dimension preferred = treeForm.getPreferredSize(1, g2d); preferred.width = 666; // Graphics context no longer needed so dispose it // g2d.dispose(); gGetter = new BufferedImage(preferred.width, preferred.height, BufferedImage.TYPE_INT_RGB); treeForm.paint(gGetter); File pngFile = new File(prefix + "png"); try { ImageIO.write(gGetter, "PNG", pngFile); } catch (IOException err) { err.printStackTrace(); } g2d.dispose(); writer.write(prefix + "png"); String[] nameOrder = new String[sequences.length]; for (int i = 0; i < sequences.length; i++) { int at = treeForm.getTipIndex((sequences[i]).locus); nameOrder[at] = (sequences[i]).getLongName(); } for (int i = 0; i < nameOrder.length; i++) { writer.write("\t" + nameOrder[i]); } if (trimAligned != null) { trimAligned.delete(); trimAligned = null; } } public void getGblocksPrintable(boolean genomeTags, String conserveChop, String description, PrintWriter writer) throws IOException { if (trimAligned != null) { if (rawAligned != null) { rawAligned.delete(); rawAligned = null; } } else { rawAligned = new File(prefix + "aga"); if (!rawAligned.exists()) { BufferedWriter aWrite = new BufferedWriter(new FileWriter(rawAligned)); for (int i = 0; i < sequences.length; i++) { (sequences[i]).writeToFasta(aWrite); } aWrite.close(); } int chop = Integer.parseInt(conserveChop); int minCheck = sequences.length; minCheck >>= 1; ++minCheck; if (minCheck == chop) { ExecUtilities.exec("Gblocks " + prefix + "aga -b5=h -p=y"); } else { if (chop < sequences.length) { ++chop; } ExecUtilities.exec("Gblocks " + prefix + "aga -p=y -b5=h -b2=" + chop + " -b1=" + conserveChop); } rawAligned.delete(); rawAligned = null; trimAligned = new File(prefix + "aga-gb"); } File alignmentFile = new File(prefix + "aga-gb.htm"); adjustAlignHtml(genomeTags, description, alignmentFile, writer); // trimAligned.delete(); trimAligned = null; alignmentFile.delete(); } int checkForSpecial(String line) { int result = -1; for (int i = 0; i < gBlocksDrops.length; i++) { if (line.startsWith(gBlocksDrops[i])) { result = i; i = gBlocksDrops.length; } } return result; } private void adjustAlignHtml(boolean genomeTags, String description, File alignmentFile, PrintWriter writer) throws IOException { SequenceData[] sortSave = sequences; sequences = new SequenceData[sequences.length]; for (int i = 0; i < sortSave.length; i++) { sequences[(sortSave[i]).fastaOrder] = sortSave[i]; } String[] expander = new String[sequences.length]; int maxLeft = 0; String line = null; for (int i = 0; i < sequences.length; i++) { if (genomeTags) { line = (sequences[i]).taxonName; } else { line = (sequences[i]).locus; } int nextLength = line.length(); if (22 < nextLength) { line = line.substring(0, 22); maxLeft = 22; } else { maxLeft = Math.max(maxLeft, nextLength); } expander[i] = line; } char[] emptyForm = new char[maxLeft]; Arrays.fill(emptyForm, ' '); String emptyLeft = new String(emptyForm); BufferedReader reader = new BufferedReader(new FileReader(alignmentFile)); line = reader.readLine(); boolean prePre = true; boolean postPre = false; ArrayList<String> leftSide = new ArrayList<String>(); String equalSkip = null; int skipCount = 0; ArrayList<String> rightSide = new ArrayList<String>(); int expCheck = 0; while (line != null) { if (prePre) { int preAt = checkForSpecial(line); if (preAt < 0) { writer.write(line + "\n"); } else if (preAt == 0) { writer.write("</head>\n"); writer.write("<body id='popup'>\n"); writer.write("<div id='page-wrapper-content-bg'>\n"); writer.write("<div id='page-wrapper'>\n"); writer.write("<div id='page-area>\n"); /* * writer.write("<div id='header'>" + "<img src=\"/patric/images/logo_popup.gif\" " + "width='219' height='84' " + * "alt='Patric - PathoSystems Resource Integration Center' />" + "</div>\n"); */ writer.write("<div id='content-area'>\n"); } else if (preAt == 1) { preAt = line.indexOf("Results"); line = line.substring(0, preAt); if ((groupID != null) && (0 < groupID.length())) { line += "for " + groupID + ":"; if (description != null) { line += description; } } writer.write(line + "</h2>\n"); } else if (preAt == 2) { prePre = false; } } else if (postPre) { for (int i = 0; i < sequences.length; i++) if (line.startsWith("New number of positions")) { int keepAt = line.indexOf("<b>"); line = line.substring(keepAt); line = "New number of positions " + line; } else if (line.startsWith("</body>")) { line = "</div></div></div></div>" + line; } writer.write(line + "\n"); } else { if (line.startsWith("</pre>")) { writer.write("<pre>"); Iterator<String> itLeft = leftSide.iterator(); Iterator<String> itRight = rightSide.iterator(); while (itLeft.hasNext()) { String next = itLeft.next(); int indent = maxLeft - next.length(); if (0 < indent) { next += emptyLeft.substring(0, indent); } writer.write(next + " "); next = itRight.next(); if (next.startsWith(equalSkip)) { next = next.substring(skipCount); } writer.write(next + "\n"); } postPre = true; for (int i = 0; i < sequences.length; i++) { (sequences[i]).writeLongName(writer); } } else { String eCheck = line.trim(); if (eCheck.length() == 0) { leftSide.add(emptyLeft); rightSide.add(""); } else if (line.charAt(0) != ' ') { leftSide.add(expander[expCheck]); ++expCheck; if (expander.length <= expCheck) { expCheck = 0; } rightSide.add(line.substring(skipCount)); } else { leftSide.add(emptyLeft); int equalAt = line.indexOf("==="); if (equalAt < 0) { rightSide.add(line); } else { if (equalSkip == null) { equalSkip = line.substring(0, equalAt); skipCount = equalAt; } rightSide.add(line.substring(equalAt)); } } } } line = reader.readLine(); } reader.close(); sequences = sortSave; } }