/* * Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br) * Copyright (C) 2008,2009,2010,2011,2012 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com) * * For further information check the LICENSE file. */ package bio.pih.genoogle.alignment; /** * Class which divided the query and target sequences and create {@link GenoogleSmithWaterman} * instances and delegate to them the alignment execution. This class is useful to save memory for * alignment of the very long sequences. */ public class DividedStringGenoogleSmithWaterman { private final int match; private final int replace; private final int insert; private final int delete; private final int gapExtend; private final int lengthThreshould; StringBuilder queryAlignedBuilder = new StringBuilder(); StringBuilder targetAlignedBuilder = new StringBuilder(); StringBuilder pathAlignedBuilder = new StringBuilder(); String queryAligned = null; String targetAligned = null; String pathAligned = null; int score = 0; private int queryStart; private int queryEnd; private int targetStart; private int targetEnd; private int identitySize; /** * Constructor which inform the scores for the alignment. * * @param match * score when two symbols are the same. * @param replace * score when two symbols are different. * @param insert * score when a gap is put because a insertion. * @param delete * score when a gap is put because a deletion. * @param gapExtend * score to extend a gap. * @param lengthThreshould * minimum length of each sub-query or sub-target sequences. */ public DividedStringGenoogleSmithWaterman(int match, int replace, int insert, int delete, int gapExtend, int lengthThreshould) { this.match = match; this.replace = replace; this.insert = insert; this.delete = delete; this.gapExtend = gapExtend; this.lengthThreshould = lengthThreshould; } /** * Do the alignment of the two given sequences and return the score. Others alignment * informations are stored at the class instance. * * @param query * @param target * @return score of the alignment. */ public int pairwiseAlignment(String query, String target) { if (query.length() <= lengthThreshould || target.length() <= lengthThreshould) { StringGenoogleSmithWaterman aligner = new StringGenoogleSmithWaterman(match, replace, insert, delete, gapExtend); aligner.pairwiseAlignment(query, target); this.queryAligned = aligner.getQueryAligned(); this.targetAligned = aligner.getTargetAligned(); this.pathAligned = aligner.getPath(); this.score = aligner.getScore(); this.identitySize = aligner.getIdentitySize(); this.queryStart = aligner.getQueryStart(); this.queryEnd = aligner.getQueryEnd(); this.targetStart = aligner.getTargetStart(); this.targetEnd = aligner.getTargetEnd(); return this.score; } int m = query.length() / lengthThreshould; int n = target.length() / lengthThreshould; int mRest = query.length() % lengthThreshould; if (mRest != 0) { m++; } int nRest = query.length() % lengthThreshould; if (nRest != 0) { n++; } int c = Math.min(m, n); int queryLength = query.length() / c; int targetLength = target.length() / c; int queryPos = 0; int targetPos = 0; int queryDiff = 0; int targetDiff = 0; queryAlignedBuilder = new StringBuilder(); targetAlignedBuilder = new StringBuilder(); pathAlignedBuilder = new StringBuilder(); score = 0; for (int s = 0; s < c; s++) { int endQueryPiece; int endTargetPiece; if (s == c - 1) { endQueryPiece = query.length(); endTargetPiece = target.length(); } else { endQueryPiece = queryPos + queryLength + queryDiff; endTargetPiece = targetPos + targetLength + targetDiff; } String queryPiece = query.substring(queryPos, endQueryPiece); String targetPiece = target.substring(targetPos, endTargetPiece); StringGenoogleSmithWaterman aligner = new StringGenoogleSmithWaterman(match, replace, insert, delete, gapExtend); aligner.pairwiseAlignment(queryPiece, targetPiece); score += aligner.getScore(); identitySize += aligner.getIdentitySize(); if (s == 0) { this.queryStart = aligner.getQueryStart(); this.targetStart = aligner.getTargetStart(); } else { setBeginSubAlignment(queryPiece, targetPiece, aligner); } queryAlignedBuilder.append(aligner.getQueryAligned()); targetAlignedBuilder.append(aligner.getTargetAligned()); pathAlignedBuilder.append(aligner.getPath()); queryPos = queryPos + aligner.getQueryEnd(); targetPos = targetPos + aligner.getTargetEnd(); queryDiff = queryLength - aligner.getQueryEnd(); targetDiff = targetLength - aligner.getTargetEnd(); } this.queryEnd = query.length(); this.targetEnd = target.length(); return this.score; } /** * Format correctly the begin of this sub-alignment. * * @param queryPiece * @param targetPiece * @param aligner */ private void setBeginSubAlignment(String queryPiece, String targetPiece, StringGenoogleSmithWaterman aligner) { int i; for (i = 1; i < aligner.getQueryStart() && i < aligner.getTargetStart(); i++) { char queryChar = queryPiece.charAt(i); char targetChar = targetPiece.charAt(i); queryAlignedBuilder.append(queryChar); targetAlignedBuilder.append(targetChar); if (queryChar == targetChar) { pathAlignedBuilder.append('|'); score += match; identitySize++; } else { pathAlignedBuilder.append(' '); score += replace; } } while (i < aligner.getQueryStart() || i < aligner.getTargetStart()) { if (i < aligner.getQueryStart()) { queryAlignedBuilder.append(queryPiece.charAt(i - 1)); targetAlignedBuilder.append('-'); pathAlignedBuilder.append(' '); score += insert; } else { queryAlignedBuilder.append('-'); targetAlignedBuilder.append(targetPiece.charAt(i - 1)); pathAlignedBuilder.append(' '); score += insert; } i++; } } /** * Get the {@link String} representing the aligned query. * * @return aligned query. */ public String getQueryAligned() { if (queryAligned == null) { queryAligned = queryAlignedBuilder.toString(); } return queryAligned; } /** * Get the {@link String} representing the aligned target. * * @return aligned target. */ public String getTargetAligned() { if (targetAligned == null) { targetAligned = targetAlignedBuilder.toString(); } return targetAligned; } /** * Get the {@link String} representing the alignment path. * * @return alignment path. */ public String getPath() { if (pathAligned == null) { pathAligned = pathAlignedBuilder.toString(); } return pathAligned; } /** * * @return alignment score */ public int getScore() { return score; } /** * @return where the alignment begins at the query sequence. */ public int getQueryStart() { return queryStart; } /** * @return where the alignment ends at the query sequence. */ public int getQueryEnd() { return queryEnd; } /** * @return where the alignment begins at the target sequence. */ public int getTargetStart() { return targetStart; } /** * @return where the alignment ends at the target sequence. */ public int getTargetEnd() { return targetEnd; } /** * Get the identity size, it is, how many exact matches occurred in the alignment. * @return the alignment identity size. */ public int getIdentitySize() { return identitySize; } }