package com.ppfold.algo; import java.io.Serializable; import java.util.ArrayList; import java.util.List; import statalign.distance.Distance; import statalign.postprocess.utils.Mapping; public class FuzzyAlignment implements Serializable { /** * */ private static final long serialVersionUID = 5984201565048925527L; public int length; public List<String> sequences; public List<String> names; public List<AlignmentData> alignments; public List<FuzzyNucleotide[]> columns; public List<Integer> mapping; public boolean useExpectedFrequencies = true; public int getNumSequences() { return sequences.size(); } public static FuzzyAlignment getFuzzyAlignment(List<AlignmentData> alignments) { FuzzyAlignment fuzzyAlignment = new FuzzyAlignment(); fuzzyAlignment.sequences = alignments.get(0).sequences; fuzzyAlignment.names = alignments.get(0).names; fuzzyAlignment.length = alignments.get(0).sequences.get(0).length(); fuzzyAlignment.alignments = alignments; int numSequences = alignments.get(0).sequences.size(); double numAlignmentsDouble = (double) alignments.size(); int length = alignments.get(0).sequences.get(0).length(); fuzzyAlignment.columns = new ArrayList<FuzzyNucleotide[]>(length); for(int i = 0 ; i < length ; i++) { FuzzyNucleotide [] fuzzyNucleotide = new FuzzyNucleotide[numSequences]; for(int j = 0 ; j < fuzzyNucleotide.length ; j++) { fuzzyNucleotide[j] = new FuzzyNucleotide(); } fuzzyAlignment.columns.add(fuzzyNucleotide); } for(int al = 0 ; al < alignments.size() ; al++) { AlignmentData a = alignments.get(al); for(int row = 0 ; row < a.sequences.size() ; row++) { String seq = a.sequences.get(row); for(int col = 0 ; col < seq.length() ; col++) { double [] ambiguities = normalize(MatrixTools.createSVector(seq.charAt(col))); for(int l = 0 ; l < ambiguities.length ; l++) { fuzzyAlignment.columns.get(col)[row].probability[l] += ambiguities[l] / numAlignmentsDouble; } } } } //System.out.println(fuzzyAlignment.length+" fuzzy\n"+fuzzyAlignment+"\n"); return fuzzyAlignment; } /* public static double[] createSVector2(final char nt) { final double[] vector = MatrixTools.createVector(4, 0.01); //uncertainty in nucleotide //final double[] vector = createVector(4, 0.00); final char[] nts = MatrixTools.createNts(nt); double count = 0; for (final char n : nts) { if (n == 'a') { vector[0] = 1; count++; } if (n == 'u') { vector[1] = 1; count++; } if (n == 'g') { vector[2] = 1; count++; } if (n == 'c') { vector[3] = 1; count++; } } for(int i = 0 ; i < vector.length ; i++) { vector[i] /= count; } return vector; }*/ public static FuzzyAlignment getFuzzyAlignmentAndProject(List<AlignmentData> alignments, int seqno) { ArrayList<AlignmentData> projectedAlignments = new ArrayList<AlignmentData>(); for(int i = 0 ; i < alignments.size() ; i++) { projectedAlignments.add(projectAlignment(alignments.get(i), seqno)); } return getFuzzyAlignment(projectedAlignments); } public static FuzzyAlignment getFuzzyAlignmentAndProject(List<AlignmentData> alignments, String refSeqName) { ArrayList<AlignmentData> projectedAlignments = new ArrayList<AlignmentData>(); for(int i = 0 ; i < alignments.size() ; i++) { projectedAlignments.add(projectAlignment(alignments.get(i), refSeqName)); } return getFuzzyAlignment(projectedAlignments); } public static AlignmentData projectAlignment(List<String> sequences, List<String> inputNames, String refSeqName) { ArrayList<String> names = new ArrayList<String>(inputNames.size()); for(int i = 0 ; i < inputNames.size() ; i++) { names.add(inputNames.get(i).trim()); } int seqno = names.indexOf(refSeqName.trim()); if(seqno == -1) { System.err.println("Could not find refseqname:" +refSeqName); } seqno = Math.max(seqno, 0); AlignmentData projectedAlignment = new AlignmentData(); projectedAlignment.sequences = new ArrayList<String>(); projectedAlignment.names = names; String refSeq = sequences.get(seqno); for(int i = 0 ; i < sequences.size() ; i++) { projectedAlignment.sequences.add(Mapping.projectSequence(refSeq, sequences.get(i), '-')); } return projectedAlignment; } public static AlignmentData projectAlignment(AlignmentData input, int seqno) { AlignmentData projectedAlignment = new AlignmentData(); projectedAlignment.sequences = new ArrayList<String>(); projectedAlignment.names = input.names; String refSeq = input.sequences.get(seqno); for(int i = 0 ; i < input.sequences.size() ; i++) { projectedAlignment.sequences.add(Mapping.projectSequence(refSeq, input.sequences.get(i), '-')); } return projectedAlignment; } public static AlignmentData projectAlignment(AlignmentData input, String refSeqName) { ArrayList<String> names = new ArrayList<String>(input.names.size()); for(int i = 0 ; i < input.names.size() ; i++) { names.add(input.names.get(i).trim()); } int seqno = names.indexOf(refSeqName.trim()); if(seqno == -1) { System.err.println("Could not find refseqname:" +refSeqName); } seqno = Math.max(seqno, 0); return projectAlignment(input, seqno); } public String toString() { String ret = ""; for(int i = 0 ; i < columns.get(0).length; i++) { ret += ">"+names.get(i)+"\n"+sequences.get(i)+"\n"; for(int j = 0 ; j < columns.size() ; j++) { ret += j+"["+columns.get(j)[i].toString()+"] "; } ret += "\n"; } return ret; } public static void main(String [] args) { AlignmentData a = new AlignmentData(); a.sequences.add("A-G"); a.sequences.add("AAT"); a.sequences.add("AAT"); a.names.add("a"); a.names.add("b"); a.names.add("c"); AlignmentData b = new AlignmentData(); b.sequences.add("A-A"); b.sequences.add("AAT"); b.sequences.add("AAT"); ArrayList<AlignmentData> alignments = new ArrayList<AlignmentData>(); alignments.add(a); alignments.add(b); FuzzyAlignment fuzzy = getFuzzyAlignmentAndProject(alignments, 0); System.out.println(fuzzy); } public double [] getFrequencies(int colA, int row, boolean useMapping) { int col1 = colA; if(useMapping) { col1 = mapping.get(col1); //System.out.println(colA + " ->" + col1); } double [] frequencies = new double[4]; double numAlignments = (double) alignments.size(); for(int i = 0 ; i < alignments.size() ; i++) { String seq1 = alignments.get(i).sequences.get(row); double [] ambiguities = MatrixTools.createSVector(seq1.charAt(col1)); for(int k = 0 ; k < ambiguities.length ; k++) { frequencies[k] += ambiguities[k]/numAlignments; } } return normalize(frequencies); } /*public static double [][] getFrequencyPairs(List<FuzzyNucleotide[]> columns1, List<FuzzyNucleotide[]> columns2) { }*/ public static double [] normalize(double [] vector) { double sum = 0; for(int i = 0 ; i < vector.length ; i++) { sum += vector[i]; } double [] array = new double[vector.length]; for(int i = 0 ; i < vector.length ; i++) { array[i] = vector[i] / sum; } return array; } public double [][] getFrequencyPairs(int colA, int colB, int row, boolean useMapping) { int col1 = colA; int col2 = colB; if(useMapping) { col1 = mapping.get(col1); col2 = mapping.get(col2); } //System.out.println("a,b="+col1+","+col2); double [][] nucleotidePairs = new double[4][4]; double numAlignments = (double) alignments.size(); for(int i = 0 ; i < alignments.size() ; i++) { String seq1 = alignments.get(i).sequences.get(row); double [] ambiguities1 = normalize(MatrixTools.createSVector(seq1.charAt(col1))); double [] ambiguities2 = normalize(MatrixTools.createSVector(seq1.charAt(col2))); double [][] result = new double[4][4]; MatrixTools.multiplyVectorVector(ambiguities1, ambiguities2, result); for(int j = 0 ; j < 4 ; j++) { for(int k = 0 ; k < 4 ; k++) { nucleotidePairs[j][k] += result[j][k]/numAlignments; } } } return nucleotidePairs; } public static double distance(FuzzyAlignment fz1, FuzzyAlignment fz2) { double distance = 0; for(int i = 0 ; i < fz1.columns.size() ; i++) { FuzzyNucleotide[] c1 = fz1.columns.get(i); FuzzyNucleotide[] c2 = fz2.columns.get(i); for(int j = 0 ; j < c1.length ; j++) { double [] p1 = normalize(c1[j].probability); double [] p2 = normalize(c2[j].probability); distance += euclideanDistance(p1, p2); } } return distance; } public static double AMA(FuzzyAlignment fz1, FuzzyAlignment fz2){ int sum = 0; for(int i = 0; i<fz1.getNumSequences(); ++i){ sum += fz1.sequences.get(i).length(); } return 1 - FuzzyAlignment.distance(fz1, fz2)/ (double)( sum ); } public static double euclideanDistance(double [] v1, double [] v2) { double distance =0; for(int i = 0 ; i < v1.length ; i++) { distance += Math.pow(v1[i]-v2[i], 2); } return Math.sqrt(distance); } }