package bio.pih.genoogle.seq.protein; import java.util.Arrays; import java.util.HashMap; import bio.pih.genoogle.encoder.SequenceEncoder; import bio.pih.genoogle.encoder.SequenceEncoderFactory; import bio.pih.genoogle.io.Utils; import bio.pih.genoogle.seq.AminoAcid; import bio.pih.genoogle.seq.AminoAcidAlphabet; import bio.pih.genoogle.seq.Codon; import bio.pih.genoogle.seq.IllegalSymbolException; import bio.pih.genoogle.seq.LightweightSymbolList; import bio.pih.genoogle.seq.Reduced_AA_8_Alphabet; import bio.pih.genoogle.seq.SymbolList; import bio.pih.genoogle.util.SymbolListWindowIterator; import bio.pih.genoogle.util.SymbolListWindowIteratorFactory; public class Converter { static SymbolListWindowIteratorFactory factory = SymbolListWindowIteratorFactory .getNotOverlappedFactory(); public static SymbolList dnaToProtein(SymbolList dna) { SymbolListWindowIterator iterator = factory .newSymbolListWindowIterator(dna, 3); StringBuilder protein = new StringBuilder(); while (iterator.hasNext()) { SymbolList next = iterator.next(); AminoAcid aa = Codon.INSTANCE.convert(next.seqString()); protein.append(aa.getSymbol()); } try { return LightweightSymbolList.createProtein(protein.toString()); } catch (IllegalSymbolException e) { e.printStackTrace(); return null; } } public static SymbolList dnaToProtein1(SymbolList dna) { return dnaToProtein(dna); } public static SymbolList dnaToProtein2(SymbolList dna) { return dnaToProtein(dna.subSymbolList(2, dna.getLength())); } public static SymbolList dnaToProtein3(SymbolList dna) { return dnaToProtein(dna.subSymbolList(3, dna.getLength())); } public static SymbolList dnaToProteinComplement1(SymbolList dna) { String inverted = Utils.invert(dna.seqString()); String rcString = Utils.sequenceComplement(inverted); try { SymbolList sequence = dna.createSequence(rcString); return dnaToProtein(sequence); } catch (IllegalSymbolException e) { e.printStackTrace(); return null; } } public static SymbolList dnaToProteinComplement2(SymbolList dna) { String inverted = Utils.invert(dna.seqString()); String rcString = Utils.sequenceComplement(inverted); String substring = rcString.substring(1); try { SymbolList sequence = dna.createSequence(substring); return dnaToProtein(sequence); } catch (IllegalSymbolException e) { e.printStackTrace(); return null; } } public static SymbolList dnaToProteinComplement3(SymbolList dna) { String inverted = Utils.invert(dna.seqString()); String rcString = Utils.sequenceComplement(inverted); String substring = rcString.substring(2); try { SymbolList sequence = dna.createSequence(substring); return dnaToProtein(sequence); } catch (IllegalSymbolException e) { e.printStackTrace(); return null; } } static HashMap<Character, Character> proteinToReducedMap = new HashMap<Character, Character>(); static { proteinToReducedMap.put('A', 'A'); proteinToReducedMap.put('V', 'A'); proteinToReducedMap.put('C', 'C'); proteinToReducedMap.put('G', 'C'); proteinToReducedMap.put('N', 'C'); proteinToReducedMap.put('P', 'C'); proteinToReducedMap.put('D', 'D'); proteinToReducedMap.put('E', 'E'); proteinToReducedMap.put('K', 'E'); proteinToReducedMap.put('R', 'E'); proteinToReducedMap.put('Q', 'E'); proteinToReducedMap.put('F', 'F'); proteinToReducedMap.put('W', 'F'); proteinToReducedMap.put('Y', 'F'); proteinToReducedMap.put('H', 'F'); proteinToReducedMap.put('I', 'I'); proteinToReducedMap.put('L', 'I'); proteinToReducedMap.put('M', 'I'); proteinToReducedMap.put('S', 'S'); proteinToReducedMap.put('T', 'S'); proteinToReducedMap.put('#', 'X'); proteinToReducedMap.put('$', 'X'); } public static SymbolList proteinToReducedAA(SymbolList protein) { StringBuilder r = new StringBuilder(); if (protein.getAlphabet() != AminoAcidAlphabet.SINGLETON) { throw new RuntimeException("Invalid alphabet " + protein.getAlphabet()); } for (int i = 1; i <= protein.getLength(); i++) { r.append(proteinToReducedMap.get(protein.symbolAt(i))); } try { return LightweightSymbolList.createReducedAA(r.toString()); } catch (IllegalSymbolException e) { e.printStackTrace(); return null; } } public static String proteinToReducedAAString(String protein) { StringBuilder r = new StringBuilder(); for (int i = 0; i < protein.length(); i++) { r.append(proteinToReducedMap.get(protein.charAt(i))); } return r.toString(); } public static SymbolList dnaToReducedAA(SymbolList dna) { SymbolListWindowIterator iterator = factory.newSymbolListWindowIterator(dna, 3); StringBuilder r = new StringBuilder(); while (iterator.hasNext()) { SymbolList next = iterator.next(); AminoAcid aa = Codon.INSTANCE.convert(next.seqString()); r.append(proteinToReducedMap.get(aa.getSymbol())); } try { return LightweightSymbolList.createReducedAA(r.toString()); } catch (IllegalSymbolException e) { e.printStackTrace(); return null; } } public static void main(String[] args) throws IllegalSymbolException { String s = "TAAAAACGTGCAGGCCAACGGTACGGAAAAAGCAGCAAAAGCCTACCTGAACTGGCTCTACAGCCCGCAGGCGCAGACCATCATCACCGACTATTACTAC"; SymbolList createDNA = LightweightSymbolList.createDNA(s); SymbolList dnaToProtein = Converter.dnaToProtein(createDNA); SymbolList dnaToReduced= Converter.dnaToReducedAA(createDNA); System.out.println(dnaToProtein); System.out.println(dnaToReduced); SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(Reduced_AA_8_Alphabet.SINGLETON, 3); int[] is = encoder.encodeSymbolListToIntegerArray(dnaToReduced); System.out.println(Arrays.toString(is)); System.out.println(encoder.decodeIntegerArrayToString(is)); } }