package org.genedb.web.mvc.model.load; import org.apache.log4j.Logger; import org.biojava.bio.BioException; import org.biojava.bio.proteomics.IsoelectricPointCalc; import org.biojava.bio.proteomics.MassCalc; import org.biojava.bio.seq.ProteinTools; import org.biojava.bio.seq.io.SymbolTokenization; import org.biojava.bio.symbol.SimpleSymbolList; import org.biojava.bio.symbol.SymbolList; import org.biojava.bio.symbol.SymbolPropertyTable; import org.gmod.schema.utils.PeptideProperties; /** * * lo2@author sangerinstitute * Much of these methods contain snippets from the Polypeptide class */ public class PolypeptidePropertiesHelper { private static Logger logger = Logger.getLogger(PolypeptidePropertiesHelper.class); /** * Calculate the predicted properties of this polypeptide. * * @return a <code>PeptideProperties</code> object containing the predicted * properties of this polypeptide. */ public static PeptideProperties calculateStats(FeatureMapper polypeptideMapper) { if (polypeptideMapper.getResidues() == null) { logger.warn("No residues for '" + polypeptideMapper.getUniqueName() + "'"); return null; } String residuesString = new String(polypeptideMapper.getResidues()); SymbolList residuesSymbolList = null; PeptideProperties pp = new PeptideProperties(); try { SymbolTokenization proteinTokenization = ProteinTools.getTAlphabet().getTokenization("token"); residuesSymbolList = new SimpleSymbolList(proteinTokenization, residuesString); if (residuesSymbolList.length() == 0) { logger.error(String.format("Polypeptide feature '%s' has zero-length residues", polypeptideMapper.getUniqueName())); return pp; } try { // if the sequence ends with a termination symbol (*), we need to remove it if (residuesSymbolList.symbolAt(residuesSymbolList.length()) == ProteinTools.ter()) { if (residuesSymbolList.length() == 1) { logger.error(String.format("Polypeptide feature '%s' only has termination symbol", polypeptideMapper.getUniqueName())); return pp; } residuesSymbolList = residuesSymbolList.subList(1, residuesSymbolList.length() - 1); } } catch (IndexOutOfBoundsException exception) { throw new RuntimeException(exception); } } catch (BioException e) { logger.error("Can't translate into a protein sequence", e); return pp; } pp.setAminoAcids(residuesSymbolList.length()); try { double isoElectricPoint = new IsoelectricPointCalc().getPI(residuesSymbolList, false, false); pp.setIsoelectricPoint(isoElectricPoint); } catch (Exception e) { logger.error(String.format("Error computing protein isoelectric point for '%s'", residuesSymbolList), e); } double mass2 = calculateMass(polypeptideMapper, residuesSymbolList); if (mass2 != -1) { //mass = mass2; pp.setMass(mass2); } double charge = calculateCharge(residuesString); pp.setCharge(charge); return pp; } private static double calculateMass(FeatureMapper polypeptideMapper, SymbolList residuesSymbolList) { try { double massInDaltons = MassCalc.getMass(residuesSymbolList, SymbolPropertyTable.AVG_MASS, true); return massInDaltons; } catch (Exception exp) { logger.error(String.format("Error computing protein mass in '%s' because '%s'", polypeptideMapper.getUniqueName(), exp.getMessage())); } return -1.0; } /** * Calculate the charge of a polypeptide. * * @param residues a string representing the polypeptide residues, using the single-character code * @return the charge of this polypeptide (in what units?) */ private static double calculateCharge(String residues) { double charge = 0.0; for (char aminoAcid: residues.toCharArray()) { switch (aminoAcid) { case 'B': case 'Z': charge += -0.5; break; case 'D': case 'E': charge += -1.0; break; case 'H': charge += 0.5; break; case 'K': case 'R': charge += 1.0; break; /* * EMBOSS seems to think that 'O' (presumably Pyrrolysine) * also contributes +1 to the charge. According to Wikipedia, * this obscure amino acid is found only in methanogenic archaea, * so it's unlikely to trouble us soon. Still, it can't hurt: */ case 'O': charge += 1.0; break; } } return charge; } }