/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.viewer.amt;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.proteomics.Protein;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.BrowserController;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef;
import java.io.IOException;
import java.util.*;
import java.net.URL;
import java.net.URLEncoder;
/**
 * Methods for HTML display of protein sequences. Also, methods for creating URLs to access
 * online resources for protein information.
 *
 * All methods are static. "Peptide of a feature" below always means the value returned by
 * {@code MS2ExtraInfoDef.getFirstPeptide(feature)}.
 */
public class ProteinDisplay
{
    // constants for html protein display
    protected static int DISPLAY_TYPE_BASE=0;
    protected static int DISPLAY_TYPE_MATCHED_MS1=1;
    protected static int DISPLAY_TYPE_MATCHED_MS2=2;
    protected static int DISPLAY_TYPE_MATCHED_MS1_AND_MS2=3;
    protected static int DISPLAY_TYPE_SELECTED=4;

    //display modes
    public static final int DISPLAY_MATCHED_FEATURES_MODE=0;
    public static final int DISPLAY_UNMATCHED_MS2_FEATURES_MODE=1;
    public static final int DISPLAY_UNMATCHED_PROTEIN_PEPTIDES_MODE=2;

    private static final Logger _log = Logger.getLogger(ProteinDisplay.class);

    //base URL of the online NCBI Entrez viewer
    protected static final String NCBIEntrezViewerBaseUrl="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi";

    //identifier types tried by openNCBIBrowserWindow, most preferred first
    private static final String[] NCBI_IDENTIFIER_PREFERENCE =
            {"SwissProt", "SwissProtAccn", "IPI", "ENSEMBL"};

    /**
     * For a given protein and feature lists, create an HTML string to display the protein.
     *
     * In {@link #DISPLAY_MATCHED_FEATURES_MODE} the residues covered by the MS1-matched,
     * MS2-matched and MS1-and-MS2-matched peptides are colored in that order (a later
     * coloring overwrites an earlier one) and the MS1-matched residues are flagged for
     * underlining. In every mode the selected features are colored last, so they always win.
     *
     * @param protein the protein whose sequence is rendered
     * @param ms1MatchedFeatures may be null
     * @param ms2MatchedFeatures may be null
     * @param ms1AndMS2MatchedFeatures may be null
     * @param selectedFeatures may be null
     * @param featureDisplayMode one of the DISPLAY_*_MODE constants
     * @return the complete HTML document string for the protein sequence
     */
    public static String getProteinSequenceHtml(Protein protein, ArrayList<Feature> ms1MatchedFeatures,
                                                ArrayList<Feature> ms2MatchedFeatures,
                                                ArrayList<Feature> ms1AndMS2MatchedFeatures,
                                                ArrayList<Feature> selectedFeatures,
                                                int featureDisplayMode)
    {
        String proteinSequence = protein.getSequenceAsString();

        int[] characterTypes = new int[proteinSequence.length()];
        Arrays.fill(characterTypes, DISPLAY_TYPE_BASE);
        //a new boolean[] is already all false, so no explicit initialization is needed
        boolean[] underlines = new boolean[proteinSequence.length()];

        if (featureDisplayMode == DISPLAY_MATCHED_FEATURES_MODE)
        {
            //highlightFeatures and underlineFeatures ignore null lists
            highlightFeatures(characterTypes, ms1MatchedFeatures, proteinSequence,
                    DISPLAY_TYPE_MATCHED_MS1);
            underlineFeatures(underlines, ms1MatchedFeatures, proteinSequence);
            highlightFeatures(characterTypes, ms2MatchedFeatures, proteinSequence,
                    DISPLAY_TYPE_MATCHED_MS2);
            highlightFeatures(characterTypes, ms1AndMS2MatchedFeatures, proteinSequence,
                    DISPLAY_TYPE_MATCHED_MS1_AND_MS2);
        }
        highlightFeatures(characterTypes, selectedFeatures, proteinSequence, DISPLAY_TYPE_SELECTED);

        return createHtmlString(createHtmlSnippets(characterTypes, underlines, proteinSequence));
    }

    /**
     * Return one HTML string per displayed feature, each showing that feature's peptide
     * highlighted appropriately.
     *
     * In {@link #DISPLAY_MATCHED_FEATURES_MODE} a peptide is shown as MS1-matched, or as
     * MS1-and-MS2-matched if it also occurs among ms1AndMS2MatchedFeatures. In the other modes
     * it is shown with the base style. A feature contained in selectedFeatures is always shown
     * as selected.
     *
     * @param displayedFeatures the features to render
     * @param ms1AndMS2MatchedFeatures may be null
     * @param selectedFeatures may be null
     * @param featureDisplayMode one of the DISPLAY_*_MODE constants
     * @return an array parallel to displayedFeatures, or null if displayedFeatures is null
     */
    public static String[] getHtmlForFeatures(  ArrayList<Feature> displayedFeatures,
                                                ArrayList<Feature> ms1AndMS2MatchedFeatures,
                                                ArrayList<Feature> selectedFeatures,
                                                int featureDisplayMode)
    {
        if (displayedFeatures == null)
            return null;

        String[] result = new String[displayedFeatures.size()];

        //createHtmlString wants a list; reuse a single one-element list for every feature
        ArrayList<String> singleSnippet = new ArrayList<String>(1);
        singleSnippet.add("");

        for (int i = 0; i < displayedFeatures.size(); i++)
        {
            Feature currentFeature = displayedFeatures.get(i);
            String peptide = MS2ExtraInfoDef.getFirstPeptide(currentFeature);

            int displayType = DISPLAY_TYPE_BASE;
            if (featureDisplayMode == DISPLAY_MATCHED_FEATURES_MODE)
            {
                displayType = peptideIsPartOf(peptide, ms1AndMS2MatchedFeatures)
                        ? DISPLAY_TYPE_MATCHED_MS1_AND_MS2
                        : DISPLAY_TYPE_MATCHED_MS1;
            }
            //selection overrides any other style
            if (selectedFeatures != null && selectedFeatures.contains(currentFeature))
                displayType = DISPLAY_TYPE_SELECTED;

            singleSnippet.set(0, createHtmlSnippet(peptide, displayType, true));
            result[i] = createHtmlString(singleSnippet);
        }
        return result;
    }

    /**
     * Check whether any feature in the list has the given peptide.
     * @param peptide the peptide sequence to look for
     * @param featureList the features to search
     * @return true if some feature's peptide equals the given one; false if either
     * argument is null or no feature matches
     */
    protected static boolean peptideIsPartOf(String peptide, ArrayList<Feature> featureList)
    {
        if (featureList == null || peptide == null)
            return false;
        for (Feature feature : featureList)
        {
            if (peptide.equals(MS2ExtraInfoDef.getFirstPeptide(feature)))
                return true;
        }
        return false;
    }

    /**
     * Calculate the percent coverage of a protein by the peptides in a featureset.
     * The only reason this is in ProteinDisplay is because it shares the peptide-location
     * logic used for displaying the protein sequence.
     * @param features may be null, in which case nothing is covered
     * @param proteinSequence the protein sequence
     * @return percentage (0-100) of residues covered by at least one peptide; 0 for a null
     * or empty sequence (instead of NaN from a 0/0 division)
     */
    public static double calculatePercentCovered(ArrayList<Feature> features,
                                                 String proteinSequence)
    {
        if (proteinSequence == null || proteinSequence.length() == 0)
            return 0.0;

        int numResiduesCovered = 0;
        for (boolean covered : findCoveredResidues(features, proteinSequence))
        {
            if (covered)
                numResiduesCovered++;
        }

        //don't forget to convert to a percent
        return (100.0 * (double) numResiduesCovered / (double) proteinSequence.length());
    }

    /**
     * Locate every occurrence of every feature's peptide in the protein sequence.
     *
     * Occurrences at index 0 are included, and the search resumes one residue after each hit
     * so that self-overlapping occurrences are found too. Features with a null or empty
     * peptide are skipped: an empty peptide would match at every position and never advance.
     *
     * @param features may be null
     * @param proteinSequence the protein sequence
     * @return an array with one entry per residue, true where the residue lies inside at
     * least one peptide occurrence
     */
    private static boolean[] findCoveredResidues(ArrayList<Feature> features, String proteinSequence)
    {
        boolean[] covered = new boolean[proteinSequence.length()];
        if (features == null)
            return covered;

        for (Feature feature : features)
        {
            String peptide = MS2ExtraInfoDef.getFirstPeptide(feature);
            if (peptide == null || peptide.length() == 0)
                continue;

            int start = proteinSequence.indexOf(peptide);
            while (start >= 0)
            {
                Arrays.fill(covered, start, start + peptide.length(), true);
                start = proteinSequence.indexOf(peptide, start + 1);
            }
        }
        return covered;
    }

    /**
     * Find a set of peptides from a set of features and mark them, obliterating whatever
     * marking they had already. So, note: last highlight takes all.
     * @param characterTypes per-residue display types, updated in place; expected to have
     * one entry per residue of proteinSequence
     * @param features may be null, in which case nothing happens
     * @param proteinSequence the protein sequence
     * @param highlightStyle the DISPLAY_TYPE_* value to write for covered residues
     */
    protected static void highlightFeatures(int[] characterTypes, ArrayList<Feature> features,
                                            String proteinSequence, int highlightStyle)
    {
        if (features == null)
            return;

        boolean[] covered = findCoveredResidues(features, proteinSequence);
        for (int j = 0; j < covered.length; j++)
        {
            if (covered[j])
                characterTypes[j] = highlightStyle;
        }
    }

    /**
     * Indicate that certain features' residues should be underlined.
     * @param underlines per-residue underline flags, updated in place (flags are only ever
     * set, never cleared); expected to have one entry per residue of proteinSequence
     * @param features may be null, in which case nothing happens
     * @param proteinSequence the protein sequence
     */
    protected static void underlineFeatures(boolean[] underlines, ArrayList<Feature> features,
                                            String proteinSequence)
    {
        if (features == null)
            return;

        boolean[] covered = findCoveredResidues(features, proteinSequence);
        for (int j = 0; j < covered.length; j++)
        {
            if (covered[j])
                underlines[j] = true;
        }
    }

    /**
     * From an array of html snippets, create an html string for the whole protein. This can
     * include extra formatting, since each snippet is independent.
     *
     * The snippets are concatenated inside {@code <html><pre>...</pre></html>}. A space is
     * appended after every 10th snippet, except that every 60th snippet is followed by a
     * newline instead.
     * @param snippetsArray the snippets, in display order
     * @return the complete HTML string
     */
    protected static String createHtmlString(ArrayList<String> snippetsArray)
    {
        StringBuilder html = new StringBuilder("<html><pre>");
        for (int i = 0; i < snippetsArray.size(); i++)
        {
            html.append(snippetsArray.get(i));

            //insert spaces every 10th residue, line breaks every 60th
            int position = i + 1;
            if (position % 10 == 0)
            {
                if (position % 60 == 0)
                    html.append("\n");
                else
                    html.append(' ');
            }
        }
        html.append("</pre></html>");
        return html.toString();
    }

    /**
     * Create an arraylist of html snippets, one for each residue. Styles for each character
     * are controlled by the characterTypes and underlines arrays.
     * @param characterTypes per-residue DISPLAY_TYPE_* values; its length determines how many
     * residues are rendered
     * @param underlines per-residue underline flags
     * @param proteinSequence the protein sequence
     * @return one snippet per entry of characterTypes, in sequence order
     */
    protected static ArrayList<String> createHtmlSnippets(int[] characterTypes,
                                                          boolean[] underlines,
                                                          String proteinSequence)
    {
        ArrayList<String> result = new ArrayList<String>(proteinSequence.length());
        for (int i = 0; i < characterTypes.length; i++)
        {
            String residue = String.valueOf(proteinSequence.charAt(i));
            result.add(createHtmlSnippet(residue, characterTypes[i], underlines[i]));
        }
        return result;
    }

    /**
     * Create an html snippet for a series of residues (may be just one residue).
     *
     * Base-type residues are returned as plain text, without color or underline even if
     * shouldUnderline is true. Every other display type is wrapped in a colored font tag,
     * then optionally in bold and underline tags.
     * @param residues the residue text; null is rendered as an empty string rather than
     * the literal text "null"
     * @param displayType one of the DISPLAY_TYPE_* values; unrecognized values render black
     * @param shouldUnderline whether to underline (ignored for the base display type)
     * @return the html snippet
     */
    public static String createHtmlSnippet(String residues, int displayType,
                                           boolean shouldUnderline)
    {
        String text = (residues == null) ? "" : residues;
        if (displayType == DISPLAY_TYPE_BASE)
            return text;

        boolean shouldBold = false;
        String colorString = "#000000";
        if (displayType == DISPLAY_TYPE_MATCHED_MS1)
        {
            //yellow
            colorString = "#D0D000";
        }
        else if (displayType == DISPLAY_TYPE_MATCHED_MS2)
        {
            //red
            colorString = "#FF0000";
        }
        else if (displayType == DISPLAY_TYPE_MATCHED_MS1_AND_MS2)
        {
            //Orange, bold
            colorString = "#FFA000";
            shouldBold = true;
        }
        else if (displayType == DISPLAY_TYPE_SELECTED)
        {
            //blue
            colorString = "#0000FF";
        }

        String snippet = "<font color=\"" + colorString + "\">" + text + "</font>";
        if (shouldBold)
            snippet = "<b>" + snippet + "</b>";
        if (shouldUnderline)
            snippet = "<u>" + snippet + "</u>";
        return snippet;
    }

    /**
     * Create a URL that can be used to query the NCBI database.
     * @param queryValue the identifier to look up; it is URL-encoded before being placed in
     * the query string
     * @return the URL, or null if queryValue is null or the URL could not be built (the
     * reason is logged)
     */
    public static URL createNCBIEntrezProteinURL(String queryValue)
    {
        if (queryValue == null)
        {
            _log.warn("Cannot create NCBI Entrez URL: null query value");
            return null;
        }
        try
        {
            String query = "db=protein&val=" + URLEncoder.encode(queryValue, "UTF-8");
            return new URL(NCBIEntrezViewerBaseUrl + "?" + query);
        }
        catch (IOException e)
        {
            //covers both UnsupportedEncodingException and MalformedURLException
            _log.warn("Failed to create NCBI Entrez URL for query value " + queryValue, e);
            return null;
        }
    }

    /**
     * Pick the identifier to use for an NCBI lookup: the first non-empty value found when
     * walking the identifier types in preference order.
     * @param identifiers map from identifier type to identifier values; may be null
     * @return the chosen identifier, or null if none of the preferred types has a value
     */
    private static String chooseNCBIQueryValue(Map<String, Set<String>> identifiers)
    {
        if (identifiers == null)
            return null;
        for (String identifierType : NCBI_IDENTIFIER_PREFERENCE)
        {
            Set<String> values = identifiers.get(identifierType);
            if (values == null)
                continue;
            for (String value : values)
            {
                if (value != null && value.length() > 0)
                    return value;
            }
        }
        return null;
    }

    /**
     * Go through all the lookups associated with this protein, in descending preference order
     * based on the likelihood of finding the data at NCBI, and open a browser window for the
     * first identifier found. Does nothing if the protein has no usable identifier; a failure
     * to build the URL or to launch the browser is logged rather than thrown.
     * @param protein the protein to look up
     */
    public static void openNCBIBrowserWindow(Protein protein)
    {
        String queryValue = chooseNCBIQueryValue(protein.getIdentifierMap());
        if (queryValue == null)
            return;

        URL url = createNCBIEntrezProteinURL(queryValue);
        if (url == null)
            return;

        try
        {
            BrowserController.navigate(url);
        }
        catch (Exception e)
        {
            _log.warn("Failed to open NCBI browser window for " + url, e);
        }
    }
}