/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.viewer.gui;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.toolbox.proteomics.MSRun;
import org.fhcrc.cpl.viewer.Application;
import org.fhcrc.cpl.viewer.util.SharedProperties;
import org.fhcrc.cpl.toolbox.datastructure.Tree2D;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.proteomics.Protein;
import org.fhcrc.cpl.toolbox.proteomics.PeptideGenerator;
import org.fhcrc.cpl.toolbox.proteomics.Peptide;
import org.fhcrc.cpl.toolbox.proteomics.filehandler.FastaLoader;
import javax.swing.*;
import java.awt.event.ActionEvent;
import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Map;
/**
* User: migra
* Date: Jul 13, 2004
* Time: 1:08:05 PM
*/
public class CoverageCalculatorAction extends AbstractAction implements PropertyChangeListener
{
public CoverageCalculatorAction()
{
super("Calculate Peptide Coverage");
ApplicationContext.addPropertyChangeListener("fastaFile", this);
ApplicationContext.addPropertyChangeListener("featureSelector", this);
ApplicationContext.addPropertyChangeListener(SharedProperties.FEATURE_RANGES, this);
}
private CoverageInfo computeCoverage(Peptide[] peptides, float[] distances, float maxDistance)
{
int pepCount = 0;
int byteCount = 0;
int nextByte = 0;
for (int i = 0; i < distances.length; i++)
{
if (distances[i] > maxDistance)
continue;
pepCount++;
//Careful not to double-count bytes for overlapping peptides (missed cleavages)
int offset = peptides[i].getStart() + peptides[i].getLength();
if (offset > nextByte)
{
int len = Math.min(offset - nextByte, peptides[i].getLength());
byteCount += len;
nextByte = offset;
}
}
return new CoverageInfo(byteCount, pepCount);
}
/**
* Output a file with feature coverage information for
* each protein in the current fasta-file and the current set of
* features.
* Tab delimted output has following columns:
* Protein id, distance, length, coveredLength, peptides, coveredPeptides
* <p/>
* Search is done according to current feature & fasta settings
*/
public void actionPerformed(ActionEvent event)
{
OpenFastaDialog openFastaDialog = new OpenFastaDialog();
File fastaFile = openFastaDialog.getFastaFile();
if (null == fastaFile)
{
//JOptionPane.showMessageDialog(null, "No fasta file is open");
ApplicationContext.errorMessage("No fasta file is open.", null);
return;
}
MSRun run = (MSRun) ApplicationContext.getProperty(SharedProperties.MS_RUN);
if (null == run)
{
//JOptionPane.showMessageDialog(null, "No run file is open");
ApplicationContext.errorMessage("No run file is open.", null);
return;
}
java.util.List featureSets = (java.util.List) ApplicationContext.getProperty(SharedProperties.FEATURE_RANGES);
File runFile = run.getFile();
String outputFileName = runFile.getAbsolutePath() + ".coverage.tsv";
File outputFile = new File(outputFileName);
CoverageCalculator2D calculator = new CoverageCalculator2D(fastaFile, featureSets, outputFile, (float) openFastaDialog.getTolerance());
Thread t = new Thread(calculator);
t.start();
}
public void propertyChange(PropertyChangeEvent event)
{
Application app = Application.getInstance();
setEnabled(null != app.getProperty(SharedProperties.FEATURE_RANGES) && null != app.getProperty("fastaFile"));
}
private static class CoverageInfo
{
int coveredLength;
int coveredPeptides;
public CoverageInfo(int coveredLength, int coveredPeptides)
{
this.coveredLength = coveredLength;
this.coveredPeptides = coveredPeptides;
}
}
private class CoverageCalculator implements Runnable
{
private File _fastaFile;
private java.util.List _featureSets;
private File _outputFile;
private float _maxDistance;
public CoverageCalculator(File fastaFile, java.util.List featureSets, File outputFile, float maxDistance)
{
_fastaFile = fastaFile;
_featureSets = featureSets;
_outputFile = outputFile;
_maxDistance = maxDistance;
}
public void run()
{
PrintWriter out = null;
FastaLoader.ProteinIterator iterator = null;
try
{
out = new PrintWriter(new FileOutputStream(_outputFile));
out.print("protein\tlength\tpeptides");
OpenFastaDialog openFastaDialog = new OpenFastaDialog();
FastaLoader loader = new FastaLoader(_fastaFile);
iterator = (FastaLoader.ProteinIterator) loader.iterator();
float[][] featureSetMassArrays = new float[_featureSets.size()][];
for (int i = 0; i < _featureSets.size(); i++)
{
FeatureSet fs = (FeatureSet) _featureSets.get(i);
Feature[] features = fs.getFeatures();
float[] featureMasses = new float[features.length];
for (int j = 0; j < features.length; j++)
featureMasses[j] = features[j].mass;
Arrays.sort(featureMasses);
featureSetMassArrays[i] = featureMasses;
}
for (int i = 0; i < _featureSets.size(); i++)
out.print("\tcoveredLength" + i);
for (int i = 0; i < _featureSets.size(); i++)
out.print("\tcoveredPeptides" + i);
out.println();
int nProteins = 0;
while (iterator.hasNext())
{
Protein protein = (Protein) iterator.next();
String proteinId = protein.getLookup();
PeptideGenerator pepGen = new PeptideGenerator();
double[] massTab = openFastaDialog.getMassTab();
pepGen.setMassTable(massTab);
pepGen.setMaxMissedCleavages(openFastaDialog.getMissedCleavages());
Peptide[] peptides = pepGen.digestProtein(protein);
float[] distances = new float[peptides.length];
out.print(proteinId);
out.print("\t");
out.print(protein.getBytes().length);
out.print("\t");
out.print(peptides.length);
CoverageInfo[] coverage = new CoverageInfo[featureSetMassArrays.length];
for(int i = 0; i < featureSetMassArrays.length; i++)
{
float[] featureMasses = featureSetMassArrays[i];
for (int j = 0; j < peptides.length; j++)
{
double pepMass = peptides[j].getMass();
int index = Arrays.binarySearch(featureMasses, (float) pepMass);
double distance = Double.MAX_VALUE;
if (index >= 0)
distance = 0; //found an exact match
else
{
index = -index -1;
if (index > 0)
distance = Math.abs(pepMass - featureMasses[index - 1]);
if (index < featureMasses.length)
{
double dist2 = Math.abs(pepMass - featureMasses[index]);
if (dist2 < distance)
distance = dist2;
}
}
distances[j] = (float) distance;
}
coverage[i] = computeCoverage(peptides, distances, _maxDistance);
}
for (int i = 0; i < coverage.length; i++)
{
out.print("\t");
out.print(coverage[i].coveredLength);
}
for (int i = 0; i < coverage.length; i++)
{
out.print("\t");
out.print(coverage[i].coveredPeptides);
}
out.println();
if (nProteins++ % 100 == 0)
ApplicationContext.setMessage(String.valueOf(nProteins - 1) + " proteins processed.");
}
iterator = null;
}
catch (Exception x)
{
ApplicationContext.errorMessage(null, x);
}
finally
{
if (null != out)
out.close();
if (null != iterator)
iterator.close();
ApplicationContext.setMessage("Coverage information is in file " + _outputFile.getAbsolutePath());
}
}
}
private class CoverageCalculator2D implements Runnable
{
private static final String SLOPE_PROPERTY = "hydroSlope";
private static final String INTERCEPT_PROPERTY = "hydroIntercept";
private static final String SIGMA_PROPERTY = "hydroSigma";
private File _fastaFile;
private java.util.List _featureSets;
private File _outputFile;
private float _maxDistance;
private Tree2D[] _trees;
private double[] _slope;
private double[] _intercept;
private double[] _scanWindow;
public CoverageCalculator2D(File fastaFile, java.util.List featureSets, File outputFile, float maxDistance)
{
_fastaFile = fastaFile;
_featureSets = featureSets;
_outputFile = outputFile;
_maxDistance = maxDistance;
_slope = new double[featureSets.size()];
Arrays.fill(_slope, Double.NaN);
_intercept = new double[featureSets.size()];
Arrays.fill(_intercept, Double.NaN);
_scanWindow = new double[featureSets.size()];
Arrays.fill(_scanWindow, Double.NaN);
}
public CoverageCalculator2D(File fastaFile, java.util.List featureSets, File outputFile, float maxDistance,
double slope, double intercept, double sigma)
{
this(fastaFile, featureSets, outputFile, maxDistance);
Arrays.fill(_slope, slope);
Arrays.fill(_intercept, intercept);
Arrays.fill(_scanWindow, sigma * 2);
}
public void run()
{
_trees = new Tree2D[_featureSets.size()];
//Populate the trees
for (int i = 0; i < _trees.length; i++)
{
Tree2D tree = new Tree2D();
FeatureSet fs = ((FeatureSet) _featureSets.get(i));
Map properties = fs.getProperties();
if (properties.containsKey(SLOPE_PROPERTY))
_slope[i] = Double.parseDouble((String) properties.get(SLOPE_PROPERTY));
if (properties.containsKey(INTERCEPT_PROPERTY))
_intercept[i] = Double.parseDouble((String) properties.get(INTERCEPT_PROPERTY));
if (properties.containsKey(SIGMA_PROPERTY))
{
String sigma = (String ) properties.get(SIGMA_PROPERTY);
if (null != sigma)
_scanWindow[i] = 2 * Double.parseDouble(sigma);
}
Feature[] features = fs.getFeatures();
for (int j = 0; j < features.length; j++)
{
Feature feature = features[j];
tree.add(feature.getScan(),feature.getMass(), feature);
}
_trees[i] = tree;
}
PrintWriter out = null;
FastaLoader.ProteinIterator iterator = null;
try
{
out = new PrintWriter(new FileOutputStream(_outputFile));
out.print("protein\tlength\tpeptides");
OpenFastaDialog openFastaDialog = new OpenFastaDialog();
FastaLoader loader = new FastaLoader(_fastaFile);
iterator = (FastaLoader.ProteinIterator) loader.iterator();
for (int i = 0; i < _featureSets.size(); i++)
out.print("\tcoveredLength" + i);
for (int i = 0; i < _featureSets.size(); i++)
out.print("\tcoveredPeptides" + i);
out.println();
int nProteins = 0;
while (iterator.hasNext())
{
Protein protein = (Protein) iterator.next();
String proteinId = protein.getLookup();
PeptideGenerator pepGen = new PeptideGenerator();
double[] massTab = openFastaDialog.getMassTab();
pepGen.setMassTable(massTab);
pepGen.setMaxMissedCleavages(openFastaDialog.getMissedCleavages());
Peptide[] peptides = pepGen.digestProtein(protein);
float[] distances = new float[peptides.length];
out.print(proteinId);
out.print("\t");
out.print(protein.getBytes().length);
out.print("\t");
out.print(peptides.length);
CoverageInfo[] coverage = new CoverageInfo[_trees.length];
for(int i = 0; i < _trees.length; i++)
{
Tree2D tree = _trees[i];
for (int j = 0; j < peptides.length; j++)
{
float pepMass = (float) peptides[j].getMass();
float pepScan = 0;
//TODO: Make this depend on actual size of file.
float scanRange = 100000;
if (_slope[i] != Double.NaN)
{
pepScan = (float) (peptides[j].getHydrophobicity() * _slope[i] + _intercept[i]);
scanRange = (float) _scanWindow[i];
}
if (tree.containsPoints(pepScan - scanRange, pepMass - _maxDistance, pepScan + scanRange, pepMass + _maxDistance))
distances[i] = 0;
else
distances[i] = Float.MAX_VALUE;
}
coverage[i] = computeCoverage(peptides, distances, _maxDistance);
}
for (int i = 0; i < coverage.length; i++)
{
out.print("\t");
out.print(coverage[i].coveredLength);
}
for (int i = 0; i < coverage.length; i++)
{
out.print("\t");
out.print(coverage[i].coveredPeptides);
}
out.println();
if (nProteins++ % 100 == 0)
ApplicationContext.setMessage(String.valueOf(nProteins - 1) + " proteins processed.");
}
iterator = null;
}
catch (Exception x)
{
ApplicationContext.errorMessage(null, x);
}
finally
{
if (null != out)
out.close();
if (null != iterator)
iterator.close();
ApplicationContext.setMessage("Coverage information is in file " + _outputFile.getAbsolutePath());
}
}
}
}