/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.proteomics.feature;
import org.fhcrc.cpl.toolbox.datastructure.Pair;
import org.fhcrc.cpl.toolbox.datastructure.Tree2D;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.statistics.BasicStatistics;
import org.fhcrc.cpl.toolbox.normalize.Normalizer;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.FeatureExtraInformationDef;
import org.fhcrc.cpl.toolbox.proteomics.Clusterer2D;
import org.apache.log4j.Logger;
import java.io.PrintWriter;
import java.util.*;
/**
* Uses clustering (Clusterer2D) to align multiple feature sets
*
* dhmay, 20100104: changes for peptide arrays.
* Big change: clustering may optionally be done separately per charge state
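*
* A minimal usage sketch (featureSet1, featureSet2, writer, and the bucket sizes are
* hypothetical):
* <pre>
* FeatureGrouper grouper = new FeatureGrouper();
* grouper.addSet(featureSet1);
* grouper.addSet(featureSet2);
* grouper.split2D(0.1, 30);    // mass and elution bucket sizes
* float[][] intensities = grouper.writePeptideArray(writer, true);    // normalized
* </pre>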
*/
public class FeatureGrouper
{
private static Logger _log = Logger.getLogger(FeatureGrouper.class);
private List<FeatureSet> _featureSets = new ArrayList<FeatureSet>();
//handles the actual clustering
protected FeatureClusterer _featureClusterer;
protected Map<Integer, FeatureClusterer> _chargeClustererMap = new HashMap<Integer, FeatureClusterer>();
protected boolean _useMassInsteadOfMz = true;
protected boolean _shouldGroupByCharge = false;
protected Set<Integer> _allObservedCharges = new HashSet<Integer>();
protected int _conflictResolver = CONFLICT_RESOLVER_SUM;
//modes for determining which bucket settings are the best
public static final int BUCKET_EVALUATION_MODE_ONE_FROM_EACH = 0;
public static final int BUCKET_EVALUATION_MODE_PEPTIDE_AGREEMENT = 1;
//These parameters determine the scoring used to decide what the best cluster
//diameters are, when factoring in peptide agreement
//minimum number of features in a bucket, as a multiple of the number of sets
//(1 = one feature per set), to consider the bucket a hit
protected double bucketEvaluationPeptideAgreementMinAlignedNumSetsMultiple = 1;
//minimum number of matching peptide IDs in the bucket, as a multiple of the
//number of sets, to consider it a hit
protected double bucketEvaluationPeptideAgreementMinPeptideIdsNumSetsMultiple = 0.5;
//score for a matched bucket
protected int bucketEvaluationPeptideAgreementMatchScore =
DEFAULT_PEPTIDE_AGREEMENT_MATCH_SCORE;
//penalty for a mismatched bucket
protected int bucketEvaluationPeptideAgreementMismatchPenalty =
DEFAULT_PEPTIDE_AGREEMENT_MISMATCH_PENALTY;
public static final int DEFAULT_PEPTIDE_AGREEMENT_MATCH_SCORE=1;
public static final int DEFAULT_PEPTIDE_AGREEMENT_MISMATCH_PENALTY=1;
//how to resolve conflicts, i.e., multiple features from the same set in the same
//cluster
public static final int CONFLICT_RESOLVER_SUM=0;
public static final int CONFLICT_RESOLVER_BEST=1;
public static final int CONFLICT_RESOLVER_MAX=2;
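//Sketch of what each resolver does (see orderClusterFeatures and writePeptideArray):
//SUM adds together the intensities of all of a set's features in the cluster;
//BEST keeps the feature ranked best by the KL-based quality comparator;
//MAX keeps the most intense feature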
//by default, sum the intensities of conflicting features
public static final int DEFAULT_CONFLICT_RESOLVER=CONFLICT_RESOLVER_SUM;
protected boolean showCharts = false;
/**
* Initialize the clusterer
*/
public FeatureGrouper()
{
_featureClusterer = createFeatureClusterer();
}
protected FeatureClusterer createFeatureClusterer()
{
FeatureClusterer clusterer = new FeatureClusterer(_useMassInsteadOfMz ? FeatureClusterer.MASS_MZ_MODE_MASS :
FeatureClusterer.MASS_MZ_MODE_MZ,
FeatureClusterer.ELUTION_MODE_TIME);
if (_featureClusterer != null)
clusterer.setMassType(_featureClusterer.getMassType());
return clusterer;
}
/**
* By default, simply evaluate the bucket settings based on the number of rows with
* exactly one feature from each set
* @param massOrMzBuckets
* @param elutionBuckets
* @return
*/
public Pair<Double, Double> calculateBestBuckets(double[] massOrMzBuckets,
double[] elutionBuckets)
{
return calculateBestBuckets(massOrMzBuckets, elutionBuckets,
BUCKET_EVALUATION_MODE_ONE_FROM_EACH, false);
}
public Pair<Double, Double> calculateBestBuckets(double[] massOrMzBuckets,
double[] elutionBuckets,
int bucketEvaluationMode, boolean showCharts)
{
Pair<Double, Double> bestBuckets = null;
switch (bucketEvaluationMode)
{
case BUCKET_EVALUATION_MODE_ONE_FROM_EACH:
bestBuckets = _featureClusterer.calculateBestBuckets(massOrMzBuckets, elutionBuckets, showCharts);
break;
case BUCKET_EVALUATION_MODE_PEPTIDE_AGREEMENT:
//evaluate bucket breakdowns based on the number of rows with minAligned
//features, one from each represented set, minPeptideIds of which match
//to the same peptide ID. Each passing row scores that breakdown matchScore
//points, and each peptide mismatch loses it mismatchPenalty points
int numSets = _featureClusterer.getClusterableArrays().size();
//round to the nearest integer
int bucketEvaluationPeptideAgreementMinPeptideIds =
(int) Math.round(bucketEvaluationPeptideAgreementMinPeptideIdsNumSetsMultiple *
numSets);
//round to the nearest integer
int bucketEvaluationPeptideAgreementMinAligned =
(int) Math.round(bucketEvaluationPeptideAgreementMinAlignedNumSetsMultiple *
numSets);
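//Worked example (hypothetical numbers): with 4 feature sets and the default multiples
//(1.0 for minAligned, 0.5 for minPeptideIds), minAligned = 4 and minPeptideIds = 2:
//a bucket scores matchScore if it holds one feature from each of the 4 sets and at
//least 2 of them carry the same peptide ID; any peptide disagreement costs mismatchPenalty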
double bestMassOrMzBucket = 0;
double bestElutionBucket = 0;
int[][] agreeingPeptides = new int[massOrMzBuckets.length][elutionBuckets.length];
int bestNumAgreeingPeptides = -1;
//evaluate all combinations of mass (or m/z) and elution buckets
for (int iMass = 0; iMass < massOrMzBuckets.length; iMass++)
{
for (int iHydrophobicity = 0; iHydrophobicity < elutionBuckets.length;
iHydrophobicity++)
{
double elutionBucketsize = elutionBuckets[iHydrophobicity];
double massOrMzBucketsize = massOrMzBuckets[iMass];
if (elutionBucketsize <= 0 || massOrMzBucketsize <= 0)
continue;
split2D(massOrMzBucketsize, elutionBucketsize);
int bucketScore = 0;
Clusterer2D.BucketSummary[] bucketSummaries = summarize();
for (Clusterer2D.BucketSummary bucketSummary : bucketSummaries)
{
//require at least minAligned features in the bucket, no two of them from the same set
if (bucketSummary.featureCount >= bucketEvaluationPeptideAgreementMinAligned &&
bucketSummary.setCount == bucketSummary.featureCount)
{
String commonPeptide = null;
boolean failed = false;
int numMatched = 0;
for (Clusterer2D.Clusterable clusterable : bucketSummary.getParentList())
{
Feature parentFeature =
((FeatureClusterer.FeatureClusterable) clusterable).getParentFeature();
String peptide = MS2ExtraInfoDef.getFirstPeptide(parentFeature);
if (peptide == null)
continue;
if (commonPeptide == null)
{
commonPeptide = peptide;
numMatched++;
}
else
{
if (commonPeptide.equals(peptide))
numMatched++;
else
{
failed=true;
break;
}
}
}
if (failed)
bucketScore -= bucketEvaluationPeptideAgreementMismatchPenalty;
else if (numMatched >= bucketEvaluationPeptideAgreementMinPeptideIds)
{
bucketScore += bucketEvaluationPeptideAgreementMatchScore;
}
}
}
_log.debug("Evaluating bucket size: mass " + massOrMzBucketsize + ", elution " + elutionBucketsize);
_log.debug(" Bucket score: " + bucketScore);
agreeingPeptides[iMass][iHydrophobicity] = bucketScore;
if (agreeingPeptides[iMass][iHydrophobicity] >
bestNumAgreeingPeptides)
{
bestMassOrMzBucket = massOrMzBucketsize;
bestElutionBucket = elutionBucketsize;
bestNumAgreeingPeptides = agreeingPeptides[iMass][iHydrophobicity];
}
}
}
ApplicationContext.setMessage("Best number of agreeing peptides: " + bestNumAgreeingPeptides);
bestBuckets = new Pair<Double, Double>(bestMassOrMzBucket, bestElutionBucket);
break;
}
return bestBuckets;
}
/**
* Old school entry point; never does normalization
*/
public void writePeptideArray(PrintWriter writer)
{
writePeptideArray(writer, false, false);
}
/**
* By default, don't sum intensities
* @param writer
* @param normalize
* @return the array of summary intensities, normalized if normalize==true
*/
public float[][] writePeptideArray(PrintWriter writer, boolean normalize)
{
return writePeptideArray(writer, normalize, false);
}
public static Feature[] getFeatures(Clusterer2D.BucketSummary bucketSummary)
{
List<Feature> resultList = new ArrayList<Feature>();
for (Clusterer2D.TreeEntry treeEntry : bucketSummary.entries())
resultList.add(((FeatureClusterer.FeatureClusterable) treeEntry.parent).getParentFeature());
return resultList.toArray(new Feature[0]);
}
public static List<Feature> getFeaturesFromSet(int setIndex,
Clusterer2D.BucketSummary bucketSummary)
{
List<Feature> resultList = new ArrayList<Feature>();
for (Clusterer2D.Clusterable clusterable : bucketSummary.getParentListForSetIndex(setIndex))
resultList.add(((FeatureClusterer.FeatureClusterable) clusterable).getParentFeature());
return resultList;
}
/**
* Write out a peptide array. Optionally normalize.
* If sumIntensities is true, all intensities from a given FeatureSet in the bucket will
* be summed. Otherwise, the intensity included will be from the feature that was picked
* to represent the bucket
* @param writer
* @param normalize
* @param sumIntensities
* @return the array of summary intensities, normalized if normalize==true
*/
public float[][] writePeptideArray(PrintWriter writer, boolean normalize,
boolean sumIntensities)
{
//Write peptides and proteins iff at least one FeatureSet has MS2 (peptide/protein) info
boolean writePeptidesAndProteins = false;
for (int i = 0; i < _featureSets.size() && (!writePeptidesAndProteins); i++)
{
writePeptidesAndProteins =
_featureSets.get(i).hasExtraInformationType(MS2ExtraInfoDef.getSingletonInstance());
}
List<String> columns = new ArrayList<String>();
columns.add("id");
if (_shouldGroupByCharge)
columns.add("charge");
if (_useMassInsteadOfMz)
{
columns.add("minMass");
columns.add("maxMass");
}
else
{
columns.add("minMz");
columns.add("maxMz");
}
//dhmay, 20100311: the peptide array now has different column names depending on
//scan or time mode
switch (_featureClusterer.getElutionMode())
{
case FeatureClusterer.ELUTION_MODE_TIME:
columns.add("minTime"); columns.add("maxTime");
break;
default:
columns.add("minScan"); columns.add("maxScan");
}
columns.add("featureCount"); columns.add("setCount");
for (int i = 0; i < _featureSets.size(); i++)
{
FeatureSet fs = _featureSets.get(i);
String featureSetColumnSuffix =
buildFeatureSetColumnSuffix(fs, i+1);
columns.add("intensity" + featureSetColumnSuffix);
if (writePeptidesAndProteins)
{
columns.add("peptide" + featureSetColumnSuffix);
columns.add("protein" + featureSetColumnSuffix);
}
}
for (int i=0; i<columns.size(); i++)
{
if (i>0)
writer.print("\t");
writer.print(columns.get(i));
}
writer.println();
//TODO: this should probably be a parameter passed in by the caller,
//though it only makes sense when writePeptidesAndProteins is true
boolean showMultiplePeptideProteinMatches = writePeptidesAndProteins;
List<FeatureClusterer> clusterersToSummarize = new ArrayList<FeatureClusterer>();
List<Integer> chargesOfClusterers = new ArrayList<Integer>();
if (_shouldGroupByCharge)
{
for (int charge : _chargeClustererMap.keySet())
{
FeatureClusterer clusterer = _chargeClustererMap.get(charge);
if (clusterer.countAllEntries() == 0)
{
_log.debug("writePeptideArray: skipping clusterer for charge " + charge + ", with 0 entries");
}
else
{
clusterersToSummarize.add(clusterer);
chargesOfClusterers.add(charge);
}
}
}
else
{
clusterersToSummarize.add(_featureClusterer);
}
int numTotalRows = 0;
List<Integer> firstRowIndexesEachSummarizer = new ArrayList<Integer>();
List<Clusterer2D.BucketSummary[]> summariesAllClusterers = new ArrayList<Clusterer2D.BucketSummary[]>();
for (int i=0; i<clusterersToSummarize.size(); i++)
{
FeatureClusterer clusterer = clusterersToSummarize.get(i);
firstRowIndexesEachSummarizer.add(numTotalRows);
if (_shouldGroupByCharge)
ApplicationContext.setMessage("\tSummarizing charge " + chargesOfClusterers.get(i) + "...");
Clusterer2D.BucketSummary[] summaries = clusterer.summarize();
summariesAllClusterers.add(summaries);
if (_shouldGroupByCharge)
ApplicationContext.setMessage("\t" + summaries.length + " clusters this charge");
numTotalRows += summaries.length;
}
ApplicationContext.setMessage("Done clustering.");
Clusterer2D.BucketSummary[] allSummaries = new Clusterer2D.BucketSummary[numTotalRows];
for (int i=0; i<summariesAllClusterers.size(); i++)
{
Clusterer2D.BucketSummary[] summaries = summariesAllClusterers.get(i);
System.arraycopy(summaries, 0, allSummaries, firstRowIndexesEachSummarizer.get(i), summaries.length);
}
summariesAllClusterers = null;
System.gc();
ApplicationContext.setMessage("Calculating intensities for " + allSummaries.length + " cells....");
//resolve conflicts, determine intensities/features to represent rows
//stupid java 1.5 can't handle generic array creation
List<Feature>[][] allSummariesFeatures =
(List<Feature>[][]) (new List[numTotalRows][_featureSets.size()]);
float[][] allSummariesIntensities =
new float[numTotalRows][_featureSets.size()];
int arrayRowIndex = -1;
FeatureSet[] featureSetsArray = new FeatureSet[_featureSets.size()];
for (int i=0; i<_featureSets.size(); i++)
featureSetsArray[i] = _featureSets.get(i);
for (Clusterer2D.BucketSummary summary : allSummaries)
{
arrayRowIndex++;
//order features. If the resolver is currently "sum",
//order them by "best" anyway; might as well do that so that the
//peptides and proteins have a known ordering.
//Order is descending order of goodness, whatever goodness is
allSummariesFeatures[arrayRowIndex] =
orderClusterFeatures(summary, featureSetsArray,
_conflictResolver == CONFLICT_RESOLVER_SUM ?
CONFLICT_RESOLVER_BEST : _conflictResolver);
//Now, assign intensities.
for (int j = 0; j < _featureSets.size(); j++)
{
allSummariesIntensities[arrayRowIndex][j] = 0;
List<Feature> thisFeatureList = allSummariesFeatures[arrayRowIndex][j];
if (thisFeatureList != null &&
!thisFeatureList.isEmpty())
{
//sum all intensities
if (_conflictResolver == CONFLICT_RESOLVER_SUM)
{
for (Feature feature : thisFeatureList)
{
allSummariesIntensities[arrayRowIndex][j] += feature.getIntensity();
}
}
else
{
//pick just one intensity. take the intensity of the
//first one, because they're in descending order of goodness.
allSummariesIntensities[arrayRowIndex][j] =
thisFeatureList.get(0).getIntensity();
}
}
}
}
ApplicationContext.setMessage("Done calculating intensities.");
//throw away return value of normalizeSummaryIntensities
if (normalize)
{
ApplicationContext.setMessage("Beginning normalization...");
normalizeSummaryIntensities(allSummariesIntensities);
ApplicationContext.setMessage("Normalization complete.");
}
int currentCharge = 0;
int nextSummarizerFirstRowIndex = 0;
int currentSummarizerIndex = -1;
if (_shouldGroupByCharge)
{
currentCharge = chargesOfClusterers.get(0);
}
ApplicationContext.setMessage("Writing array...");
for (int i = 0; i < numTotalRows; i++)
{
writer.print(i + 1 + "\t");
if (_shouldGroupByCharge)
{
if (i == nextSummarizerFirstRowIndex)
{
currentSummarizerIndex++;
currentCharge = chargesOfClusterers.get(currentSummarizerIndex);
ApplicationContext.setMessage("\tWriting charge " + currentCharge);
if (currentSummarizerIndex < chargesOfClusterers.size()-1)
nextSummarizerFirstRowIndex = firstRowIndexesEachSummarizer.get(currentSummarizerIndex+1);
}
writer.print(currentCharge + "\t");
}
writer.println(
createArrayRow(allSummaries[i],allSummariesFeatures[i],
allSummariesIntensities[i],
writePeptidesAndProteins,
showMultiplePeptideProteinMatches));
}
return allSummariesIntensities;
}
/**
* build the column suffix for this featureset.
*
* Separating this to make it easy to find and mess with.
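*
* For example (hypothetical filename): a source file named "sample 1.peptides.tsv"
* yields the suffix "_sample_1".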
* @param featureSet
* @param featureSetNumber used in the suffix when the FeatureSet has no source file
* @return
*/
protected String buildFeatureSetColumnSuffix(FeatureSet featureSet,
int featureSetNumber)
{
StringBuffer resultBuf = new StringBuffer("_");
if (featureSet.getSourceFile() == null)
{
resultBuf.append(featureSetNumber);
return resultBuf.toString();
}
//add characters from the filename until you run into the first "."
//turn whitespace into _
for (byte charByte : featureSet.getSourceFile().getName().getBytes())
{
char thisChar = (char) charByte;
if (charByte == '.')
break;
if (Character.isSpaceChar(thisChar))
thisChar = '_';
resultBuf.append(thisChar);
}
return resultBuf.toString();
}
public float getElution(Feature feature)
{
switch (_featureClusterer.getElutionMode())
{
case FeatureClusterer.ELUTION_MODE_SCAN:
return feature.getScan();
default:
return feature.getTime();
}
}
/**
* Filter each FeatureSet to include only those features that align across
* at least minAligned sets. Intended for use when runs are grouped by
* some tag (e.g. "case" or "control")
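*
* A minimal usage sketch (bucket sizes are hypothetical; split2D must be called first):
* <pre>
* grouper.split2D(0.1, 30);
* FeatureSet[] aligned = grouper.filterByGroupedAlignment(3);
* </pre>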
*/
public FeatureSet[] filterByGroupedAlignment(int minAligned)
{
boolean useTree = false;
Clusterer2D.BucketSummary[] clusters = _featureClusterer.summarize();
FeatureSet[] featureSets = new FeatureSet[_featureSets.size()];
for (int i = 0; i < _featureSets.size(); i++)
{
Tree2D tree = null;
if (useTree)
{
tree = new Tree2D();
for (Feature f :_featureSets.get(i).getFeatures())
tree.add(f.getMass(), getElution(f), f);
}
ArrayList<Feature> filteredFeatures = new ArrayList<Feature>();
for (int j = 0; j < clusters.length; j++)
{
Clusterer2D.BucketSummary cluster = clusters[j];
if (cluster.setCount >= minAligned)
{
float minMass = (float) cluster.minDimension1;
float maxMass = (float) cluster.maxDimension1;
//use floats so that time-mode elution boundaries aren't truncated
float minElution = (float) cluster.minDimension2;
float maxElution = (float) cluster.maxDimension2;
if (useTree)
{
// use Tree2D to pull out all features within the bucket
ArrayList<Feature> bucketFeatures =
(ArrayList<Feature>) tree.getPoints(minMass, minElution, maxMass, maxElution);
for (Feature f : bucketFeatures)
filteredFeatures.add(f);
}
else
{
// Tree2D.getPoints is not inclusive of the bucket boundaries, so check every feature directly
for (Feature f :_featureSets.get(i).getFeatures())
{
if (f.getMass() >= minMass && getElution(f) >= minElution &&
f.getMass() <= maxMass && getElution(f) <= maxElution)
filteredFeatures.add(f);
}
}
}
}
Feature[] features = filteredFeatures.toArray(new Feature[0]);
featureSets[i] = (FeatureSet) _featureSets.get(i).clone();
featureSets[i].setFeatures(features);
}
return featureSets;
}
/**
* Cover method
* @param cluster
* @param featureSets
* @param conflictResolver
* @return
*/
public static List<Feature>[]
orderClusterFeatures(Clusterer2D.BucketSummary cluster,
List<FeatureSet> featureSets,
int conflictResolver)
{
return orderClusterFeatures(cluster,
featureSets.toArray(new FeatureSet[0]), conflictResolver);
}
/**
* Resolve conflicts between features in a cluster by ordering them along
* some metric
*
* Result is a list of Features in descending order of goodness
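* (e.g., CONFLICT_RESOLVER_MAX orders each set's features by descending intensity;
* CONFLICT_RESOLVER_BEST orders them by the KL-based quality comparator)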
* @param cluster
* @param featureSetArray
* @param conflictResolver
* @return an array of feature lists, one per FeatureSet; an entry is populated only if
* at least one feature from that FeatureSet was present in the cluster (null otherwise)
*/
public static List<Feature>[]
orderClusterFeatures(Clusterer2D.BucketSummary cluster,
FeatureSet[] featureSetArray,
int conflictResolver)
{
//stupid Java can't create arrays of generics
List<Feature>[] result = (List<Feature>[]) new List[featureSetArray.length];
switch (conflictResolver)
{
case CONFLICT_RESOLVER_MAX:
Comparator<Feature> intensityDescComp = new Feature.IntensityDescComparator();
for (int i = 0; i < featureSetArray.length; i++)
{
List<Clusterer2D.Clusterable> thisSetClusterables =
cluster.getParentListForSetIndex(i);
if (!thisSetClusterables.isEmpty())
{
ArrayList<Feature> featureList =
new ArrayList<Feature>(thisSetClusterables.size());
for (Clusterer2D.Clusterable featureClusterable : thisSetClusterables)
{
featureList.add(((FeatureClusterer.FeatureClusterable)
featureClusterable).getParentFeature());
}
Collections.sort(featureList, intensityDescComp);
result[i] = featureList;
}
}
break;
case CONFLICT_RESOLVER_BEST:
for (int i = 0; i < featureSetArray.length; i++)
{
List<Clusterer2D.Clusterable> thisSetClusterables =
cluster.getParentListForSetIndex(i);
if (!thisSetClusterables.isEmpty())
{
ArrayList<Feature> featureList =
new ArrayList<Feature>(thisSetClusterables.size());
for (Clusterer2D.Clusterable featureClusterable : thisSetClusterables)
{
featureList.add(((FeatureClusterer.FeatureClusterable)
featureClusterable).getParentFeature());
}
Collections.sort(featureList,
Feature.PeaksKLComparatorDesc.getSingletonInstance());
result[i] = featureList;
}
}
break;
}
return result;
}
public Clusterer2D.BucketSummary[] summarize()
{
List<Clusterer2D.BucketSummary> resultList = new ArrayList<Clusterer2D.BucketSummary>();
if (_shouldGroupByCharge)
{
for (int charge : _chargeClustererMap.keySet())
{
FeatureClusterer clusterer = _chargeClustererMap.get(charge);
if (clusterer.countAllEntries() == 0)
{
_log.debug("summarize: skipping clusterer for charge " + charge + ", with 0 entries");
}
else
{
for (Clusterer2D.BucketSummary bucket : clusterer.summarize())
{
resultList.add(bucket);
}
}
}
}
else
{
//not grouping by charge, so summarize the single clusterer
for (Clusterer2D.BucketSummary bucket : _featureClusterer.summarize())
{
resultList.add(bucket);
}
}
return resultList.toArray(new Clusterer2D.BucketSummary[resultList.size()]);
}
/**
* Create a single row for a peptide array.
* @param summary
* @param bucketFeatures
* @param bucketIntensities
* @param writePeptidesAndProteins
* @param writeMultipleFeatures
* @return
*/
protected static String createArrayRow(Clusterer2D.BucketSummary summary,
List<Feature>[] bucketFeatures,
float[] bucketIntensities,
boolean writePeptidesAndProteins,
boolean writeMultipleFeatures)
{
StringBuffer resultBuf = new StringBuffer(summary.toString());
for (int i = 0; i < bucketIntensities.length; i++)
{
resultBuf.append("\t");
if (bucketIntensities[i] > 0)
resultBuf.append(bucketIntensities[i]);
List<Feature> featureList = bucketFeatures[i];
//make sure there are actually features to write out.
boolean hasFeatures = (featureList != null &&
!featureList.isEmpty());
//if we're writing peptides and proteins, we've got a lot of work to do
//to determine exactly how to do it
if (writePeptidesAndProteins)
{
resultBuf.append("\t");
if (writeMultipleFeatures)
{
StringBuffer peptideBuf = new StringBuffer("");
StringBuffer proteinBuf = new StringBuffer("");
//If no features to write out, buffers remain empty
if (hasFeatures)
{
List<String> peptideListStrings =
new ArrayList<String>();
List<String> proteinListStrings =
new ArrayList<String>();
//For each feature, write out its peptide and protein lists,
//bracket-separated if necessary.
//Do not add duplicate peptide or protein lists
for (Feature feature : featureList)
{
List<String> peptideList =
MS2ExtraInfoDef.getPeptideList(feature);
List<String> proteinList =
MS2ExtraInfoDef.getProteinList(feature);
//if this peptide list contains duplicate peptides in
//sequence, only keep the first of each series.
//maintain the protein list in parallel.
if (peptideList != null && !peptideList.isEmpty())
{
MS2ExtraInfoDef.collapsePeptideSequentialDuplicates(feature);
String peptideListString =
MS2ExtraInfoDef.convertStringListToString(peptideList);
//even if we have a peptide list with something in it,
//only add it if we don't already have an identical
//list
if (!peptideListStrings.contains(peptideListString))
{
peptideListStrings.add(peptideListString);
String proteinListString = null;
if (proteinList != null &&
!proteinList.isEmpty())
{
MS2ExtraInfoDef.collapseProteinSequentialDuplicates(
feature);
proteinListString =
MS2ExtraInfoDef.convertStringListToString(
proteinList);
}
proteinListStrings.add(proteinListString);
}
}
}
//if we've got multiple peptides to write, have to
//separate them
boolean multiplePeptidesToWrite =
(peptideListStrings.size() > 1);
for (int j=0; j<peptideListStrings.size(); j++)
{
if (multiplePeptidesToWrite)
peptideBuf.append("[");
peptideBuf.append(peptideListStrings.get(j));
if (multiplePeptidesToWrite)
peptideBuf.append("]");
//System.err.println("writing eptides and proteins. Peptides: " + peptideListStrings.size() + ", proteins: " + proteinListStrings.size());
if (proteinListStrings.get(j) != null)
{
if (multiplePeptidesToWrite)
proteinBuf.append("[");
//System.err.println(" Writing protein " + proteinListStrings.get(j));
proteinBuf.append(proteinListStrings.get(j));
if (multiplePeptidesToWrite)
proteinBuf.append("]");
}
}
}
resultBuf.append(peptideBuf);
resultBuf.append("\t");
resultBuf.append(proteinBuf);
}
else
{
//not writing multiple features, just take the first
if (hasFeatures &&
MS2ExtraInfoDef.getFirstPeptide(featureList.get(0)) != null)
resultBuf.append(MS2ExtraInfoDef.getFirstPeptide(featureList.get(0)));
resultBuf.append("\t");
if (hasFeatures &&
MS2ExtraInfoDef.getFirstProtein(featureList.get(0)) != null)
resultBuf.append(MS2ExtraInfoDef.getFirstProtein(featureList.get(0)));
}
}
}
return resultBuf.toString();
}
/**
* Write out all feature details
* @param writer
* @param writeMS2 if true, include MS2 extra information (peptide, protein) for each feature
*/
public void writeArrayDetails(PrintWriter writer, boolean writeMS2)
{
writer.print("id\t");
writer.print("file\t");
FeatureExtraInformationDef[] extraInfos = writeMS2 ?
new FeatureExtraInformationDef[] {MS2ExtraInfoDef.getSingletonInstance()} : null;
writer.println(Feature.getFeatureHeader(extraInfos));
List<FeatureClusterer> clusterersToSummarize = new ArrayList<FeatureClusterer>();
if (_shouldGroupByCharge)
{
for (int charge : _chargeClustererMap.keySet())
{
clusterersToSummarize.add(_chargeClustererMap.get(charge));
}
}
else
{
clusterersToSummarize.add(_featureClusterer);
}
int curRow = 0;
for (FeatureClusterer clusterer : clusterersToSummarize)
{
if (clusterer.countAllEntries() == 0)
{
_log.debug("Empty clusterer, skipping");
continue;
}
Clusterer2D.BucketSummary[] summaries = clusterer.summarize();
for (Clusterer2D.BucketSummary summary : summaries)
{
curRow++;
Clusterer2D.TreeEntry[] entries= summary.entries();
for (Clusterer2D.TreeEntry entry : entries)
{
String setName = getSetName(entry.iSet);
Feature deconvolutedFeature =
((FeatureClusterer.FeatureClusterable) (entry.parent)).getParentFeature();
writeDetailsFeatureRow(writer, curRow, setName, deconvolutedFeature, extraInfos);
}
writer.flush();
}
}
}
/**
* Helper method to write a single row in a details file representing one feature
* @param writer
* @param rowId
* @param setName
* @param feature
* @param extraInfos
*/
protected void writeDetailsFeatureRow(PrintWriter writer, int rowId, String setName, Feature feature,
FeatureExtraInformationDef[] extraInfos)
{
writer.println((rowId) + "\t" + setName + "\t" + feature.toString(extraInfos));
}
/**
* get the name of a FeatureSet, for the details file
* @param i
* @return
*/
private String getSetName(int i)
{
FeatureSet fs = _featureSets.get(i);
if (fs.getSourceFile() != null)
return fs.getSourceFile().getName();
else
return String.valueOf(i);
}
/**
* Normalize the intensities across all bucket summaries. Note that we don't
* attempt to reflect normalization in the feature "details" file.
*/
private boolean normalizeSummaryIntensities(float[][] allSummariesIntensities)
{
List<float[]> rows = new ArrayList<float[]>();
for (int i = 0; i < allSummariesIntensities.length; i++)
rows.add(allSummariesIntensities[i]);
return Normalizer.normalize(rows, showCharts);
}
//cover methods on FeatureClusterer
/**
* Split based on a set of bucket sizes. If not splitting by charge, just passes the command along to the clusterer.
* If splitting by charge, splits each of the per-charge clusterers
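*
* For example (hypothetical values): split2D(0.1, 30) uses mass (or m/z) buckets of
* size 0.1 and elution buckets of size 30 (scans or seconds, depending on elution mode).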
* @param dimension1Bucket
* @param dimension2Bucket
*/
public void split2D(double dimension1Bucket, double dimension2Bucket)
{
if (_shouldGroupByCharge)
{
for (FeatureClusterer featureClustererThisCharge : _chargeClustererMap.values())
{
featureClustererThisCharge.split2D(dimension1Bucket, dimension2Bucket);
}
}
else
{
_featureClusterer.split2D(dimension1Bucket, dimension2Bucket);
}
}
public int numBuckets()
{
return _featureClusterer.numBuckets();
}
public int rowsWithOneFromEach()
{
if (_shouldGroupByCharge)
{
int oneFromEachRowCount = 0;
for (int charge : _allObservedCharges)
{
oneFromEachRowCount += _chargeClustererMap.get(charge).rowsWithOneFromEach();
}
return oneFromEachRowCount;
}
else
return _featureClusterer.rowsWithOneFromEach();
}
//getters, setters
public boolean getGroupByMass()
{
return _useMassInsteadOfMz;
}
public void setGroupByMass(boolean groupByMass)
{
this._useMassInsteadOfMz = groupByMass;
if (_shouldGroupByCharge)
{
for (FeatureClusterer fc : _chargeClustererMap.values())
fc.setMassMzMode(_useMassInsteadOfMz ? FeatureClusterer.MASS_MZ_MODE_MASS :
FeatureClusterer.MASS_MZ_MODE_MZ);
}
else
{
_featureClusterer.setMassMzMode(_useMassInsteadOfMz ? FeatureClusterer.MASS_MZ_MODE_MASS :
FeatureClusterer.MASS_MZ_MODE_MZ);
}
}
/**
* Add a FeatureSet. If we're not separating by charge, this is trivial. If we are, we
* separate the features by charge, create a FeatureSet for each charge, and add each of
* those to the FeatureClusterer for that charge
* @param fs
*/
public void addSet(FeatureSet fs)
{
if (_shouldGroupByCharge)
{
Map<Integer,List<Feature>> chargeFeatureMap = new HashMap<Integer,List<Feature>>();
Set<Integer> observedCharges = new HashSet<Integer>();
for (Feature feature : fs.getFeatures())
{
if (feature.getCharge() == 0)
continue;
observedCharges.add(feature.charge);
List<Feature> featuresThisCharge = chargeFeatureMap.get(feature.getCharge());
if (featuresThisCharge == null)
{
featuresThisCharge = new ArrayList<Feature>();
chargeFeatureMap.put(feature.getCharge(), featuresThisCharge);
_allObservedCharges.add(feature.getCharge());
}
featuresThisCharge.add(feature);
}
for (int charge : observedCharges)
{
FeatureSet fsThisCharge = (FeatureSet) fs.clone();
if (chargeFeatureMap.containsKey(charge))
fsThisCharge.setFeatures(chargeFeatureMap.get(charge).toArray(new Feature[chargeFeatureMap.get(charge).size()]));
else fsThisCharge.setFeatures(new Feature[0]);
FeatureClusterer featureClustererThisCharge = _chargeClustererMap.get(charge);
if (featureClustererThisCharge == null)
{
featureClustererThisCharge = createFeatureClusterer();
_chargeClustererMap.put(charge, featureClustererThisCharge);
}
featureClustererThisCharge.addSet(fsThisCharge);
}
}
else
{
_featureClusterer.addSet(fs);
}
_featureSets.add(fs);
}
public FeatureSet getSet(int i)
{
return _featureClusterer.getSet(i);
}
public int getConflictResolver()
{
return _conflictResolver;
}
public void setConflictResolver(int conflictResolver)
{
this._conflictResolver = conflictResolver;
}
public double getBucketEvaluationPeptideAgreementMinAlignedNumSetsMultiple()
{
return bucketEvaluationPeptideAgreementMinAlignedNumSetsMultiple;
}
public void setBucketEvaluationPeptideAgreementMinAlignedNumSetsMultiple(double bucketEvaluationPeptideAgreementMinAlignedNumSetsMultiple)
{
this.bucketEvaluationPeptideAgreementMinAlignedNumSetsMultiple = bucketEvaluationPeptideAgreementMinAlignedNumSetsMultiple;
}
public double getBucketEvaluationPeptideAgreementMinPeptideIdsNumSetsMultiple()
{
return bucketEvaluationPeptideAgreementMinPeptideIdsNumSetsMultiple;
}
public void setBucketEvaluationPeptideAgreementMinPeptideIdsNumSetsMultiple(double bucketEvaluationPeptideAgreementMinPeptideIdsNumSetsMultiple)
{
this.bucketEvaluationPeptideAgreementMinPeptideIdsNumSetsMultiple = bucketEvaluationPeptideAgreementMinPeptideIdsNumSetsMultiple;
}
public int getBucketEvaluationPeptideAgreementMatchScore()
{
return bucketEvaluationPeptideAgreementMatchScore;
}
public void setBucketEvaluationPeptideAgreementMatchScore(int bucketEvaluationPeptideAgreementMatchScore)
{
this.bucketEvaluationPeptideAgreementMatchScore = bucketEvaluationPeptideAgreementMatchScore;
}
public int getBucketEvaluationPeptideAgreementMismatchPenalty()
{
return bucketEvaluationPeptideAgreementMismatchPenalty;
}
public void setBucketEvaluationPeptideAgreementMismatchPenalty(int bucketEvaluationPeptideAgreementMismatchPenalty)
{
this.bucketEvaluationPeptideAgreementMismatchPenalty = bucketEvaluationPeptideAgreementMismatchPenalty;
}
public int getMassType()
{
return _featureClusterer.getMassType();
}
public void setMassType(int _massType)
{
if (_shouldGroupByCharge)
{
for (FeatureClusterer fc : _chargeClustererMap.values())
fc.setMassType(_massType);
}
else
{
_featureClusterer.setMassType(_massType);
}
}
public int getElutionMode()
{
return _featureClusterer.getElutionMode();
}
public void setElutionMode(int _elutionMode)
{
if (_shouldGroupByCharge)
{
for (FeatureClusterer fc : _chargeClustererMap.values())
fc.setElutionMode(_elutionMode);
}
else
{
_featureClusterer.setElutionMode(_elutionMode);
}
}
public boolean isGroupByCharge()
{
return _shouldGroupByCharge;
}
public void setGroupByCharge(boolean _shouldGroupByCharge)
{
this._shouldGroupByCharge = _shouldGroupByCharge;
}
public FeatureClusterer getFeatureClusterer()
{
return _featureClusterer;
}
public boolean isShowCharts() {
return showCharts;
}
public void setShowCharts(boolean showCharts) {
this.showCharts = showCharts;
}
}