ProteinTree.java example

Explorer

compomics-utilities-master
- src
  - main
    - java
      - com
        compomics
        software
        CompomicsWrapper.java
        SplashScreen.java
        ToolFactory.java
        autoupdater
        CompareVersionNumbers.java
        DownloadLatestZipFromRepo.java
        FileDAO.java
        GUIFileDAO.java
        HeadlessFileDAO.java
        JarFileFilter.java
        MavenJarFile.java
        MetaDataXMLParser.java
        WebDAO.java
        cli
        CommandLineUtils.java
        CommandParameter.java
        dialogs
        JavaHomeDialog.java
        JavaHomeOrMemoryDialogParent.java
        JavaMemoryDialog.java
        JavaSettingsDialog.java
        PeptideShakerSetupDialog.java
        ProteoWizardSetupDialog.java
        ReporterSetupDialog.java
        SearchGuiSetupDialog.java
        settings
        PathKey.java
        UtilitiesPathPreferences.java
        gui
        PathSettingsDialog.java
        util
        AlternateRowColoursJTable.java
        BinaryArrayImpl.java
        CompareVersionNumbers.java
        Export.java
        FileAndFileFilter.java
        Util.java
        XYZDataPoint.java
        db
        ColumnTypeConverter.java
        DBAccessor.java
        DBAccessorGenerator.java
        DBMetaData.java
        DBResultSet.java
        DerbyUtil.java
        GeneratorException.java
        ObjectsCache.java
        ObjectsDB.java
        components
        Constructors.java
        DeleteableCode.java
        GettersAndSetters.java
        InstanceVariables.java
        PersistableCode.java
        RetrievableCode.java
        UpdateableCode.java
        interfaces
        DBElement.java
        Deleteable.java
        Persistable.java
        Retrievable.java
        Updateable.java
        enumeration
        CompomicsTools.java
        ImageType.java
        MolecularElement.java
        OperatingSystemEnum.java
        examples
        BareBonesBrowserLaunch.java
        HelpWindow.java
        MirroredSpectraTestDialog.java
        UtilitiesDemo.java
        exceptions
        ExceptionHandler.java
        exception_handlers
        CommandLineExceptionHandler.java
        FrameExceptionHandler.java
        WaitingDialogExceptionHandler.java
        experiment
        Constants.java
        MsExperiment.java
        ProteomicAnalysis.java
        SampleAnalysisSet.java
        ShotgunProtocol.java
        biology
        AminoAcid.java
        AminoAcidPattern.java
        AminoAcidSequence.java
        Atom.java
        AtomChain.java
        AtomImpl.java
        ElementaryElement.java
        Enzyme.java
        EnzymeFactory.java
        Ion.java
        IonFactory.java
        MassGap.java
        NeutralLoss.java
        PTM.java
        PTMFactory.java
        Peptide.java
        Protein.java
        Sample.java
        aminoacids
        Alanine.java
        Arginine.java
        Asparagine.java
        AsparticAcid.java
        B.java
        Cysteine.java
        GlutamicAcid.java
        Glutamine.java
        Glycine.java
        Histidine.java
        Isoleucine.java
        J.java
        Leucine.java
        Lysine.java
        Methionine.java
        Phenylalanine.java
        Proline.java
        Pyrrolysine.java
        Selenocysteine.java
        Serine.java
        Threonine.java
        Tryptophan.java
        Tyrosine.java
        Valine.java
        X.java
        Z.java
        atoms
        Carbon.java
        Fluorine.java
        Helium.java
        Hydrogen.java
        Iodine.java
        Lithium.java
        Nitrogen.java
        Oxygen.java
        Phosphorus.java
        Selenium.java
        Sodium.java
        Sulfur.java
        genes
        GeneFactory.java
        GeneMaps.java
        ensembl
        EnsemblVersion.java
        GeneMapping.java
        go
        GoDomains.java
        GoMapping.java
        ions
        ElementaryIon.java
        Glycan.java
        ImmoniumIon.java
        PeptideFragmentIon.java
        PrecursorIon.java
        RelatedIon.java
        ReporterIon.java
        TagFragmentIon.java
        mutations
        Mutation.java
        MutationFactory.java
        MutationMatrix.java
        taxonomy
        SpeciesFactory.java
        mappings
        BiomartMapping.java
        EnsemblGenomesSpecies.java
        EnsemblSpecies.java
        UniprotTaxonomy.java
        variants
        AaSubstitutionMatrix.java
        AminoAcidSubstitution.java
        Variant.java
        amino_acids
        Deletion.java
        Insertion.java
        Substitution.java
        Swap.java
        filtering
        Filter.java
        FilterItem.java
        FilterItemComparator.java
        identification
        Advocate.java
        Identification.java
        IdentificationDB.java
        IdentificationMatch.java
        IdentificationMethod.java
        SpectrumIdentificationAssumption.java
        TagFactory.java
        amino_acid_tags
        SequenceSegment.java
        Tag.java
        TagComponent.java
        matchers
        TagMatcher.java
        filtering
        PeptideAssumptionFilter.java
        identification_parameters
        IdentificationAlgorithmParameter.java
        IdentificationParametersFactory.java
        PtmSettings.java
        SearchParameters.java
        tool_specific
        AndromedaParameters.java
        CometParameters.java
        DirecTagParameters.java
        MsAmandaParameters.java
        MsgfParameters.java
        MyriMatchParameters.java
        NovorParameters.java
        OmssaParameters.java
        PNovoParameters.java
        PepnovoParameters.java
        TideParameters.java
        XtandemParameters.java
        identifications
        Ms2Identification.java
        matches
        IonMatch.java
        ModificationMatch.java
        PeptideMatch.java
        ProteinMatch.java
        SpectrumMatch.java
        VariantMatch.java
        matches_iterators
        PeptideMatchesIterator.java
        ProteinMatchesIterator.java
        PsmIterator.java
        parameters_cli
        AbstractIdentificationParametersCli.java
        EnzymesCLI.java
        EnzymesCLIInputBean.java
        EnzymesCLIParams.java
        IdentificationParametersCLIParams.java
        IdentificationParametersInputBean.java
        ModificationsCLI.java
        ModificationsCLIInputBean.java
        ModificationsCLIParams.java
        peptide_fragmentation
        FragmentItensityPredictor.java
        PeptideFragmentationModel.java
        predictors
        SequestFragmentationModel.java
        UniformFragmentation.java
        protein_inference
        PeptideMapper.java
        PeptideMapperType.java
        PeptideProteinMapping.java
        executable
        PeptideMapping.java
        fm_index
        FMIndex.java
        MatrixContent.java
        Rank.java
        WaveletTree.java
        proteintree
        Node.java
        ProteinTree.java
        ProteinTreeComponentsFactory.java
        protein_sequences
        FastaIndex.java
        ProteinSequenceIterator.java
        SequenceFactory.java
        psm_scoring
        PsmScore.java
        PsmScoresEstimator.java
        psm_scores
        AAIntensityRankScore.java
        AAMS2MzFidelityScore.java
        ComplementarityScore.java
        HyperScore.java
        IntensityRankScore.java
        MS2MzFidelityScore.java
        PrecursorAccuracy.java
        ptm
        PtmScore.java
        PtmSiteMapping.java
        PtmtableContent.java
        ptmscores
        AScore.java
        MDScore.java
        PhosphoRS.java
        spectrum_annotation
        AnnotationSettings.java
        IonMatchKeysCache.java
        NeutralLossesMap.java
        SpecificAnnotationSettings.java
        SpectrumAnnotator.java
        spectrum_annotators
        PeptideSpectrumAnnotator.java
        TagSpectrumAnnotator.java
        spectrum_assumptions
        PeptideAssumption.java
        TagAssumption.java
        io
        ExperimentIO.java
        identifications
        IdfileReader.java
        IdfileReaderFactory.java
        MzIdentMLIdfileSearchParametersConverter.java
        idfilereaders
        AndromedaIdfileReader.java
        DirecTagIdfileReader.java
        MsAmandaIdfileReader.java
        MzIdentMLIdfileReader.java
        NovorIdfileReader.java
        OnyaseIdfileReader.java
        PNovoIdfileReader.java
        PepNovoIdfileReader.java
        PepxmlIdfileReader.java
        TideIdfileReader.java
        massspectrometry
        MgfFileIterator.java
        MgfIndex.java
        MgfReader.java
        export
        AplExporter.java
        Ms2Exporter.java
        massspectrometry
        Charge.java
        FragmentationMethod.java
        MS1Spectrum.java
        MSnSpectrum.java
        Peak.java
        Precursor.java
        Spectrum.java
        SpectrumFactory.java
        indexes
        PrecursorMap.java
        SpectrumIndex.java
        proteowizard
        MsConvertParameters.java
        MsFormat.java
        ProteoWizardFilter.java
        gui
        MsConvertParametersDialog.java
        normalization
        NormalizationFactors.java
        patient
        Patient.java
        PatientInformation.java
        personalization
        ExperimentObject.java
        UrParameter.java
        quantification
        Quantification.java
        reporterion
        Reagent.java
        ReporterIonQuantification.java
        ReporterMethod.java
        ReporterMethodFactory.java
        spectrumcounting
        SpectrumCounting.java
        refinementparameters
        PepnovoAssumptionDetails.java
        units
        MetricsPrefix.java
        StandardUnit.java
        UnitOfMeasurement.java
        general
        CommandLineParser.java
        DefaultOutputLoggerImplementation.java
        EncapsulatedObject.java
        FileLoggerImplementation.java
        IsotopicDistribution.java
        IsotopicDistributionSpectrum.java
        IsotopicElement.java
        MassCalc.java
        Translate.java
        UnknownElementMassException.java
        servlet
        MassCalcServlet.java
        gui
        AaMass.java
        AminoAcidPatternDialog.java
        DummyFrame.java
        ExportFormatSelectionDialog.java
        FlamableJFrame.java
        GuiUtilities.java
        JExceptionDialog.java
        JFrameForBytes.java
        JLabelAndComponentPanel.java
        JOptionEditorPane.java
        JTableForDB.java
        MonitorDimension.java
        PrivacySettingsDialog.java
        SampleSelection.java
        TableMouseWheelListener.java
        TableProperties.java
        TableScrollBarListener.java
        UtilitiesGUIDefaults.java
        VennDiagramDialog.java
        VennDiagramLegendLocationDialog.java
        VennDiagramPanel.java
        VisibleTableColumnsDialog.java
        VisibleTableColumnsDialogParent.java
        XYPlotFiltersDialog.java
        XYPlottingDialog.java
        atoms
        AtomChainDialog.java
        AtomPanel.java
        error_handlers
        BugReport.java
        HelpDialog.java
        notification
        NotesDialog.java
        NotificationDialog.java
        NotificationDialogParent.java
        events
        RescalingEvent.java
        export
        graphics
        ExportGraphicsDialog.java
        report
        ReportEditor.java
        filehandling
        FileDisplayDialog.java
        FileSelectionDialog.java
        TempFilesManager.java
        genes
        GeneDetailsDialog.java
        SpeciesDialog.java
        interfaces
        SpectrumAnnotation.java
        SpectrumPanelListener.java
        isotopic_calculator
        IsotopeDistributionGui.java
        parameters
        IdentificationParametersEditionDialog.java
        IdentificationParametersOverviewDialog.java
        IdentificationParametersSelectionDialog.java
        OldProcessingPreferencesDialog.java
        ProcessingPreferencesDialog.java
        identification_parameters
        AlgorithmSettingsDialog.java
        AnnotationSettingsDialog.java
        EnzymeSelectionDialog.java
        FractionSettingsDialog.java
        GenePreferencesDialog.java
        IdentificationParametersNameDialog.java
        MatchesImportFiltersDialog.java
        PTMLocalizationParametersDialog.java
        PeptideVariantsSettingsDialog.java
        ProteinInferenceSettingsDialog.java
        PsmScoringSettingsDialog.java
        SearchSettingsDialog.java
        SequenceMatchingSettingsDialog.java
        ValidationQCPreferencesDialog.java
        ValidationQCPreferencesDialogParent.java
        ValidationSettingsDialog.java
        algorithm_settings
        AndromedaSettingsDialog.java
        CometSettingsDialog.java
        DirecTagSettingsDialog.java
        MsAmandaSettingsDialog.java
        MsgfSettingsDialog.java
        MyriMatchSettingsDialog.java
        NovorSettingsDialog.java
        OmssaSettingsDialog.java
        PNovoSettingsDialog.java
        PepNovoSettingsDialog.java
        TideSettingsDialog.java
        XTandemSettingsDialog.java
        protein
        AdvancedProteinDatabaseDialog.java
        ModificationProfile.java
        ProteinSequencePane.java
        SequenceModificationPanel.java
        ptm
        ModificationsDialog.java
        PtmDialog.java
        renderers
        AlignedListCellRenderer.java
        AlignedTableCellRenderer.java
        ByteArrayRenderer.java
        FragmentIonTableCellRenderer.java
        GraphicsPanelTableCellRenderer.java
        TimestampRenderer.java
        ToolTipComboBoxRenderer.java
        spectrum
        ChromatogramPanel.java
        DefaultSpectrumAnnotation.java
        FragmentIonTable.java
        GraphicsPanel.java
        IntensityHistogram.java
        IonLabelColorTableModel.java
        IsotopicDistributionPanel.java
        MassErrorBubblePlot.java
        MassErrorPlot.java
        ReferenceArea.java
        SequenceFragmentationPanel.java
        SpectrumPanel.java
        tablemodels
        SelfUpdatingTableModel.java
        utils
        ConnectionDialog.java
        DateChooser.java
        SwingUtils.java
        user_choice
        ListChooser.java
        list_choosers
        PtmChooser.java
        StringListChooser.java
        variants
        aa_substitutions
        AaSubstitutionMatrixTableModel.java
        waiting
        waitinghandlers
        ProgressDialog.java
        ProgressDialogX.java
        WaitingDialog.java
        WaitingHandlerCLIImpl.java
        WaitingHandlerDummy.java
        interfaces
        Connectable.java
        Flamable.java
        Logger.java
        Modification.java
        Monitorable.java
        PickUp.java
        Sequence.java
        SpectrumFile.java
        TableValueWrapper.java
        io
        ConfigurationFile.java
        FTP.java
        FTPClient.java
        FTPDownloader.java
        FileSystemAccessor.java
        FilenameExtensionFilter.java
        FolderMonitor.java
        FtpConnectionException.java
        FtpLoginException.java
        FtpProtocolException.java
        MascotEnzymeReader.java
        MonitorableFileInputStream.java
        MonitorableInputStream.java
        PklFile.java
        PropertiesManager.java
        PushBackStringReader.java
        SerializationUtils.java
        StartBrowser.java
        StreamGobbler.java
        compression
        TarUtils.java
        ZipUtils.java
        export
        ExportFactory.java
        ExportFeature.java
        ExportFormat.java
        ExportScheme.java
        ExportWriter.java
        WorkbookStyle.java
        styles
        DefaultStyle.java
        writers
        ExcelWriter.java
        TextWriter.java
        xml
        SimpleXmlWriter.java
        filefilters
        DatFileFilter.java
        DtaFileFilter.java
        FastaFileFilter.java
        FileFilterUtils.java
        JpegFileFilter.java
        MgfFileFilter.java
        Ms2FileFilter.java
        MzDataFileFilter.java
        MzMlFileFilter.java
        MzXmlFileFilter.java
        OmxFileFilter.java
        OutFileFilter.java
        PdfFileFilter.java
        PeffFileFilter.java
        PepXmlFileFilter.java
        PklFileFilter.java
        PklSpoFileFilter.java
        PkxFileFilter.java
        PngFileFilter.java
        ProtXmlFileFilter.java
        SequestParamsFileFilter.java
        SvgFileFilter.java
        TiffFileFilter.java
        XmlFileFilter.java
        json
        JsonMarshaller.java
        adapter
        FileAdapter.java
        InterfaceAdapter.java
        marshallers
        IdentificationParametersMarshaller.java
        PTMFactoryMarshaller.java
        PrideMarshaller.java
        junit
        TestCaseLM.java
        maps
        KeyUtils.java
        MapMutex.java
        math
        BasicMathFunctions.java
        BigFunctions.java
        BigMathUtils.java
        HistogramUtils.java
        VennDiagram.java
        clustering
        KMeansClustering.java
        settings
        KMeansClusteringSettings.java
        matrix
        DoubleMatrix.java
        roc
        DataRoc.java
        DistributionRoc.java
        statistics
        Distribution.java
        ROC.java
        distributions
        BinomialDistribution.java
        NonSymmetricalNormalDistribution.java
        NormalDistribution.java
        NormalKernelDensityEstimator.java
        linear_regression
        LinearRegression.java
        RegressionStatistics.java
        filters
        BinningFilter.java
        ProbabilityFilter.java
        regressions
        MedianRegression.java
        SimpleLinearRegression.java
        memory
        MemoryConsumptionStatus.java
        messages
        FeedBack.java
        nucleotide
        NucleotideSequence.java
        NucleotideSequenceImpl.java
        pdbfinder
        FindPdbForUniprotAccessions.java
        das
        readers
        AlignmentBlock.java
        DasAlignment.java
        DasAnnotationServerAlingmentReader.java
        DasAnnotationServerResultReader.java
        DasFeature.java
        StartEndPosition.java
        pdb
        PdbBlock.java
        PdbParameter.java
        preferences
        DigestionPreferences.java
        DummyParameters.java
        FractionSettings.java
        GenePreferences.java
        IdMatchValidationPreferences.java
        IdentificationParameters.java
        LastSelectedFolder.java
        MarshallableParameter.java
        PSProcessingPreferences.java
        PTMScoringPreferences.java
        PeptideVariantsPreferences.java
        ProcessingPreferences.java
        ProteinInferencePreferences.java
        PsmScoringPreferences.java
        SearchGuiOutputOption.java
        SequenceMatchingPreferences.java
        UtilitiesUserPreferences.java
        ValidationQCPreferences.java
        pride
        CvTerm.java
        PrideObject.java
        PrideObjectsFactory.java
        PrideWebService.java
        PtmToPrideMap.java
        prideobjects
        Contact.java
        ContactGroup.java
        Instrument.java
        Protocol.java
        Reference.java
        ReferenceGroup.java
        Sample.java
        webservice
        PrideQuery.java
        file
        FileType.java
        query
        PrideFilter.java
        PrideFilterType.java
        validation
        PrideXmlValidator.java
        XMLValidationErrorHandler.java
        protein
        AASequenceImpl.java
        DualEnzyme.java
        Enzyme.java
        Header.java
        ModificationFactory.java
        ModificationImplementation.java
        ModificationTemplate.java
        MolecularFormula.java
        Protein.java
        RegExEnzyme.java
        protein_sequences_manager
        DownloadingUtil.java
        ProteinSequencesManager.java
        UniProtQuery.java
        enums
        ModelOrganism.java
        SequenceContentType.java
        SequenceInputType.java
        gui
        ProteinSequencesManagerGUI.java
        SequenceDbDetailsDialog.java
        preferences
        ProteinSequencesPreferencesDialog.java
        sequences_import
        ImportSequencesFromDnaDialog.java
        ImportSequencesFromFilesDialog.java
        ImportSequencesFromUniprotDialog.java
        taxonomy
        ConnectionManager.java
        QueryType.java
        TaxonomyTreeDialog.java
        TaxonomyTreePanel.java
        UniprotTaxonomyProvider.java
        sun
        SwingWorker.java
        TableMap.java
        TableSorter.java
        waiting
        Duration.java
        WaitingActionListener.java
        WaitingHandler.java
  - test
    - java
      - com
        compomics
        util
        test
        FullSuite.java
        experiment
        FragmentFactoryTest.java
        io
        AminoAcidPatternParsingTest.java
        AtomChainParsingTest.java
        IdentificationDBTest.java
        identifications
        PepNovoIdfileReaderTest.java
        TestIdfileReaderFactory.java
        spectrum
        SpectrumImportTest.java
        sequences
        digestion
        DigestionTest.java
        ProteinSequenceIteratorTest.java
        indexing
        FMIndexTest.java
        ProteinTreeTest.java
        matching
        AminoAcidPatternTest.java
        SequenceMatchingTest.java
        spectrum
        indexing
        SpectrumAnnotationTest.java
        general
        PtmsiteMappingTest.java
        TestCommandLineParser.java
        TestIsotopicDistributionCalculator.java
        TestMassCalc.java
        servlet
        TstMassCalcServlet.java
        io
        TestFTPClient.java
        TestFilenameExtensionFilter.java
        TestFolderMonitor.java
        TestMascotEnzymeReader.java
        TestMonitorableFileInputStream.java
        TestMonitorableInputStream.java
        TestPushBackStringReader.java
        TestSearchParameterMarshaller.java
        math
        TestBigFunctions.java
        TestBinomialDistribution.java
        nucleotide
        TestNucleotideSequence.java
        TestNucleotideSequenceImpl.java
        protein
        TestAASequenceImpl.java
        TestDualEnzyme.java
        TestEnzyme.java
        TestHeader.java
        TestModificationFactory.java
        TestModificationImplementation.java
        TestProtein.java
        TestRegExEnzyme.java

package com.compomics.util.experiment.identification.protein_inference.proteintree;

import com.compomics.util.Util;
import com.compomics.util.db.DerbyUtil;
import com.compomics.util.exceptions.ExceptionHandler;
import com.compomics.util.experiment.biology.AminoAcid;
import com.compomics.util.experiment.biology.AminoAcidPattern;
import com.compomics.util.experiment.biology.AminoAcidSequence;
import com.compomics.util.experiment.biology.Enzyme;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.biology.Protein;
import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory;
import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory.ProteinIterator;
import com.compomics.util.experiment.identification.TagFactory;
import com.compomics.util.experiment.identification.amino_acid_tags.Tag;
import com.compomics.util.experiment.identification.amino_acid_tags.TagComponent;
import com.compomics.util.experiment.identification.amino_acid_tags.matchers.TagMatcher;
import com.compomics.util.experiment.identification.protein_inference.PeptideMapper;
import com.compomics.util.experiment.identification.protein_inference.PeptideProteinMapping;
import com.compomics.util.math.BasicMathFunctions;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import com.compomics.util.preferences.SequenceMatchingPreferences.MatchingType;
import com.compomics.util.preferences.UtilitiesUserPreferences;
import com.compomics.util.waiting.WaitingHandler;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * This class sorts the proteins into groups.
 *
 * @author Marc Vaudel
 */
public class ProteinTree implements PeptideMapper {

    /**
     * The memory allocation in MB. Set by the constructor and changeable via
     * setMemoryAllocation.
     */
    private int memoryAllocation;
    /**
     * Approximate number of accession*node one can store in a GB of memory
     * (empirical value).
     */
    private static final long cacheScale = 10000;
    /**
     * Instance of the sequence factory.
     */
    private SequenceFactory sequenceFactory = SequenceFactory.getInstance();
    /**
     * The tree containing the accessions indexed by sequence tags. Emptied at
     * the beginning of initiateTree.
     */
    private HashMap<String, Node> tree = new HashMap<String, Node>();
    /**
     * List of the nodes in tree.
     */
    private ArrayDeque<String> tagsInTree = new ArrayDeque<String>();
    /**
     * The size of the tree in memory in accession*node.
     */
    private long treeSize = 0;
    /**
     * Indicates whether a debug file with speed metrics shall be created.
     * When true, the constructor opens the file "treeSpeed.txt" for writing.
     */
    private boolean debugSpeed = false;
    /**
     * Indicates whether the number of passages shall be displayed.
     */
    private boolean debugPassages = false;
    /**
     * The writer used to send the output to a debug file. Only assigned by the
     * constructor when debugSpeed is true; stays null otherwise, or when the
     * debug file could not be opened.
     */
    private BufferedWriter debugSpeedWriter = null;
    /**
     * The node factory when operating in indexed mode. Null until
     * initiateTree assigns the ProteinTreeComponentsFactory instance.
     */
    private ProteinTreeComponentsFactory componentsFactory = null;
    /**
     * Size of the cache of the most queried peptides. The default of 100 is
     * replaced by the value given to the constructor.
     */
    private int cacheSize = 100;
    /**
     * Indicates whether the cache should be used.
     */
    private boolean useCache = true;
    /**
     * Cache of the last queried peptides. Created in the constructor with
     * cacheSize as initial capacity.
     */
    private HashMap<String, ArrayList<PeptideProteinMapping>> lastQueriedPeptidesCache;
    /**
     * Peptide sequences in cache. Note: this field initializer runs before the
     * constructor body, so the initial capacity is the default cacheSize and
     * not the value given to the constructor.
     */
    private ArrayDeque<String> lastQueriedPeptidesCacheContent = new ArrayDeque<String>(cacheSize);
    /**
     * Time in ms after which a query is considered as slow.
     */
    private int queryTimeThreshold = 50;
    /**
     * Cache of the last queried peptides where the query took long. Created in
     * the constructor with cacheSize as initial capacity.
     */
    private HashMap<String, ArrayList<PeptideProteinMapping>> lastSlowQueriedPeptidesCache;
    /**
     * Peptide sequences in slow cache. Note: this field initializer runs
     * before the constructor body, so the initial capacity is the default
     * cacheSize and not the value given to the constructor.
     */
    private ArrayDeque<String> lastSlowQueriedPeptidesCacheContent = new ArrayDeque<String>(cacheSize);
    /**
     * The version of the protein tree. initiateTree compares it with the
     * version stored by the components factory; when they differ the database
     * is reindexed.
     */
    public static final String version = "1.1.2";
    /**
     * The sequence matching preferences of the matches in cache.
     */
    private SequenceMatchingPreferences cacheSequenceMatchingPreferences = null;
    /**
     * Indicates whether the main thread is listening or preparing to wait.
     */
    private boolean listening = true;
    /**
     * The number of proteins which should be imported at a time.
     */
    public static final int proteinBatchSize = 100;
    /**
     * Cache for the protein lengths.
     */
    private HashMap<String, Integer> proteinLengthsCache = new HashMap<String, Integer>();

    /**
     * Creates a tree based on the proteins present in the sequence factory.
     *
     * @param memoryAllocation the number of MB available for the tree in
     * memory.
     * @param cacheSize the peptide queries caches size (note, there are two of
     * them)
     *
     * @throws IOException if an IOException occurs
     */
    public ProteinTree(int memoryAllocation, int cacheSize) throws IOException {

        this.memoryAllocation = memoryAllocation;
        this.cacheSize = cacheSize;
        lastSlowQueriedPeptidesCache = new HashMap<String, ArrayList<PeptideProteinMapping>>(cacheSize);
        lastQueriedPeptidesCache = new HashMap<String, ArrayList<PeptideProteinMapping>>(cacheSize);

        if (debugSpeed) {
            try {
                debugSpeedWriter = new BufferedWriter(new FileWriter(new File("treeSpeed.txt")));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Returns the memory currently allocated to the tree, in MB.
     *
     * @return the memory allocation in MB
     */
    public int getMemoryAllocation() {
        return this.memoryAllocation;
    }

    /**
     * Replaces the memory allocated to the tree.
     *
     * @param memoryAllocation the new memory allocation in MB
     */
    public void setMemoryAllocation(final int memoryAllocation) {
        this.memoryAllocation = memoryAllocation;
    }

    /**
     * Initiates the tree without enzyme. This is a convenience variant which
     * delegates to the initiateTree method taking an enzyme, passing null as
     * enzyme.
     *
     * @param initialTagSize the initial tag size
     * @param maxNodeSize the maximal size of a node. Large nodes will be fast
     * to initiate but slow to query. I typically use 500 giving an approximate
     * query time below 20 ms.
     * @param maxPeptideSize the maximum peptide size
     * @param waitingHandler the waiting handler used to display progress to the
     * user and cancel the process. Can be null but strongly recommended.
     * @param exceptionHandler handler for the exceptions encountered while
     * creating the tree
     * @param printExpectedImportTime if true the expected import time will be
     * printed to the waiting handler
     * @param displayProgress display progress
     * @param nThreads the number of threads to use
     *
     * @throws IOException if an IOException occurs
     * @throws IllegalArgumentException if an IllegalArgumentException occurs
     * @throws InterruptedException if an InterruptedException occurs
     * @throws ClassNotFoundException if a ClassNotFoundException occurs
     * @throws SQLException if an SQLException occurs
     */
    public void initiateTree(int initialTagSize, int maxNodeSize, int maxPeptideSize, WaitingHandler waitingHandler, ExceptionHandler exceptionHandler, boolean printExpectedImportTime, boolean displayProgress, int nThreads)
            throws IOException, IllegalArgumentException, InterruptedException, ClassNotFoundException, SQLException {
        // this variant applies no enzyme
        final Enzyme noEnzyme = null;
        initiateTree(initialTagSize, maxNodeSize, maxPeptideSize, noEnzyme, waitingHandler, exceptionHandler, printExpectedImportTime, displayProgress, nThreads);
    }

    /**
     * Initiates the tree. Note: speed and memory are calibrated for the no
     * enzyme case.
     *
     * @param nThreads the number of threads to use
     * @param initialTagSize the initial size of peptide tag. Large initial size
     * are fast to query, low initial size are fast to initiate. I typically use
     * 3 for databases containing less than 100 000 proteins giving an
     * approximate initiation time of 60ms per accession.
     * @param maxNodeSize the maximal size of a node. large nodes will be fast
     * to initiate but slow to query. I typically use 500 giving an approximate
     * query time <20ms.
     * @param maxPeptideSize the maximum peptide size
     * @param enzyme the enzyme used to select peptides. If null all possible
     * peptides will be indexed
     * @param waitingHandler the waiting handler used to display progress to the
     * user and cancel the process. Can be null but strongly recommended.
     * @param exceptionHandler handler for the exceptions encountered while
     * creating the tree
     * @param printExpectedImportTime if true the expected import time will be
     * printed to the waiting handler
     * @param displayProgress display progress
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while creating the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    public void initiateTree(int initialTagSize, int maxNodeSize, int maxPeptideSize, Enzyme enzyme, WaitingHandler waitingHandler, ExceptionHandler exceptionHandler, boolean printExpectedImportTime, boolean displayProgress, int nThreads)
            throws IOException, InterruptedException, IOException, InterruptedException, ClassNotFoundException, SQLException {

        tree.clear();

        componentsFactory = ProteinTreeComponentsFactory.getInstance();

        try {
            boolean needImport;

            try {
                needImport = !componentsFactory.initiate();
                if (!needImport) {
                    componentsFactory.loadParameters();
                    if (componentsFactory.isCorrupted()) {
                        throw new IllegalArgumentException("Index is corrupted. Database will be reindexed.");
                    }
                    if (!componentsFactory.importComplete()) {
                        throw new IllegalArgumentException("Database import was not successfully completed. Database will be reindexed.");
                    }
                    String tempVersion = componentsFactory.getVersion();
                    if (tempVersion == null || !tempVersion.equals(version)) {
                        throw new IllegalArgumentException("Database index version " + tempVersion + " obsolete. Database will be reindexed.");
                    }
                    if (initialTagSize != componentsFactory.getInitialSize()) {
                        throw new IllegalArgumentException("Different initial size. Database will be reindexed.");
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
                needImport = true;
                DerbyUtil.closeConnection();
                componentsFactory.delete();
                componentsFactory.initiate();
            }

            if (needImport) {
                importDb(initialTagSize, maxNodeSize, maxPeptideSize, enzyme, waitingHandler, exceptionHandler, printExpectedImportTime, displayProgress, nThreads);
            }
        } catch (IOException e) {
            componentsFactory.delete();
            throw e;
        } catch (IllegalArgumentException e) {
            componentsFactory.delete();
            throw e;
        } catch (InterruptedException e) {
            componentsFactory.delete();
            throw e;
        } catch (ClassNotFoundException e) {
            componentsFactory.delete();
            throw e;
        } catch (SQLException e) {
            componentsFactory.delete();
            throw e;
        }

        if (waitingHandler != null && waitingHandler.isRunCanceled()) {
            return;
        }

        try {
            componentsFactory.loadTags();
        } catch (Exception e) {
            // ignore, tree will just be slower
            if (waitingHandler == null || !waitingHandler.isRunCanceled()) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Tries to delete the current database. Note: The delete method will
     * attempt to close the connection. It is thus not needed (and not advised)
     * to close the connection before deleting.
     *
     * Any exception raised by the deletion is printed to the standard error
     * stream and reported as a failure.
     *
     * @return true if the deletion was a success
     */
    public boolean deleteDb() {
        boolean deleted;
        try {
            deleted = componentsFactory.delete();
        } catch (Exception ex) {
            ex.printStackTrace();
            deleted = false;
        }
        return deleted;
    }

    /**
     * Imports the db which is in the sequence factory into the tree and saves
     * it in the nodeFactory.
     *
     * The tags are processed in batches ("passages") whose size is estimated
     * from the memory settings; every batch is handed over to loadTags. Once
     * all batches are done the version, the FASTA file path and the import
     * completion flag are stored in the components factory. When the run is
     * canceled the method returns without setting the completion flag.
     *
     * @param initialTagSize the initial size of peptide tag. The original
     * documentation reports typically using 3 for databases containing less
     * than 100 000 proteins.
     * @param maxNodeSize the maximal size of a node. Large nodes will be fast
     * to initiate but slow to query. The original documentation reports
     * typically using 5000.
     * @param maxPeptideSize the maximum peptide size
     * @param enzyme the enzyme used to select peptides. If null all possible
     * peptides will be indexed
     * @param waitingHandler the waiting handler used to display progress to the
     * user and cancel the process. Can be null but strongly recommended.
     * @param exceptionHandler handler for the exceptions encountered while
     * creating the tree
     * @param printExpectedImportTime if true the expected import time will be
     * printed to the waiting handler, or to the standard output when no
     * reporting waiting handler is available
     * @param displayProgress boolean indicating whether progress shall be
     * displayed using the waiting handler
     * @param nThreads the number of threads to use
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while creating the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private void importDb(int initialTagSize, int maxNodeSize, int maxPeptideSize, Enzyme enzyme, WaitingHandler waitingHandler, ExceptionHandler exceptionHandler, boolean printExpectedImportTime, boolean displayProgress, int nThreads)
            throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        if (printExpectedImportTime) {
            int nSeconds = getExpectedImportTime();
            String report = "Estimated import time: ";

            if (nSeconds < 120) {
                report += nSeconds + " seconds.";
            } else {
                int nMinutes = nSeconds / 60;
                if (nMinutes < 120) {
                    report += nMinutes + " minutes.";
                } else {
                    int nHours = nMinutes / 60;
                    report += nHours + " hours.";
                }
            }

            if (waitingHandler != null && waitingHandler.isReport()) {
                waitingHandler.appendReport(report, true, true);
                waitingHandler.appendReport("    See http://compomics.github.io/compomics-utilities/wiki/proteininference.html.", true, true);
            } else {
                System.out.println(report);
                System.out.println("    See http://compomics.github.io/compomics-utilities/wiki/proteininference.html.");
            }
        }

        componentsFactory.saveInitialSize(initialTagSize);

        ArrayList<String> tags = TagFactory.getAminoAcidCombinations(initialTagSize);

        int nAccessions;
        if (sequenceFactory.isDefaultReversed()) {
            nAccessions = sequenceFactory.getNTargetSequences();
        } else {
            nAccessions = sequenceFactory.getNSequences();
        }

        long tagsSize = 500; // The space needed for tags in percent (empirical value)
        long criticalSize = tagsSize * nAccessions;

        // try to estimate the number of tags we can process at a time given the memory settings. We might want to fine tune this
        long capacity = memoryAllocation * cacheScale;
        long estimatedTreeSize = 6 * criticalSize; // as far as I tested, 6% of the proteins are covered by a tag in general (ie median)
        int ratio = (int) (estimatedTreeSize / capacity);

        if (ratio < 1) {
            ratio = 1;
        }

        // number of tags per batch, at least one
        int nTags = Math.max(1, tags.size() / ratio);

        // number of batches actually run by the loop below: the batch size is
        // truncated, so the count must be derived from nTags and not from ratio
        int nPassages = Math.max(1, (tags.size() + nTags - 1) / nTags);

        if (nPassages > 1) {
            Collections.shuffle(tags);
        }

        if (debugPassages) {
            System.out.println("Estimated tree size: " + estimatedTreeSize);
            System.out.println(new Date() + " " + nPassages + " passages needed (" + nTags + " tags of " + tags.size() + " per passage)");
        }

        if (debugSpeed) {
            debugSpeedWriter.write("Critical size: " + criticalSize);
            System.out.println("Critical size: " + criticalSize);
            estimatedTreeSize = estimatedTreeSize / 100;
            debugSpeedWriter.write("Estimated tree size: " + estimatedTreeSize);
            debugSpeedWriter.write(new Date() + " " + nPassages + " passages needed (" + nTags + " tags of " + tags.size() + " per passage)");
            debugSpeedWriter.newLine();
            debugSpeedWriter.flush();
        }

        if (waitingHandler != null && displayProgress && !waitingHandler.isRunCanceled()) {
            waitingHandler.setSecondaryProgressCounterIndeterminate(false);
            // computed as long and capped so that very large databases cannot overflow the counter
            long exactProgress = (long) nPassages * nAccessions + 2L * tags.size();
            int totalProgress = (int) Math.min((long) Integer.MAX_VALUE, exactProgress);
            waitingHandler.setMaxSecondaryProgressCounter(totalProgress);
            waitingHandler.setSecondaryProgressCounter(0);
        }

        if (waitingHandler != null && waitingHandler.isRunCanceled()) {
            return;
        }

        long time0 = System.currentTimeMillis();

        ArrayList<String> tempTags = new ArrayList<String>(nTags);
        int tagsLoaded = 0;

        for (String tag : tags) {
            // a full batch is processed before the next tag is buffered
            if (tempTags.size() == nTags) {
                loadTags(tempTags, initialTagSize, maxNodeSize, maxPeptideSize, enzyme, nThreads, waitingHandler, exceptionHandler, displayProgress);
                tagsLoaded += tempTags.size();
                tempTags.clear();
                if (debugSpeed) {
                    debugSpeedWriter.write(new Date() + " " + tagsLoaded + " tags of " + tags.size() + " loaded.");
                    System.out.println(new Date() + " " + tagsLoaded + " tags of " + tags.size() + " loaded.");
                    debugSpeedWriter.newLine();
                    debugSpeedWriter.flush();
                }
            }
            tempTags.add(tag);

            if (waitingHandler != null && waitingHandler.isRunCanceled()) {
                return;
            }
        }

        // last, possibly incomplete, batch
        if (!tempTags.isEmpty()) {
            loadTags(tempTags, initialTagSize, maxNodeSize, maxPeptideSize, enzyme, nThreads, waitingHandler, exceptionHandler, displayProgress);
            tagsLoaded += tempTags.size();

            if (debugSpeed) {
                debugSpeedWriter.write(new Date() + " " + tagsLoaded + " tags of " + tags.size() + " loaded.");
                System.out.println(new Date() + " " + tagsLoaded + " tags of " + tags.size() + " loaded.");
                debugSpeedWriter.newLine();
                debugSpeedWriter.flush();
            }
        }

        // NOTE(review): loadTags clears the tree at the end of every batch, so
        // the tree looks empty at this point - confirm whether these statistics
        // are still expected to be gathered here.
        tagsInTree.addAll(tree.keySet());
        for (Node node : tree.values()) {
            treeSize += node.getSize();
        }

        if (waitingHandler != null && waitingHandler.isRunCanceled()) {
            return;
        }

        componentsFactory.setVersion(version);
        componentsFactory.setFastaFilePath(sequenceFactory.getCurrentFastaFile().getAbsolutePath());
        componentsFactory.setImportComplete(true);

        long time1 = System.currentTimeMillis();
        long initiationTime = time1 - time0;

        // the import time is only recorded for databases of more than 1000 sequences
        if (sequenceFactory.getNSequences() > 1000) {
            UtilitiesUserPreferences utilitiesUserPreferences = UtilitiesUserPreferences.loadUserPreferences();
            utilitiesUserPreferences.addProteinTreeImportTime(sequenceFactory.getCurrentFastaFile().length(), initiationTime);
            UtilitiesUserPreferences.saveUserPreferences(utilitiesUserPreferences);
        }

        if (debugSpeed) {
            debugSpeedWriter.write("tree initiation: " + initiationTime + " ms.");
            System.out.println("tree initiation: " + initiationTime + " ms.");
            debugSpeedWriter.newLine();
            debugSpeedWriter.flush();
        }
    }

    /**
     * Estimates the import time for the database in the sequence factory.
     *
     * The estimate is based on the import times recorded in the user
     * preferences: the 5th percentile of the recorded file size over import
     * time ratios is applied, with a 20% margin, to the size of the current
     * FASTA file. When no usable record is available, 16 ms per target
     * sequence are assumed.
     *
     * @return the import time in seconds
     */
    private int getExpectedImportTime() {

        UtilitiesUserPreferences utilitiesUserPreferences = UtilitiesUserPreferences.loadUserPreferences();
        HashMap<Long, ArrayList<Long>> importTimeMap = utilitiesUserPreferences.getProteinTreeImportTime();

        ArrayList<Double> ratios = new ArrayList<Double>();

        for (Long size : importTimeMap.keySet()) {
            if (size == null) {
                continue;
            }
            for (Long time : importTimeMap.get(size)) {
                // records without a positive duration cannot provide a ratio
                if (time != null && time > 0) {
                    // floating point division: an integer division would truncate
                    // the ratio, down to zero when the size is below the time
                    ratios.add(size.doubleValue() / time.doubleValue());
                }
            }
        }

        if (!ratios.isEmpty()) {
            double ratio = BasicMathFunctions.percentile(ratios, 0.05);
            if (ratio > 0) {
                int timeInSeconds = (int) (1.2 * sequenceFactory.getCurrentFastaFile().length() / (1000 * ratio));
                return Math.max(timeInSeconds, 1);
            }
        }

        // no usable record: default estimate
        return sequenceFactory.getNTargetSequences() * 16 / 1000;
    }

    /**
     * Indexes the given tags in the proteins, splits the resulting nodes and
     * saves them, then clears the tree to free memory. The single thread
     * implementations are used when nThreads is 1, the multi thread ones
     * otherwise.
     *
     * @param tags the tags of interest
     * @param initialTagSize the initial tag size
     * @param maxNodeSize the maximal size allowed for a node
     * @param maxPeptideSize the maximal peptide length allowed
     * @param enzyme the enzyme restriction
     * @param nThreads the number of threads to use
     * @param waitingHandler the waiting handler used to display progress to the
     * user and cancel the process. Can be null but strongly recommended.
     * @param exceptionHandler handler for the exceptions encountered while
     * creating the tree
     * @param displayProgress boolean indicating whether progress shall be
     * displayed using the waiting handler
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while creating the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private synchronized void loadTags(ArrayList<String> tags,
            int initialTagSize, int maxNodeSize, int maxPeptideSize, Enzyme enzyme, int nThreads, WaitingHandler waitingHandler, ExceptionHandler exceptionHandler, boolean displayProgress)
            throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        boolean singleThread = nThreads == 1;

        if (singleThread) {
            // find the tags in the proteins and create a node per tag found
            indexProteinsSingleThread(tags, initialTagSize, enzyme, waitingHandler, displayProgress);
            // split the nodes and save them in the db
            processRawNodesSingleThread(tags, maxNodeSize, maxPeptideSize, waitingHandler, displayProgress);
        } else {
            // find the tags in the proteins and create a node per tag found
            indexProteins(tags, initialTagSize, enzyme, waitingHandler, exceptionHandler, displayProgress, nThreads);
            // split the nodes and save them in the db
            processRawNodes(maxNodeSize, maxPeptideSize, waitingHandler, exceptionHandler, displayProgress, nThreads);
        }

        // clear memory before further processing
        tree.clear();
        System.gc();
    }

    /**
     * Iterates all the proteins in the calling thread and indexes the given
     * tags in their sequences. A node is created in the tree map for every tag
     * found and the accession of the protein is added to it together with the
     * indexes of the tag. Proteins with an empty sequence are ignored. If the
     * run is canceled the tree is cleared and the method returns.
     *
     * @param tags the tags to index
     * @param initialTagSize the initial tag size
     * @param enzyme enzyme to use. Can be null
     * @param waitingHandler waiting handler providing feedback on the process
     * and allowing canceling the process
     * @param displayProgress boolean indicating whether progress shall be
     * displayed using the waiting handler
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while creating the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private void indexProteinsSingleThread(ArrayList<String> tags,
            int initialTagSize, Enzyme enzyme, WaitingHandler waitingHandler, boolean displayProgress)
            throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        boolean targetOnly = sequenceFactory.isDefaultReversed();
        ProteinIterator proteins = sequenceFactory.getProteinIterator(targetOnly);

        while (proteins.hasNext()) {

            Protein currentProtein = proteins.getNextProtein();
            String proteinAccession = currentProtein.getAccession();

            // ignore empty protein sequences
            if (currentProtein.getLength() <= 0) {
                continue;
            }

            HashMap<String, ArrayList<Integer>> tagHits = getTagToIndexesMap(currentProtein.getSequence(), tags, enzyme, waitingHandler);

            for (String foundTag : tagHits.keySet()) {

                ArrayList<Integer> positions = tagHits.get(foundTag);

                if (!positions.isEmpty()) {
                    Node tagNode = tree.get(foundTag);
                    if (tagNode == null) {
                        tagNode = new Node(initialTagSize);
                        tree.put(foundTag, tagNode);
                    }
                    tagNode.addAccession(proteinAccession, positions);
                }

                if (waitingHandler != null && waitingHandler.isRunCanceled()) {
                    break;
                }
            }

            if (waitingHandler != null) {
                if (displayProgress) {
                    waitingHandler.increaseSecondaryProgressCounter();
                }
                if (waitingHandler.isRunCanceled()) {
                    tree.clear();
                    return;
                }
            }
        }
    }

    /**
     * Iterates all the proteins and indexes the given tags in their sequences
     * by batches of proteinBatchSize, every batch being handled by a
     * SequenceIndexer run in a thread pool of nThreads threads. When sequence
     * indexers are finished, a node per tag is created and stored in the tree
     * map.
     *
     * At most nThreads indexers are pending at a time. If the run is canceled
     * or finished according to the waiting handler, the pool is stopped, the
     * cache is emptied and the method returns. The pool is always shut down
     * before the method exits, also when an exception is thrown.
     *
     * @param tags the tags to index
     * @param initialTagSize the initial tag size
     * @param enzyme enzyme to use. Can be null
     * @param waitingHandler waiting handler providing feedback on the process
     * and allowing canceling the process
     * @param exceptionHandler handler for the exceptions encountered while
     * creating the tree
     * @param displayProgress boolean indicating whether progress shall be
     * displayed using the waiting handler
     * @param nThreads the number of threads to use
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while creating the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private void indexProteins(ArrayList<String> tags,
            int initialTagSize, Enzyme enzyme, WaitingHandler waitingHandler, ExceptionHandler exceptionHandler, boolean displayProgress, int nThreads)
            throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        ArrayList<Protein> sequenceBuffer = new ArrayList<Protein>(proteinBatchSize);
        ArrayList<SequenceIndexer> sequenceIndexers = new ArrayList<SequenceIndexer>(nThreads);
        ExecutorService pool = Executors.newFixedThreadPool(nThreads);
        boolean completed = false;

        try {
            ProteinIterator proteinIterator = sequenceFactory.getProteinIterator(sequenceFactory.isDefaultReversed());

            while (proteinIterator.hasNext()) {
                Protein protein = proteinIterator.getNextProtein();
                sequenceBuffer.add(protein);
                if (sequenceBuffer.size() == proteinBatchSize) {
                    // wait for a free slot before submitting a new batch
                    while (sequenceIndexers.size() == nThreads) {
                        processFinishedIndexers(sequenceIndexers, initialTagSize);
                    }
                    SequenceIndexer sequenceIndexer = new SequenceIndexer(sequenceBuffer, tags, enzyme, waitingHandler, exceptionHandler, displayProgress);
                    // the indexer is run by the pool threads, no dedicated thread is needed
                    pool.submit(sequenceIndexer);
                    sequenceBuffer = new ArrayList<Protein>(proteinBatchSize);
                    sequenceIndexers.add(sequenceIndexer);
                }
                if (waitingHandler != null) {
                    if (waitingHandler.isRunCanceled() || waitingHandler.isRunFinished()) {
                        pool.shutdownNow();
                        emptyCache();
                        return;
                    }
                }
            }

            // last, possibly incomplete, batch
            if (!sequenceBuffer.isEmpty()) {
                SequenceIndexer sequenceIndexer = new SequenceIndexer(sequenceBuffer, tags, enzyme, waitingHandler, exceptionHandler, displayProgress);
                pool.submit(sequenceIndexer);
                sequenceIndexers.add(sequenceIndexer);
            }

            while (!sequenceIndexers.isEmpty()) {
                processFinishedIndexers(sequenceIndexers, initialTagSize);
            }
            pool.shutdown();
            completed = true;

        } finally {
            // make sure that the worker threads are released if the indexing
            // was aborted, e.g. by an exception (no effect on a pool already stopped)
            if (!completed) {
                pool.shutdownNow();
            }
        }
    }

    /**
     * Splits the raw nodes of the given tags in the calling thread and saves
     * them in the database by batches. The batch size is a third of the tree
     * size, kept between 1000 and 10000 nodes.
     *
     * If the run is canceled or finished according to the waiting handler, the
     * cache is emptied and the method returns without saving the pending
     * batch.
     *
     * @param tags the tags indexed
     * @param maxNodeSize the maximal size allowed for a node
     * @param maxPeptideSize the maximal peptide length allowed
     * @param waitingHandler waiting handler providing feedback on the process
     * and allowing canceling the process
     * @param displayProgress boolean indicating whether progress shall be
     * displayed using the waiting handler
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while creating the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private void processRawNodesSingleThread(ArrayList<String> tags, int maxNodeSize, int maxPeptideSize, WaitingHandler waitingHandler, boolean displayProgress)
            throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        // floating point division so that the result is actually rounded up
        int batchSize = (int) Math.ceil(tree.size() / 3.0);
        batchSize = Math.min(10000, batchSize);
        batchSize = Math.max(1000, batchSize);

        HashMap<String, Object> splittedNodes = new HashMap<String, Object>(batchSize);

        for (String tag : tags) {

            Node node = tree.get(tag);

            if (node != null) {

                node.splitNode(maxNodeSize, maxPeptideSize);
                splittedNodes.put(tag, node);

                if (splittedNodes.size() == batchSize) {
                    componentsFactory.saveNodes(splittedNodes, waitingHandler);
                    splittedNodes.clear();
                }

            }
            if (waitingHandler != null) {
                if (displayProgress) {
                    waitingHandler.increaseSecondaryProgressCounter();
                    // NOTE(review): tags without node are counted twice, presumably
                    // because no progress will be reported for them when saving - confirm
                    if (node == null) {
                        waitingHandler.increaseSecondaryProgressCounter();
                    }
                }
                if (waitingHandler.isRunCanceled() || waitingHandler.isRunFinished()) {
                    emptyCache();
                    return;
                }
            }
        }

        // last, possibly incomplete, batch
        if (!splittedNodes.isEmpty()) {
            componentsFactory.saveNodes(splittedNodes, waitingHandler);
            splittedNodes.clear();
        }
    }

    /**
     * Splits the raw nodes of the tree using NodeSplitters run in a thread pool
     * of nThreads threads and saves the splitted nodes in the database.
     *
     * At most nThreads splitters are pending at a time. If the run is canceled
     * or finished according to the waiting handler, the cache is emptied, the
     * pool is stopped and the method returns. The pool is always shut down
     * before the method exits, also when an exception is thrown. Note that no
     * waiting handler is currently given to the saving of the nodes.
     *
     * @param maxNodeSize the maximal size allowed for a node
     * @param maxPeptideSize the maximal peptide length allowed
     * @param waitingHandler waiting handler providing feedback on the process
     * and allowing canceling the process
     * @param exceptionHandler handler for the exceptions encountered while
     * creating the tree
     * @param displayProgress boolean indicating whether progress shall be
     * displayed using the waiting handler
     * @param nThreads the number of threads to use
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while creating the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private void processRawNodes(int maxNodeSize, int maxPeptideSize, WaitingHandler waitingHandler, ExceptionHandler exceptionHandler, boolean displayProgress, int nThreads)
            throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        ArrayList<NodeSplitter> nodeSplitters = new ArrayList<NodeSplitter>(nThreads);
        ExecutorService pool = Executors.newFixedThreadPool(nThreads);
        boolean completed = false;

        try {
            for (String tag : tree.keySet()) {

                Node node = tree.get(tag);

                // wait for a free slot before submitting a new node
                while (nodeSplitters.size() == nThreads) {
                    processFinishedNodeSplitters(nodeSplitters, null); // @TODO: add waiting handler
                }

                NodeSplitter nodeSplitter = new NodeSplitter(tag, node, maxNodeSize, maxPeptideSize, waitingHandler, exceptionHandler, displayProgress);
                // the splitter is run by the pool threads, no dedicated thread is needed
                pool.submit(nodeSplitter);
                nodeSplitters.add(nodeSplitter);

                if (waitingHandler != null) {
                    if (waitingHandler.isRunCanceled() || waitingHandler.isRunFinished()) {
                        emptyCache();
                        pool.shutdownNow();
                        return;
                    }
                }
            }

            while (!nodeSplitters.isEmpty()) {
                processFinishedNodeSplitters(nodeSplitters, null); // @TODO: add waiting handler
            }
            pool.shutdown();
            completed = true;

        } finally {
            // make sure that the worker threads are released if the processing
            // was aborted, e.g. by an exception (no effect on a pool already stopped)
            if (!completed) {
                pool.shutdownNow();
            }
        }
    }

    /**
     * Removes the finished node splitters from the given list and batch saves
     * their splitted nodes. If no splitter is finished, waits once on this
     * object and looks for finished splitters again; the list can thus be left
     * unchanged and callers are expected to call this method in a loop.
     *
     * The listening flag is set to false while the list is first inspected and
     * to true before waiting and before saving.
     *
     * @param nodeSplitters the node splitters of interest
     * @param waitingHandler the waiting handler given to the saving of the
     * nodes
     *
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while splitting nodes.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     */
    private synchronized void processFinishedNodeSplitters(ArrayList<NodeSplitter> nodeSplitters, WaitingHandler waitingHandler) throws InterruptedException, SQLException, IOException {

        listening = false;
        ArrayList<NodeSplitter> finished = new ArrayList<NodeSplitter>();
        boolean waited = false;

        // inspect the list, and inspect it a second time after waiting once if nothing was finished
        while (true) {
            for (NodeSplitter candidate : nodeSplitters) {
                if (candidate.isFinished()) {
                    finished.add(candidate);
                }
            }
            if (waited || !finished.isEmpty()) {
                break;
            }
            listening = true;
            wait();
            waited = true;
        }

        listening = true;

        HashMap<String, Object> splittedNodes = new HashMap<String, Object>(finished.size());
        for (NodeSplitter finishedSplitter : finished) {
            String tag = finishedSplitter.getTag();
            splittedNodes.put(tag, finishedSplitter.getNode());
            finishedSplitter.clear();
        }

        componentsFactory.saveNodes(splittedNodes, waitingHandler);
        nodeSplitters.removeAll(finished);
    }

    /**
     * Stores the results of the finished indexers in the tree and removes them
     * from the given list. If no indexer is finished, waits once on this object
     * and looks for finished indexers again; the list can thus be left
     * unchanged and callers are expected to call this method in a loop.
     *
     * For every tag found in a protein, a node is created in the tree when
     * missing and the protein accession is added to it with the indexes of the
     * tag. The listening flag is set to false while the list is first inspected
     * and to true before waiting and before storing the results.
     *
     * @param sequenceIndexers the sequence indexers
     * @param initialTagSize the initial tag size
     *
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while processing finished indexers.
     */
    private synchronized void processFinishedIndexers(ArrayList<SequenceIndexer> sequenceIndexers, int initialTagSize) throws InterruptedException {

        listening = false;
        ArrayList<SequenceIndexer> finished = new ArrayList<SequenceIndexer>();
        boolean waited = false;

        // inspect the list, and inspect it a second time after waiting once if nothing was finished
        while (true) {
            for (SequenceIndexer candidate : sequenceIndexers) {
                if (candidate.isFinished()) {
                    finished.add(candidate);
                }
            }
            if (waited || !finished.isEmpty()) {
                break;
            }
            listening = true;
            wait();
            waited = true;
        }

        listening = true;

        for (SequenceIndexer finishedIndexer : finished) {

            HashMap<String, HashMap<String, ArrayList<Integer>>> indexesPerAccession = finishedIndexer.getIndexes();

            for (String accession : indexesPerAccession.keySet()) {

                HashMap<String, ArrayList<Integer>> indexesPerTag = indexesPerAccession.get(accession);

                for (String tag : indexesPerTag.keySet()) {

                    ArrayList<Integer> positions = indexesPerTag.get(tag);
                    if (positions.isEmpty()) {
                        continue;
                    }

                    Node tagNode = tree.get(tag);
                    if (tagNode == null) {
                        tagNode = new Node(initialTagSize);
                        tree.put(tag, tagNode);
                    }
                    tagNode.addAccession(accession, positions);
                }
            }

            finishedIndexer.clear();
        }

        sequenceIndexers.removeAll(finished);
    }

    /**
     * Returns the protein mapping of the given peptide sequence by querying the
     * tree for the sequence as provided (not reversed). When the speed debug
     * mode is on, the peptide sequence, the number of mappings and the query
     * time in milliseconds are written tab separated to the debug writer.
     *
     * @param peptideSequence the peptide sequence
     * @param proteinInferencePreferences the sequence matching preferences
     *
     * @return the peptide to protein mappings
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    @Override
    public ArrayList<PeptideProteinMapping> getProteinMapping(String peptideSequence, SequenceMatchingPreferences proteinInferencePreferences)
            throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        // the clock is only read in speed debug mode
        final long queryStart = debugSpeed ? System.currentTimeMillis() : 0;

        ArrayList<PeptideProteinMapping> mapping = getProteinMapping(peptideSequence, proteinInferencePreferences, false);

        if (!debugSpeed) {
            return mapping;
        }

        long queryTime = System.currentTimeMillis() - queryStart;
        debugSpeedWriter.write(peptideSequence + "\t" + mapping.size() + "\t" + queryTime);
        debugSpeedWriter.newLine();
        debugSpeedWriter.flush();

        return mapping;
    }

    /**
     * Returns the protein mapping in the sequence factory for the given peptide
     * sequence, organized as peptide sequence > protein accession > index in
     * the protein. The result is empty if no mapping is found.
     *
     * @param peptideSequence the peptide sequence
     * @param sequenceMatchingPreferences the sequence matching preferences
     * @param reversed boolean indicating whether we are looking at a reversed
     * peptide sequence
     *
     * @return the peptide to protein mapping: Accession > list of indexes
     * where the peptide can be found on the sequence
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException if an SQLException exception thrown whenever a
     * problem occurred while interacting with the tree database.
     */
    private ArrayList<PeptideProteinMapping> getProteinMapping(String peptideSequence, SequenceMatchingPreferences sequenceMatchingPreferences, boolean reversed) throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        if (useCache && this.cacheSequenceMatchingPreferences != null && !this.cacheSequenceMatchingPreferences.isSameAs(sequenceMatchingPreferences)) {
            emptyCache();
            this.cacheSequenceMatchingPreferences = sequenceMatchingPreferences;
        }

        ArrayList<PeptideProteinMapping> result = null;
        if (useCache) {
            result = lastQueriedPeptidesCache.get(peptideSequence);
        }

        if (result == null) {
            if (useCache) {
                result = lastSlowQueriedPeptidesCache.get(peptideSequence);
            }
            if (result == null) {
                if (sequenceFactory.isDefaultReversed() && useCache) {
                    String reversedSequence = SequenceFactory.reverseSequence(peptideSequence);
                    result = lastQueriedPeptidesCache.get(reversedSequence);
                    if (result == null) {
                        result = lastSlowQueriedPeptidesCache.get(reversedSequence);
                    }
                    if (result != null) {
                        return getReversedResults(result, reversedSequence);
                    }
                }

                long timeStart = System.currentTimeMillis();

                int initialTagSize = componentsFactory.getInitialSize();
                if (peptideSequence.length() < initialTagSize) {
                    throw new IllegalArgumentException("Peptide (" + peptideSequence + ") should be at least of length " + initialTagSize + ".");
                }

                result = new ArrayList<PeptideProteinMapping>(2);

                AminoAcidSequence peptideAminoAcidSequence = new AminoAcidSequence(peptideSequence);
                Double limitX = null;
                if (sequenceMatchingPreferences.hasLimitX()) {
                    limitX = sequenceMatchingPreferences.getLimitX() * peptideSequence.length() / initialTagSize;
                }
                HashSet<String> initialTags = getInitialTags(peptideAminoAcidSequence, sequenceMatchingPreferences, limitX);

                for (String tag : initialTags) {
                    Node node = getNode(tag);
                    if (node != null) {
                        ArrayList<PeptideProteinMapping> tagResults = node.getProteinMapping(peptideAminoAcidSequence, tag, sequenceMatchingPreferences);
                        result.addAll(tagResults);
                    }
                }

                if (sequenceFactory.isDefaultReversed() && !reversed) {
                    String reversedSequence = SequenceFactory.reverseSequence(peptideSequence);
                    ArrayList<PeptideProteinMapping> reversedResult;
                    if (!reversedSequence.equals(peptideSequence)) {
                        reversedResult = getProteinMapping(reversedSequence, sequenceMatchingPreferences, true);
                        reversedResult = getReversedResults(reversedResult, reversedSequence);
                    } else {
                        reversedResult = getReversedResults(result, reversedSequence);
                    }
                    result.addAll(reversedResult);
                }

                if (!reversed && useCache) {
                    long timeEnd = System.currentTimeMillis();
                    long queryTime = timeEnd - timeStart;
                    addToCache(peptideSequence, result, queryTime);
                }
            }
        }

        return result;
    }

    /**
     * Adds a mapping to the cache. Mappings obtained within the query time
     * threshold go to the cache of the last queried peptides, the others go to
     * the cache of the last slow queried peptides. When the number of
     * sequences listed for a cache exceeds the cache size, the sequence which
     * was listed first is removed from this cache.
     *
     * @param peptideSequence the newly mapped peptide sequence
     * @param mapping the protein mapping
     * @param queryTime the mapping time
     */
    private synchronized void addToCache(String peptideSequence, ArrayList<PeptideProteinMapping> mapping, long queryTime) {

        if (queryTime <= queryTimeThreshold) {
            lastQueriedPeptidesCache.put(peptideSequence, mapping);
            lastQueriedPeptidesCacheContent.add(peptideSequence);
            if (lastQueriedPeptidesCacheContent.size() > cacheSize) {
                // add() appends at the end of the list, the sequence to evict is
                // thus the first one: polling the last one would remove the
                // mapping which was just inserted and freeze the cache content
                String key = lastQueriedPeptidesCacheContent.pollFirst();
                lastQueriedPeptidesCache.remove(key);
            }
        } else {
            lastSlowQueriedPeptidesCache.put(peptideSequence, mapping);
            lastSlowQueriedPeptidesCacheContent.add(peptideSequence);
            if (lastSlowQueriedPeptidesCacheContent.size() > cacheSize) {
                // same eviction policy as for the fast queries
                String key = lastSlowQueriedPeptidesCacheContent.pollFirst();
                lastSlowQueriedPeptidesCache.remove(key);
            }
        }
    }

    /**
     * Maps a tag to the proteins of the tree. The longest amino acid pattern
     * or amino acid sequence of the tag which is at least as long as the
     * initial tag size is used as seed (the first one is kept when several
     * have the same length): the sequences it represents are mapped to the
     * proteins, and the tag matcher is then run on the whole tag at every
     * seed position.
     *
     * @param tag the tag to map
     * @param tagMatcher the tag matcher used to match the tag on the protein
     * sequences
     * @param sequenceMatchingPreferences the sequence matching preferences
     * @param massTolerance the mass tolerance passed to the tag matcher
     *
     * @return the mappings returned by the tag matcher for all seeds
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    @Override
    public ArrayList<PeptideProteinMapping> getProteinMapping(Tag tag, TagMatcher tagMatcher, SequenceMatchingPreferences sequenceMatchingPreferences, Double massTolerance) throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        int initialTagSize = componentsFactory.getInitialSize();

        // at most one of the two seeds is set, seedLength is the length of this seed
        AminoAcidPattern seedPattern = null;
        AminoAcidSequence seedSequence = null;
        int seedLength = -1;
        int componentIndex = -1;

        int position = 0;
        for (TagComponent tagComponent : tag.getContent()) {
            if (tagComponent instanceof AminoAcidPattern) {
                AminoAcidPattern candidatePattern = (AminoAcidPattern) tagComponent;
                int candidateLength = candidatePattern.length();
                if (candidateLength >= initialTagSize && candidateLength > seedLength) {
                    componentIndex = position;
                    seedLength = candidateLength;
                    seedPattern = candidatePattern;
                    seedSequence = null;
                }
            } else if (tagComponent instanceof AminoAcidSequence) {
                AminoAcidSequence candidateSequence = (AminoAcidSequence) tagComponent;
                int candidateLength = candidateSequence.length();
                if (candidateLength >= initialTagSize && candidateLength > seedLength) {
                    componentIndex = position;
                    seedLength = candidateLength;
                    seedSequence = candidateSequence;
                    seedPattern = null;
                }
            }
            position++;
        }

        if (componentIndex == -1) {
            throw new IllegalArgumentException("No amino acid sequence longer than " + initialTagSize + " was found for tag " + tag + ".");
        }

        // map the seed to the proteins
        ArrayList<PeptideProteinMapping> seeds = new ArrayList<PeptideProteinMapping>();
        if (seedPattern == null) {
            seeds.addAll(getProteinMapping(seedSequence.getSequence(), sequenceMatchingPreferences));
        } else {
            for (String possibleSequence : seedPattern.getAllPossibleSequences()) {
                // sequences with too many X's are not mapped
                double xShare = ((double) Util.getOccurrence(possibleSequence, 'X')) / possibleSequence.length();
                if (!sequenceMatchingPreferences.hasLimitX() || xShare <= sequenceMatchingPreferences.getLimitX()) {
                    seeds.addAll(getProteinMapping(possibleSequence, sequenceMatchingPreferences));
                }
            }
        }

        // match the entire tag around every seed
        ArrayList<PeptideProteinMapping> tagMappings = new ArrayList<PeptideProteinMapping>(seeds.size());
        for (PeptideProteinMapping seed : seeds) {
            String accession = seed.getProteinAccession();
            String proteinSequence = sequenceFactory.getProtein(accession).getSequence();
            int seedIndex = seed.getIndex();
            tagMappings.addAll(tagMatcher.getPeptideMatches(tag, accession, proteinSequence, seedIndex, componentIndex, massTolerance));
        }

        return tagMappings;
    }

    /**
     * Returns the initial tags which can be obtained from the first amino
     * acids of the given sequence, the number of amino acids used being the
     * initial tag size of the tree. The tags are built one amino acid at a
     * time, every amino acid contributing the letters allowed by the sequence
     * matching type.
     *
     * @param aminoAcidSequence the peptide sequence
     * @param sequenceMatchingPreferences the sequence matching preferences to
     * use
     * @param limitX the maximal share of X's allowed in a tag, the tags are
     * not filtered when null or not lower than 1
     *
     * @return a set of possible initial tags
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private HashSet<String> getInitialTags(AminoAcidSequence aminoAcidSequence, SequenceMatchingPreferences sequenceMatchingPreferences, Double limitX)
            throws SQLException, IOException, ClassNotFoundException, InterruptedException {

        int initialTagSize = componentsFactory.getInitialSize();
        HashSet<String> tags = new HashSet<String>();

        for (int position = 0; position < initialTagSize; position++) {

            AminoAcid aminoAcid = aminoAcidSequence.getAminoAcidAt(position);
            HashSet<String> extendedTags = new HashSet<String>();

            if (tags.isEmpty()) {
                // nothing to extend yet, the tags start at this amino acid
                addTagExtensions("", aminoAcid, sequenceMatchingPreferences, extendedTags);
            } else {
                for (String prefix : tags) {
                    addTagExtensions(prefix, aminoAcid, sequenceMatchingPreferences, extendedTags);
                }
            }

            tags = extendedTags;
        }

        // remove the tags containing too many X's
        if (limitX != null && limitX < 1) {
            HashSet<String> tagsWithinLimit = new HashSet<String>();
            for (String tag : tags) {
                double xShare = ((double) Util.getOccurrence(tag, 'X')) / tag.length();
                if (xShare <= limitX) {
                    tagsWithinLimit.add(tag);
                }
            }
            tags = tagsWithinLimit;
        }

        return tags;
    }

    /**
     * Adds to the given set the tags obtained by appending to the given prefix
     * every letter which can stand for the given amino acid. For the string
     * matching type this is the single letter code of the amino acid only.
     * Otherwise these are the sub amino acids and the combinations of the
     * amino acid, plus I, J and L when the amino acid is one of these three
     * and the matching type is the indistinguishable amino acids one.
     *
     * @param prefix the tag to extend
     * @param aminoAcid the amino acid to append
     * @param sequenceMatchingPreferences the sequence matching preferences to
     * use
     * @param extendedTags the set where to add the extended tags
     */
    private void addTagExtensions(String prefix, AminoAcid aminoAcid, SequenceMatchingPreferences sequenceMatchingPreferences, HashSet<String> extendedTags) {

        if (sequenceMatchingPreferences.getSequenceMatchingType() == MatchingType.string) {
            extendedTags.add(prefix + aminoAcid.singleLetterCode);
            return;
        }

        for (char subAa : aminoAcid.getSubAminoAcids()) {
            extendedTags.add(prefix + subAa);
        }
        for (char combinationAa : aminoAcid.getCombinations()) {
            extendedTags.add(prefix + combinationAa);
        }

        if (sequenceMatchingPreferences.getSequenceMatchingType() == MatchingType.indistiguishableAminoAcids
                && (aminoAcid == AminoAcid.I || aminoAcid == AminoAcid.J || aminoAcid == AminoAcid.L)) {
            extendedTags.add(prefix + "I");
            extendedTags.add(prefix + "J");
            extendedTags.add(prefix + "L");
        }
    }

    /**
     * Converts the given mappings into the mappings of the reversed peptide
     * sequence on the reversed proteins: target accessions are replaced by the
     * corresponding decoy accessions and vice versa, and the index is counted
     * from the other end of the protein.
     *
     * @param forwardResults the given mapping
     * @param sequence the sequence of interest
     *
     * @return the mappings of the reversed sequence
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private ArrayList<PeptideProteinMapping> getReversedResults(ArrayList<PeptideProteinMapping> forwardResults, String sequence) throws SQLException, ClassNotFoundException, IOException, InterruptedException {

        ArrayList<PeptideProteinMapping> reversedMappings = new ArrayList<PeptideProteinMapping>(forwardResults.size());

        for (PeptideProteinMapping forwardMapping : forwardResults) {

            int peptideLength = sequence.length();
            String reversedSequence = SequenceFactory.reverseSequence(sequence);

            String accession = forwardMapping.getProteinAccession();
            boolean decoy = accession.endsWith(SequenceFactory.getDefaultDecoyAccessionSuffix());

            // the length is always looked up with the target accession
            String reversedAccession = decoy
                    ? SequenceFactory.getDefaultTargetAccession(accession)
                    : SequenceFactory.getDefaultDecoyAccession(accession);
            String targetAccession = decoy ? reversedAccession : accession;

            Integer proteinLength = getProteinLength(targetAccession);
            if (proteinLength == null) {
                throw new IllegalArgumentException("Length of protein " + targetAccession + " not found.");
            }

            // mirror the position of the peptide on the protein
            int forwardIndex = forwardMapping.getIndex();
            int reversedIndex = proteinLength - forwardIndex - peptideLength;
            if (reversedIndex < 0 || reversedIndex >= proteinLength) {
                throw new IllegalArgumentException("Wrong index found for peptide " + reversedSequence + " in protein " + reversedAccession + ": " + reversedIndex + ".");
            }

            reversedMappings.add(new PeptideProteinMapping(reversedAccession, reversedSequence, reversedIndex));
        }

        return reversedMappings;
    }

    /**
     * Returns the node of the given tag. The node is taken from the nodes
     * loaded in the tree when available, without locking. Otherwise the
     * synchronized lookup is used, which loads the node from the components
     * factory and updates the cache. Null if not found.
     *
     * @param tag the tag of interest
     *
     * @return the corresponding node
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private Node getNode(String tag) throws SQLException, ClassNotFoundException, IOException, InterruptedException {

        Node loadedNode = tree.get(tag);

        return loadedNode != null ? loadedNode : getNodeSynchronized(tag);
    }

    /**
     * Returns the node of the given tag while holding the lock of the tree.
     * The node is taken from the nodes loaded in the tree when available.
     * Otherwise it is loaded from the components factory and stored in the
     * tree. Before storing a new node, the nodes at the end of the tag list
     * are removed from the tree as long as the tree size exceeds the capacity
     * (memory allocation times cache scale). Null if not found.
     *
     * @param tag the tag of interest
     *
     * @return the corresponding node
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private synchronized Node getNodeSynchronized(String tag) throws SQLException, ClassNotFoundException, IOException, InterruptedException {

        // the node may have been loaded while this thread was waiting for the lock
        Node result = tree.get(tag);

        if (result == null) {

            result = componentsFactory.getNode(tag);

            if (result != null) {

                long capacity = memoryAllocation * cacheScale;

                while (treeSize > capacity && !tagsInTree.isEmpty()) {
                    String tempTag = tagsInTree.pollLast();
                    if (tempTag == null) {
                        // the tag list was emptied in the meantime, nothing left to remove
                        break;
                    }
                    Node tempNode = tree.get(tempTag);
                    if (tempNode != null) {
                        treeSize -= tempNode.getSize();
                        tree.remove(tempTag);
                    }
                    // a null node means that the tree was emptied without the tag
                    // list, emptyCache() not being synchronized: the tag is dropped
                }

                tree.put(tag, result);
                treeSize += result.getSize();
                tagsInTree.addFirst(tag);
            }
        }

        return result;
    }

    /**
     * Closes the tree: flushes and closes the debug speed writer when the
     * speed debugging is enabled, empties the cache, closes the components
     * factory and deletes the outdated trees. Errors occurring while closing
     * the debug speed writer or while deleting the outdated trees are printed
     * and do not interrupt the closing.
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    @Override
    public void close() throws IOException, SQLException {

        // best effort: a failure here must not prevent the factory from being closed
        if (debugSpeed) {
            try {
                debugSpeedWriter.flush();
                debugSpeedWriter.close();
            } catch (Exception writerException) {
                writerException.printStackTrace();
            }
        }

        emptyCache();
        componentsFactory.close();

        // delete outdated trees, best effort as well
        try {
            ProteinTreeComponentsFactory.deletOutdatedTrees();
        } catch (Exception deletionException) {
            deletionException.printStackTrace();
        }
    }

    /**
     * Returns the maximal number of peptide sequences kept in each of the two
     * peptide mapping caches (fast and slow queries).
     *
     * @return the size of the cache used for peptide mappings
     */
    public int getCacheSize() {
        return this.cacheSize;
    }

    /**
     * Sets the size of the cache used for peptide mappings (note that there are
     * two of them). This method only stores the new size, it does not remove
     * anything from the caches.
     *
     * @param cacheSize the size of the cache used for peptide mappings
     */
    public void setCacheSize(int cacheSize) {
        this.cacheSize = cacheSize;
    }

    /**
     * Empties the cache: the nodes loaded in the tree and the list of their
     * tags, the two peptide mapping caches together with their content lists,
     * and the protein lengths cache.
     *
     * NOTE(review): treeSize is not reset here although all nodes are removed
     * from the tree; confirm whether the size accounting used when loading
     * nodes should restart from zero after this call.
     */
    public void emptyCache() {
        // the tree is cleared before its tag list, a tag can thus temporarily
        // be listed without a node
        tree.clear();
        tagsInTree.clear();
        lastQueriedPeptidesCache.clear();
        lastQueriedPeptidesCacheContent.clear();
        lastSlowQueriedPeptidesCache.clear();
        lastSlowQueriedPeptidesCacheContent.clear();
        proteinLengthsCache.clear();
    }

    /**
     * Reduces the node cache size by the given share, removing the nodes at
     * the end of the tag list first. If the cache contains 100 nodes or fewer
     * they will all be removed. The removal stops as soon as the tag list is
     * exhausted or a listed tag has no node in the tree.
     *
     * @param share the share of the cache to remove. 0.5 means 50%
     */
    public synchronized void reduceNodeCacheSize(double share) {
        double limit = tree.size();
        if (limit > 100) {
            limit = share * limit;
        }
        for (int i = 0; i < limit; i++) {
            String tempTag = tagsInTree.pollLast();
            if (tempTag == null) {
                // no tag left in the list, do not query the tree with a null key
                break;
            }
            Node tempNode = tree.get(tempTag);
            if (tempNode == null) {
                // another thread already reduced the cache size
                break;
            }
            treeSize -= tempNode.getSize();
            tree.remove(tempTag);
        }
    }

    /**
     * Returns the number of nodes currently held by the tree, i.e. loaded in
     * cache.
     *
     * @return the number of nodes currently loaded in cache
     */
    public int getNodesInCache() {
        int nodesLoaded = tree.size();
        return nodesLoaded;
    }

    /**
     * Creates a new PeptideIterator on this tree. The iterator goes
     * alphabetically through all peptides corresponding to the end of a
     * branch in the tree.
     *
     * @return a new PeptideIterator on this tree
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    public PeptideIterator getPeptideIterator() throws SQLException, IOException, ClassNotFoundException, InterruptedException {
        PeptideIterator peptideIterator = new PeptideIterator();
        return peptideIterator;
    }

    /**
     * Notifies the tree that a runnable has finished working. The method
     * first waits for the listening flag to be set, checking it again after
     * every timed wait, and then wakes up a single thread waiting on this
     * tree.
     *
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while waiting.
     */
    private synchronized void runnableFinished() throws InterruptedException {
        // wait(10) releases the lock of the tree for up to 10 ms; the flag is
        // presumably set by the thread waiting for the runnables - verify
        while (!listening) {
            wait(10);
        }
        // only one of the threads waiting on this tree is woken up
        notify();
    }

    /**
     * Alphabetical iterator for the tree.
     *
     * Note that the position of the iterator is moved by hasNext(): next()
     * only returns the sequence reached by the last call to hasNext() and
     * getMapping() the corresponding protein mapping. hasNext() should thus be
     * called exactly once before every call to next().
     */
    public class PeptideIterator implements Iterator {

        /**
         * The initial tag size of the tree.
         */
        private Integer initialTagSize;
        /**
         * The list of possible initial tags.
         */
        private ArrayList<String> tags;
        /**
         * The current node. Null when the current peptide ends on the parent
         * node, its mapping is then taken from the termini of the parent node.
         */
        private Node currentNode = null;
        /**
         * The parent node.
         */
        private Node parentNode = null;
        /**
         * The current peptide sequence.
         */
        private String currentSequence = null;
        /**
         * List of amino acids found in the current node subtree if any.
         */
        private ArrayList<Character> aas = null;
        /**
         * The current iterator position in the tags.
         */
        private int i = -1;
        /**
         * The current iterator position in the amino acid list.
         */
        private int j = 0;

        /**
         * Constructor. Loads the initial tag size of the tree and the list of
         * amino acid combinations of this size.
         *
         * @throws IOException exception thrown whenever an error occurs while
         * reading or writing a file.
         * @throws ClassNotFoundException exception thrown whenever an error
         * occurs while deserializing an object.
         * @throws InterruptedException exception thrown whenever a threading
         * issue occurred while interacting with the tree.
         * @throws SQLException exception thrown whenever a problem occurred
         * while interacting with the tree database.
         */
        private PeptideIterator() throws SQLException, IOException, ClassNotFoundException, InterruptedException {
            initialTagSize = componentsFactory.getInitialSize();
            tags = TagFactory.getAminoAcidCombinations(initialTagSize);
        }

        /**
         * Moves the iterator to the next peptide and indicates whether one was
         * found. Contrary to the usual iterator contract this method changes
         * the state of the iterator at every call.
         *
         * @return true if the iterator was moved to a new peptide
         *
         * @throws IllegalArgumentException if an error occurred while
         * iterating the tree, the original exception being set as cause
         */
        @Override
        public boolean hasNext() {

            try {
                // the current node has the depth of the initial tags and carries accessions: move to the next tag
                if (currentNode != null && currentNode.getDepth() == initialTagSize && currentNode.getAccessions() != null && i < tags.size() - 1) {
                    // ok we're done with this node
                    parentNode = null;
                    aas = null;
                    j = 0;
                    currentSequence = tags.get(++i);
                    currentNode = getNode(currentSequence);
                }

                // go through the tags until a node is found
                // NOTE(review): the ++i of the condition is evaluated at every call,
                // also when a node is already loaded - confirm that no tag is skipped
                while (++i < tags.size() && currentNode == null && parentNode == null) {
                    currentSequence = tags.get(i);
                    currentNode = getNode(currentSequence);
                }

                if (i < tags.size()) {
                    if (aas != null) {

                        // we are in a subtree: go back to the sequence of the parent and try the next amino acid
                        int parentDepth = currentSequence.length() - 1;
                        currentSequence = currentSequence.substring(0, parentDepth);

                        if (++j == aas.size()) {
                            // all amino acids were visited, the sequence of the parent is returned if it has termini
                            if (!parentNode.getTermini().isEmpty()) {
                                currentNode = null;
                                return true;
                            } else {
                                j++;
                            }
                        }

                        if (j == aas.size() + 1) {
                            // amino acids and termini of the parent are done
                            if (parentDepth <= initialTagSize) {
                                // ok we're done with this node
                                currentSequence = null;
                                currentNode = null;
                                parentNode = null;
                                aas = null;
                                j = 0;
                            } else {
                                // go one level up in the tree and resume at the amino acid leading to the finished node
                                parentDepth = currentSequence.length() - 1;
                                String parentSequence = currentSequence.substring(0, parentDepth);
                                char aa = currentSequence.charAt(parentDepth);
                                if (parentDepth == initialTagSize) {
                                    parentNode = getNode(parentSequence);
                                } else {
                                    String tag = parentSequence.substring(0, initialTagSize);
                                    parentNode = getNode(tag).getSubNode(parentSequence);
                                }
                                currentNode = parentNode.getSubtree().get(aa);
                                aas = new ArrayList<Character>(parentNode.getSubtree().keySet());
                                Collections.sort(aas);
                                j = aas.indexOf(aa);
                            }

                            return hasNext();
                        }

                        char aa = aas.get(j);
                        currentSequence += aa;
                        currentNode = parentNode.getSubtree().get(aa);
                    }

                    // go down the subtree following the first amino acid in alphabetical order until a node with accessions is found
                    while (currentNode.getAccessions() == null) {

                        j = 0;
                        aas = new ArrayList<Character>(currentNode.getSubtree().keySet());
                        parentNode = currentNode;

                        if (!aas.isEmpty()) {
                            Collections.sort(aas);
                            char aa = aas.get(j);
                            currentSequence += aa;
                            currentNode = currentNode.getSubtree().get(aa);
                        } else {
                            // no subtree, the mapping will be taken from the termini of the parent node
                            currentNode = null;
                            return true;
                        }
                    }

                    return true;
                }

                return false;
            } catch (Exception e) {
                e.printStackTrace();
                // keep the original exception as cause so that it is not lost for the caller
                throw new IllegalArgumentException("An error occurred while iterating the tree. See previous exception.", e);
            }
        }

        /**
         * Returns the current peptide sequence without moving the iterator.
         *
         * @return the current peptide sequence
         */
        @Override
        public Object next() {
            return currentSequence;
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException at every call
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException("ProteinTrees are not editable.");
        }

        /**
         * Returns the protein mapping of the current peptide: the accessions
         * of the current node if there is one, the termini of the parent node
         * otherwise.
         *
         * @return the protein mapping of the current peptide.
         */
        public HashMap<String, ArrayList<Integer>> getMapping() {
            if (currentNode != null) {
                return currentNode.getAccessions();
            } else {
                return parentNode.getTermini();
            }
        }
    }

    /**
     * Returns all the positions of the given tags on the given sequence in a
     * map: tag > list of indexes in the sequence. Every given tag is present
     * in the map, with an empty list when it is not found. When an enzyme is
     * given, only the start of the sequence and the positions following a
     * cleavage site are considered. The search stops when the run is canceled
     * via the waiting handler.
     *
     * NOTE(review): the last position inspected is the sequence length minus
     * the initial tag size minus one, a tag covering the very end of the
     * sequence is thus not indexed; confirm that this is intended.
     *
     * @param sequence the sequence of interest
     * @param tags the tags of interest
     * @param enzyme the enzyme restriction, ignored if null
     * @param waitingHandler waiting handler, ignored if null
     *
     * @return all the positions of the given tags
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private HashMap<String, ArrayList<Integer>> getTagToIndexesMap(String sequence, ArrayList<String> tags, Enzyme enzyme,
            WaitingHandler waitingHandler) throws SQLException, IOException, ClassNotFoundException, InterruptedException {

        HashMap<String, ArrayList<Integer>> indexesPerTag = new HashMap<String, ArrayList<Integer>>(tags.size());
        int tagLength = componentsFactory.getInitialSize();

        // only the tags of interest are tracked
        for (String tagOfInterest : tags) {
            indexesPerTag.put(tagOfInterest, new ArrayList<Integer>());
        }

        int end = sequence.length() - tagLength;

        for (int start = 0; start < end; start++) {

            boolean possibleStart = enzyme == null || start == 0
                    || enzyme.isCleavageSite(sequence.charAt(start - 1), sequence.charAt(start));

            if (possibleStart) {
                String tagAtStart = sequence.substring(start, start + tagLength);
                ArrayList<Integer> indexes = indexesPerTag.get(tagAtStart);
                if (indexes != null) {
                    indexes.add(start);
                }
            }

            if (waitingHandler != null && waitingHandler.isRunCanceled()) {
                break;
            }
        }

        return indexesPerTag;
    }

    /**
     * Retrieves the length of a protein. The length is taken from the protein
     * lengths cache when available, without locking. Otherwise the
     * synchronized lookup is used, which gets the protein from the sequence
     * factory and caches its length.
     *
     * @param accession the accession of the protein of interest
     *
     * @return the length of this protein
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    public Integer getProteinLength(String accession) throws SQLException, ClassNotFoundException, IOException, InterruptedException {
        Integer cachedLength = proteinLengthsCache.get(accession);
        return cachedLength != null ? cachedLength : getProteinLengthSynchronized(accession);
    }

    /**
     * Retrieves the length of a protein.
     *
     * @param accession the accession of the protein of interest
     *
     * @return the length of this protein
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    private synchronized Integer getProteinLengthSynchronized(String accession) throws SQLException, ClassNotFoundException, IOException, InterruptedException {
        // Check the cache again now that the lock is held: the length may have
        // been stored since the caller's unsynchronized lookup.
        Integer cachedLength = proteinLengthsCache.get(accession);
        if (cachedLength != null) {
            return cachedLength;
        }

        Protein protein = sequenceFactory.getProtein(accession);
        if (protein == null) {
            throw new IllegalArgumentException("Length of protein " + accession + " not found.");
        }

        // Remember the length so that later calls are served from the cache.
        Integer proteinLength = protein.getLength();
        proteinLengthsCache.put(accession, proteinLength);
        return proteinLength;
    }

    /**
     * Returns the initial tag size of the tree.
     *
     * @return the initial tag size of the tree
     *
     * @throws IOException exception thrown whenever an error occurs while
     * reading or writing a file.
     * @throws ClassNotFoundException exception thrown whenever an error occurs
     * while deserializing an object.
     * @throws InterruptedException exception thrown whenever a threading issue
     * occurred while interacting with the tree.
     * @throws SQLException exception thrown whenever a problem occurred while
     * interacting with the tree database.
     */
    public Integer getInitialTagSize() throws SQLException, IOException, ClassNotFoundException, InterruptedException {
        // The value is not held by this class; it is read from the components factory.
        final Integer initialSize = componentsFactory.getInitialSize();
        return initialSize;
    }

    /**
     * Runnable used for the indexing of a protein sequence.
     */
    private class SequenceIndexer implements Runnable {

        /**
         * The proteins to process.
         */
        private ArrayList<Protein> proteins;
        /**
         * Boolean indicating whether the run went through all proteins.
         * Volatile because it is written in the synchronized run() method but
         * read by isFinished(), which is not synchronized.
         */
        private volatile boolean finished = false;
        /**
         * List of tags to inspect.
         */
        private ArrayList<String> tags;
        /**
         * The enzyme to use.
         */
        private Enzyme enzyme;
        /**
         * The result of the indexing: protein accession &gt; tag &gt; indexes
         * of the tag on the protein sequence.
         */
        private HashMap<String, HashMap<String, ArrayList<Integer>>> indexes = new HashMap<String, HashMap<String, ArrayList<Integer>>>(proteinBatchSize);
        /**
         * The waiting handler.
         */
        private WaitingHandler waitingHandler;
        /**
         * Boolean indicating whether progress should be displayed.
         */
        private boolean displayProgress;
        /**
         * Handler for the exceptions.
         */
        private ExceptionHandler exceptionHandler;

        /**
         * Constructor.
         *
         * @param proteins the proteins to process
         * @param tags the tags to process
         * @param enzyme enzyme to use (can be null)
         * @param waitingHandler waiting handler providing feedback on the
         * process and allowing canceling the process
         * @param exceptionHandler handler for the exceptions encountered while
         * creating the tree
         * @param displayProgress boolean indicating whether progress shall be
         * displayed on the progress bar of the waiting handler
         */
        public SequenceIndexer(ArrayList<Protein> proteins, ArrayList<String> tags, Enzyme enzyme, WaitingHandler waitingHandler, ExceptionHandler exceptionHandler, boolean displayProgress) {
            this.proteins = proteins;
            this.tags = tags;
            this.enzyme = enzyme;
            this.waitingHandler = waitingHandler;
            this.exceptionHandler = exceptionHandler;
            this.displayProgress = displayProgress;
        }

        /**
         * Indexes every protein of the batch and stores the result in the
         * indexes map. If the run is canceled via the waiting handler the
         * method returns immediately: the finished flag is then not set and
         * runnableFinished() is not called.
         */
        @Override
        public synchronized void run() {

            try {
                for (Protein protein : proteins) {

                    if (waitingHandler != null && waitingHandler.isRunCanceled()) {
                        return;
                    }

                    indexes.put(protein.getAccession(), getTagToIndexesMap(protein.getSequence(), tags, enzyme, waitingHandler));

                    if (displayProgress && waitingHandler != null && !waitingHandler.isRunCanceled()) {
                        waitingHandler.increaseSecondaryProgressCounter();
                    }

                    if (waitingHandler != null && waitingHandler.isRunCanceled()) {
                        return;
                    }
                }
            } catch (Exception ex) {
                handleException(ex);
            }

            // Set even when indexing failed: the exception has been reported
            // above and the runnable will not do any further work.
            finished = true;

            try {
                runnableFinished();
            } catch (InterruptedException ex) {
                // Keep the interrupt status for the code running this thread
                // and report the exception like any other failure instead of
                // only printing it.
                Thread.currentThread().interrupt();
                handleException(ex);
            }
        }

        /**
         * Forwards an exception to the exception handler, or prints its stack
         * trace when no handler was provided.
         *
         * @param ex the exception to report
         */
        private void handleException(Exception ex) {
            if (exceptionHandler != null) {
                exceptionHandler.catchException(ex);
            } else {
                ex.printStackTrace();
            }
        }

        /**
         * Indicates whether the run is finished.
         *
         * @return true if the thread is finished.
         */
        public boolean isFinished() {
            return finished;
        }

        /**
         * Returns the indexes: protein accession &gt; tag &gt; indexes of the
         * tag on the protein sequence
         *
         * @return the indexes
         */
        public HashMap<String, HashMap<String, ArrayList<Integer>>> getIndexes() {
            return indexes;
        }

        /**
         * Clears the content of the runnable. Note that this empties the
         * protein list given to the constructor and the indexes map returned
         * by getIndexes(), as both are used by reference.
         */
        public void clear() {
            proteins.clear();
            tags = null;
            indexes.clear();
        }
    }

    /**
     * Runnable used to process raw nodes and store them in the database.
     */
    private class NodeSplitter implements Runnable {

        /**
         * The tag of the node.
         */
        private String tag;
        /**
         * The node.
         */
        private Node node;
        /**
         * The max node size.
         */
        private int maxNodeSize;
        /**
         * The max peptide size.
         */
        private int maxPeptideSize;
        /**
         * Boolean indicating whether the run is over. Volatile because it is
         * written in the synchronized run() method but read by isFinished(),
         * which is not synchronized.
         */
        private volatile boolean finished = false;
        /**
         * The waiting handler.
         */
        private WaitingHandler waitingHandler;
        /**
         * Boolean indicating whether progress should be displayed.
         */
        private boolean displayProgress;
        /**
         * Handler for the exceptions.
         */
        private ExceptionHandler exceptionHandler;

        /**
         * Constructor.
         *
         * @param tag the tag of the node to split
         * @param node the node to split
         * @param maxNodeSize the maximal size allowed for a node
         * @param maxPeptideSize the maximal peptide length allowed
         * @param waitingHandler waiting handler providing feedback on the
         * process and allowing canceling the process
         * @param exceptionHandler handler for the exceptions encountered while
         * creating the tree
         * @param displayProgress boolean indicating whether progress shall be
         * displayed using the waiting handler
         */
        public NodeSplitter(String tag, Node node, int maxNodeSize, int maxPeptideSize, WaitingHandler waitingHandler, ExceptionHandler exceptionHandler, boolean displayProgress) {
            this.tag = tag;
            this.node = node;
            // These two were previously never stored, so run() always split
            // the node with the field defaults (0, 0) instead of the limits
            // requested by the caller.
            this.maxNodeSize = maxNodeSize;
            this.maxPeptideSize = maxPeptideSize;
            this.waitingHandler = waitingHandler;
            this.exceptionHandler = exceptionHandler;
            this.displayProgress = displayProgress;
        }

        /**
         * Splits the node using the limits given at construction, marks the
         * runnable as finished, updates the secondary progress counter if
         * requested, and finally calls runnableFinished(). Exceptions are
         * forwarded to the exception handler, or printed when none was given.
         */
        @Override
        public synchronized void run() {

            try {
                node.splitNode(maxNodeSize, maxPeptideSize);
            } catch (Exception ex) {
                if (exceptionHandler != null) {
                    exceptionHandler.catchException(ex);
                } else {
                    ex.printStackTrace();
                }
            }

            // Set even when the split failed: the exception has been reported
            // above and the runnable will not do any further work.
            finished = true;

            if (displayProgress && waitingHandler != null && !waitingHandler.isRunCanceled()) {
                waitingHandler.increaseSecondaryProgressCounter();
            }

            try {
                runnableFinished();
            } catch (Exception ex) {
                if (exceptionHandler != null) {
                    exceptionHandler.catchException(ex);
                } else {
                    ex.printStackTrace();
                }
            }
        }

        /**
         * Indicates whether the run is finished.
         *
         * @return true if the thread is finished.
         */
        public boolean isFinished() {
            return finished;
        }

        /**
         * Clears the content of the runnable by releasing the reference to
         * the node. After this call getNode() returns null.
         */
        public void clear() {
            node = null;
        }

        /**
         * Returns the tag of the split node.
         *
         * @return the tag of the split node
         */
        public String getTag() {
            return tag;
        }

        /**
         * Returns the split node.
         *
         * @return the split node
         */
        public Node getNode() {
            return node;
        }
    }
}