package com.compomics.util.experiment.identification.matches_iterators; import com.compomics.util.experiment.identification.Identification; import com.compomics.util.experiment.identification.matches.PeptideMatch; import com.compomics.util.experiment.identification.matches.ProteinMatch; import com.compomics.util.experiment.personalization.UrParameter; import com.compomics.util.waiting.WaitingHandler; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.concurrent.Semaphore; /** * An iterator of protein matches. * * @author Marc Vaudel * @author Harald Barsnes */ public class ProteinMatchesIterator { /** * The identification where to get the matches from. */ private final Identification identification; /** * The keys of the matches to load. */ private final ArrayList<String> proteinKeys; /** * The protein parameters to load along with the matches. */ private final ArrayList<UrParameter> proteinParameters; /** * If true the peptides corresponding to these proteins will be batch loaded * along with the proteins. */ private final boolean loadPeptides; /** * The peptides parameters to load along with the matches. */ private final ArrayList<UrParameter> peptidesParameters; /** * If true the PSMs corresponding to these peptides will be batch loaded * along with the peptides. */ private final boolean loadPsms; /** * The PSM parameters to load along with the matches. */ private final ArrayList<UrParameter> psmParameters; /** * The total number of matches to load. */ private final int nMatches; /** * The current index of the iterator. */ private int index = -1; /** * The default batch size. */ public final int defaultBatchSize = 1000; /** * The current batch size. */ private int batchSize = defaultBatchSize; /** * The current index of the batch loading. */ private int loadingIndex = -1; /** * Boolean indicating whether a thread is buffering. */ private boolean buffering = false; /** * Mutex for the buffering. */ private Semaphore bufferingMutex = new Semaphore(1); /** * The default margin to use to start batch loading before the loading index * is reached. */ public final double defaultMargin = 0.1; /** * The margin to use to start batch loading before the loading index is * reached. */ private double margin = defaultMargin; /** * The waiting handler. */ private WaitingHandler waitingHandler; /** * Constructor. * * @param proteinKeys the keys of the proteins to iterate * @param identification the identification where to get the matches from * @param proteinParameters the protein parameters to load along with the * matches * @param loadPeptides if true the peptides corresponding to these proteins * will be batch loaded along with the proteins * @param peptideParameters the parameters to load along with the peptide * matches * @param loadPsms if true the PSMs of the peptides will be batch loaded * along with the matches * @param psmParameters the parameters to load along with the matches * @param waitingHandler the waiting handler */ public ProteinMatchesIterator(ArrayList<String> proteinKeys, Identification identification, ArrayList<UrParameter> proteinParameters, boolean loadPeptides, ArrayList<UrParameter> peptideParameters, boolean loadPsms, ArrayList<UrParameter> psmParameters, WaitingHandler waitingHandler) { this.identification = identification; if (proteinKeys == null) { this.proteinKeys = new ArrayList<String>(identification.getProteinIdentification()); } else { this.proteinKeys = proteinKeys; } nMatches = this.proteinKeys.size(); this.proteinParameters = proteinParameters; this.loadPeptides = loadPeptides; this.peptidesParameters = peptideParameters; this.loadPsms = loadPsms; this.psmParameters = psmParameters; this.waitingHandler = waitingHandler; if (proteinKeys != null) { // adapt the batch size to avoid the hanging of the progress bar batchSize = Math.max(50, proteinKeys.size() / 100); batchSize = Math.min(1000, Math.max(batchSize, proteinKeys.size() / 1000)); } } /** * Constructor for an iterator iterating all protein keys. * * @param identification the identification where to get the matches from * @param proteinParameters the protein parameters to load along with the * matches * @param loadPeptides if true the peptides corresponding to these proteins * will be batch loaded along with the proteins * @param peptideParameters the parameters to load along with the peptide * matches * @param loadPsms if true the PSMs of the peptides will be batch loaded * along with the matches * @param psmParameters the parameters to load along with the matches * @param waitingHandler the waiting handler */ public ProteinMatchesIterator(Identification identification, ArrayList<UrParameter> proteinParameters, boolean loadPeptides, ArrayList<UrParameter> peptideParameters, boolean loadPsms, ArrayList<UrParameter> psmParameters, WaitingHandler waitingHandler) { this(null, identification, proteinParameters, loadPeptides, peptideParameters, loadPsms, psmParameters, waitingHandler); } /** * Indicates whether the iterator is done iterating. Warning: this method * can be wrong when multi threading. * * @return false if the iterator is done iterating */ public boolean hasNext() { return index < nMatches - 1; } /** * Returns the next match and updates the buffer. Null if the iterator is * done iterating. * * @return the next match * * @throws SQLException exception thrown whenever an error occurred while * interacting with the matches database * @throws IOException exception thrown whenever an error occurred while * interacting with the matches database * @throws ClassNotFoundException exception thrown whenever an error * occurred while deserializing a match from the database * @throws InterruptedException exception thrown whenever a threading issue * occurred while retrieving the match */ public ProteinMatch next() throws SQLException, IOException, ClassNotFoundException, InterruptedException { int threadIndex = incrementIndex(); if (threadIndex < nMatches) { checkBuffer(); String key = proteinKeys.get(threadIndex); ProteinMatch match = identification.getProteinMatch(key); return match; } return null; } /** * Makes sure that the next matches are buffered in the identification * cache. * * @throws SQLException exception thrown whenever an error occurred while * interacting with the matches database * @throws IOException exception thrown whenever an error occurred while * interacting with the matches database * @throws ClassNotFoundException exception thrown whenever an error * occurred while deserializing a match from the database * @throws InterruptedException exception thrown whenever a threading issue * occurred while retrieving the match */ private void checkBuffer() throws SQLException, IOException, ClassNotFoundException, InterruptedException { if (!buffering) { int trigger = loadingIndex - ((int) (margin * batchSize)); if (index >= trigger) { bufferingMutex.acquire(); trigger = loadingIndex - ((int) (margin * batchSize)); if (index >= trigger) { buffering = true; int newLoadingIndex = Math.min(loadingIndex + batchSize, nMatches - 1); ArrayList<String> keysInBatch = new ArrayList<String>(proteinKeys.subList(loadingIndex + 1, newLoadingIndex + 1)); identification.loadProteinMatches(keysInBatch, waitingHandler, false); if (waitingHandler != null && waitingHandler.isRunCanceled()) { return; } if (proteinParameters != null) { for (UrParameter urParameter : proteinParameters) { if (urParameter == null) { throw new IllegalArgumentException("Parameter to batch load is null."); } identification.loadProteinMatchParameters(keysInBatch, urParameter, waitingHandler, false); if (waitingHandler != null && waitingHandler.isRunCanceled()) { return; } } } ArrayList<String> peptideKeys = null; if (loadPeptides || peptidesParameters != null) { peptideKeys = new ArrayList<String>(batchSize); for (String proteinKey : keysInBatch) { ProteinMatch proteinMatch = identification.getProteinMatch(proteinKey); peptideKeys.addAll(proteinMatch.getPeptideMatchesKeys()); } } if (loadPeptides) { identification.loadPeptideMatches(peptideKeys, waitingHandler, false); if (loadPsms) { ArrayList<String> psmKeys = new ArrayList<String>(peptideKeys.size()); for (String peptideKey : peptideKeys) { PeptideMatch peptideMatch = identification.getPeptideMatch(peptideKey); psmKeys.addAll(peptideMatch.getSpectrumMatchesKeys()); } identification.loadSpectrumMatches(psmKeys, waitingHandler, false); if (psmParameters != null) { for (UrParameter urParameter : psmParameters) { if (urParameter == null) { throw new IllegalArgumentException("Parameter to batch load is null."); } identification.loadSpectrumMatchParameters(psmKeys, urParameter, waitingHandler, false); if (waitingHandler != null && waitingHandler.isRunCanceled()) { return; } } } } } if (peptidesParameters != null) { for (UrParameter urParameter : peptidesParameters) { if (urParameter == null) { throw new IllegalArgumentException("Parameter to batch load is null."); } identification.loadPeptideMatchParameters(peptideKeys, urParameter, waitingHandler, false); if (waitingHandler != null && waitingHandler.isRunCanceled()) { return; } } } loadingIndex = newLoadingIndex; trigger += (int) (margin * batchSize / 2); trigger = Math.max(0, trigger); if (index < trigger) { if (batchSize > defaultBatchSize) { batchSize = Math.max(defaultBatchSize, (int) 0.9 * batchSize); } else if (margin > defaultMargin) { margin = Math.max(defaultMargin, 0.9 * margin); } } } if (!bufferingMutex.hasQueuedThreads()) { buffering = false; } bufferingMutex.release(); } } else if (index == loadingIndex) { margin *= 1.1; if (margin >= 0.25) { batchSize *= 1.1; margin = defaultMargin; } } } /** * Increments the index of the iterator. * * @return an integer with value the incremented index */ private synchronized int incrementIndex() { int localIndex = ++index; return localIndex; } /** * Set the batch size. * * @param batchSize the batch size */ public void setBatchSize(int batchSize) { this.batchSize = batchSize; } }