/*
* Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
* Copyright (C) 2008,2009, 2010, 2011, 2012 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
*
* For further information check the LICENSE file.
*/
package bio.pih.genoogle.search;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import org.apache.log4j.Logger;
import bio.pih.genoogle.alignment.SubstitutionMatrix;
import bio.pih.genoogle.encoder.SequenceEncoder;
import bio.pih.genoogle.io.RemoteSimilaritySequenceDataBank;
import bio.pih.genoogle.seq.SymbolList;
import bio.pih.genoogle.seq.protein.Converter;
import bio.pih.genoogle.statistics.SubstitutionMatrixStatistics;
/**
 * Runs six index searches for one DNA query against a protein data bank and merges what they
 * retrieved into a single {@link IndexSearchResults}.
 *
 * <p>The query is translated with the three {@code Converter.dnaToProtein*} methods and the
 * three {@code Converter.dnaToProteinComplement*} methods, each translation is reduced with
 * {@code Converter.proteinToReducedAA} and encoded with the data bank's reduced encoder, and one
 * searcher per translation is submitted to the supplied executor.
 */
public class IndexSixFramesSearcher implements Callable<IndexSearchResults> {

    private static final Logger logger = Logger.getLogger(IndexSixFramesSearcher.class.getName());

    /** Searches submitted per query: three forward translations plus three complement ones. */
    private static final int TOTAL_SEARCHES = 6;

    private final long id;
    private final SearchParams sp;
    private final RemoteSimilaritySequenceDataBank databank;

    // One slot per data bank sequence; a slot stays null unless a searcher stores areas there.
    private final ArrayList<RetrievedArea>[] retrievedAreasFrame1;
    private final ArrayList<RetrievedArea>[] retrievedAreasFrame2;
    private final ArrayList<RetrievedArea>[] retrievedAreasFrame3;
    private final ArrayList<RetrievedArea>[] rcRetrievedAreasFrame1;
    private final ArrayList<RetrievedArea>[] rcRetrievedAreasFrame2;
    private final ArrayList<RetrievedArea>[] rcRetrievedAreasFrame3;

    private final List<Throwable> fails;
    private final ExecutorService executor;
    private final SequenceEncoder encoder;

    /**
     * Creates the searcher and allocates the per-sequence result slots.
     *
     * @param id identifier of the search, used in the log output
     * @param sp search parameters; the query is read from them in {@link #call()}
     * @param databank data bank to search; supplies the reduced encoder and the sequence count
     * @param executor executor the six individual searchers are submitted to
     * @param fails shared list handed to every searcher; when it is non-empty after all
     *        searchers were awaited, {@link #call()} returns {@code null}
     */
    public IndexSixFramesSearcher(long id, SearchParams sp, RemoteSimilaritySequenceDataBank databank,
            ExecutorService executor, List<Throwable> fails) {
        this.id = id;
        this.sp = sp;
        this.databank = databank;
        this.executor = executor;
        this.fails = fails;

        final int slots = databank.getNumberOfSequences();
        this.encoder = databank.getReducedEncoder();

        this.retrievedAreasFrame1 = newAreaSlots(slots);
        this.retrievedAreasFrame2 = newAreaSlots(slots);
        this.retrievedAreasFrame3 = newAreaSlots(slots);
        this.rcRetrievedAreasFrame1 = newAreaSlots(slots);
        this.rcRetrievedAreasFrame2 = newAreaSlots(slots);
        this.rcRetrievedAreasFrame3 = newAreaSlots(slots);
    }

    /** Allocates an array of {@code size} empty (null) area-list slots. */
    @SuppressWarnings("unchecked")
    private static ArrayList<RetrievedArea>[] newAreaSlots(int size) {
        // Generic array creation is not allowed, so a raw array is created and narrowed here.
        return new ArrayList[size];
    }

    // TODO: Fix statistics (correct alphabet and match and mismatch scores)
    /**
     * Translates, reduces and encodes the query, submits the six searchers, waits on the shared
     * latch and collects the retrieved areas of every sequence that has at least one.
     *
     * @return the merged results, or {@code null} when {@code fails} is not empty after waiting
     * @throws InterruptedException if the thread is interrupted while waiting on the latch
     */
    @Override
    public IndexSearchResults call() throws InterruptedException {
        final long startedAt = System.currentTimeMillis();
        final SymbolList query = sp.getQuery();

        // Indexes 0-2 hold the forward translations, 3-5 the complement translations.
        final SymbolList[] proteins = {
            Converter.dnaToProtein(query),
            Converter.dnaToProtein2(query),
            Converter.dnaToProtein3(query),
            Converter.dnaToProteinComplement1(query),
            Converter.dnaToProteinComplement2(query),
            Converter.dnaToProteinComplement3(query)
        };

        final SymbolList[] reduced = new SymbolList[TOTAL_SEARCHES];
        for (int k = 0; k < TOTAL_SEARCHES; k++) {
            reduced[k] = Converter.proteinToReducedAA(proteins[k]);
        }

        final int[][] encoded = new int[TOTAL_SEARCHES][];
        for (int k = 0; k < TOTAL_SEARCHES; k++) {
            encoded[k] = encoder.encodeSymbolListToIntegerArray(reduced[k]);
        }

        // Shared with every searcher; presumably each one counts it down when it finishes
        // (not visible in this class).
        final CountDownLatch pending = new CountDownLatch(TOTAL_SEARCHES);

        final IndexSearcher forward1 =
                submitSearch(reduced[0].seqString(), 0, proteins[0], encoded[0], pending, 1, retrievedAreasFrame1);
        final IndexSearcher forward2 =
                submitSearch(reduced[1].seqString(), 0, proteins[1], encoded[1], pending, 2, retrievedAreasFrame2);
        final IndexSearcher forward3 =
                submitSearch(reduced[2].seqString(), 0, proteins[2], encoded[2], pending, 3, retrievedAreasFrame3);
        final IndexSearcher reverse1 =
                submitRCSearch(reduced[3].seqString(), 0, proteins[3], encoded[3], pending, 1, rcRetrievedAreasFrame1);
        final IndexSearcher reverse2 =
                submitRCSearch(reduced[4].seqString(), 0, proteins[4], encoded[4], pending, 2, rcRetrievedAreasFrame2);
        final IndexSearcher reverse3 =
                submitRCSearch(reduced[5].seqString(), 0, proteins[5], encoded[5], pending, 3, rcRetrievedAreasFrame3);

        pending.await();

        // A recorded failure invalidates the whole search.
        if (!fails.isEmpty()) {
            return null;
        }

        final IndexSearchResults results =
                new IndexSearchResults(forward1, forward2, forward3, reverse1, reverse2, reverse3);

        final int sequenceCount = databank.getNumberOfSequences();
        for (int seq = 0; seq < sequenceCount; seq++) {
            final ArrayList<RetrievedArea> fwd1 = retrievedAreasFrame1[seq];
            final ArrayList<RetrievedArea> fwd2 = retrievedAreasFrame2[seq];
            final ArrayList<RetrievedArea> fwd3 = retrievedAreasFrame3[seq];
            final ArrayList<RetrievedArea> rev1 = rcRetrievedAreasFrame1[seq];
            final ArrayList<RetrievedArea> rev2 = rcRetrievedAreasFrame2[seq];
            final ArrayList<RetrievedArea> rev3 = rcRetrievedAreasFrame3[seq];

            // Sequences for which no searcher stored anything are left out of the results.
            final boolean nothingRetrieved = fwd1 == null && fwd2 == null && fwd3 == null
                    && rev1 == null && rev2 == null && rev3 == null;
            if (nothingRetrieved) {
                continue;
            }

            @SuppressWarnings("unchecked")
            RetrievedSequenceAreas sequenceAreas = new RetrievedSequenceAreas(seq, forward1.getDatabank(),
                    fwd1, fwd2, fwd3, rev1, rev2, rev3);
            results.add(sequenceAreas);
        }

        final long elapsedMillis = System.currentTimeMillis() - startedAt;
        logger.info("(" + id + ") " + "Index search time: " + elapsedMillis);
        return results;
    }

    /**
     * Builds the BLOSUM62 statistics used by a single searcher.
     *
     * @param fullQuery the translated query the statistics are computed for
     * @return statistics based on the data bank's amino acid alphabet and total sizes
     */
    private SubstitutionMatrixStatistics newStatistics(SymbolList fullQuery) {
        return new SubstitutionMatrixStatistics(databank.getAaEncoder().getAlphabet(),
                SubstitutionMatrix.BLOSUM62, fullQuery, databank.getTotalDataBaseSize(),
                databank.getTotalNumberOfSequences());
    }

    /**
     * Creates an {@link IndexSearcher} for one translated query and submits it to the executor.
     *
     * @param sliceQuery string form of the reduced query
     * @param offset offset handed to the searcher
     * @param fullQuery translated (unreduced) query
     * @param encodedQuery reduced query encoded by the data bank's reduced encoder
     * @param countDown latch shared by all searchers of this search
     * @param frame frame number handed to the searcher (1 to 3)
     * @param retrievedAreas per-sequence slots handed to the searcher
     * @return the submitted searcher
     */
    private IndexSearcher submitSearch(String sliceQuery, int offset, SymbolList fullQuery, int[] encodedQuery,
            CountDownLatch countDown, int frame, List<RetrievedArea>[] retrievedAreas) {
        final SubstitutionMatrixStatistics stats = newStatistics(fullQuery);
        final IndexSearcher task = new IndexSearcher(id, sp, databank, encoder, encoder.getSubSequenceLength(),
                sliceQuery, offset, fullQuery, encodedQuery, retrievedAreas, stats, countDown, fails, frame);
        executor.submit(task);
        return task;
    }

    /**
     * Same as {@link #submitSearch} but creates an {@link IndexReverseComplementSearcher}.
     *
     * @param sliceQuery string form of the reduced query
     * @param offset offset handed to the searcher
     * @param fullQuery translated (unreduced) query
     * @param encodedQuery reduced query encoded by the data bank's reduced encoder
     * @param countDown latch shared by all searchers of this search
     * @param frame frame number handed to the searcher (1 to 3)
     * @param retrievedAreas per-sequence slots handed to the searcher
     * @return the submitted searcher
     */
    private IndexSearcher submitRCSearch(String sliceQuery, int offset, SymbolList fullQuery, int[] encodedQuery,
            CountDownLatch countDown, int frame, List<RetrievedArea>[] retrievedAreas) {
        final SubstitutionMatrixStatistics stats = newStatistics(fullQuery);
        final IndexSearcher task = new IndexReverseComplementSearcher(id, sp, databank, encoder,
                encoder.getSubSequenceLength(), sliceQuery, offset, fullQuery, encodedQuery, retrievedAreas,
                stats, countDown, fails, frame);
        executor.submit(task);
        return task;
    }
}