/* * Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br) * Copyright (C) 2008,2009 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com) * * For further information check the LICENSE file. */ package bio.pih.genoogle.search; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import org.apache.log4j.Logger; import bio.pih.genoogle.encoder.SequenceEncoder; import bio.pih.genoogle.encoder.SequenceEncoderFactory; import bio.pih.genoogle.io.IndexedSequenceDataBank; import bio.pih.genoogle.io.Utils; import bio.pih.genoogle.seq.IllegalSymbolException; import bio.pih.genoogle.seq.SymbolList; import bio.pih.genoogle.statistics.MatchDismatchStatistics; import bio.pih.genoogle.statistics.Statistics; public class IndexBothStrandSearcher implements Callable<IndexSearchResults> { private IndexSearcher searcher; private IndexReverseComplementSearcher crSearcher; private static final Logger logger = Logger.getLogger(IndexBothStrandSearcher.class.getName()); private final long id; private final SearchParams sp; private final IndexedSequenceDataBank databank; private final ArrayList<RetrievedArea>[] retrievedAreas; private final ArrayList<RetrievedArea>[] rcRetrievedAreas; private final List<Throwable> fails; private final ExecutorService executor; @SuppressWarnings("unchecked") public IndexBothStrandSearcher(long id, SearchParams sp, IndexedSequenceDataBank databank, ExecutorService executor, List<Throwable> fails) { this.id = id; this.sp = sp; this.databank = databank; this.executor = executor; this.fails = fails; int numberOfSequences = databank.getNumberOfSequences(); this.retrievedAreas = new ArrayList[numberOfSequences]; this.rcRetrievedAreas = new ArrayList[numberOfSequences]; for (int i = 0; i < numberOfSequences; i++) { retrievedAreas[i] = new ArrayList<RetrievedArea>(0); rcRetrievedAreas[i] = new ArrayList<RetrievedArea>(0); } } @Override public IndexSearchResults call() throws InterruptedException { long searchBegin = System.currentTimeMillis(); SymbolList query = sp.getQuery(); Statistics statistics = new MatchDismatchStatistics(databank.getAlphabet(), databank.getEncoder(), sp.getMatchScore(), sp.getMismatchScore(), query, databank.getTotalDataBaseSize(), databank.getTotalNumberOfSequences()); String seqString = query.seqString(); int subSequenceLength = databank.getSubSequenceLength(); SequenceEncoder encoder = SequenceEncoderFactory.getEncoder(databank.getAlphabet(), subSequenceLength); int[] encodedQuery = encoder.encodeSymbolListToIntegerArray(query); String inverted = Utils.invert(query.seqString()); String rcString = Utils.sequenceComplement(inverted); SymbolList rcQuery = null; // this try/catch should never happens, because the rc string is create by a verified sequence. try { rcQuery = query.createSequence(rcString); } catch (IllegalSymbolException e) { logger.fatal(e); return null; } int[] rcEncodedQuery = encoder.encodeSymbolListToIntegerArray(rcQuery); int length = query.getLength(); int querySplitQuantity = sp.getQuerySplitQuantity(); int minLength = sp.getMinQuerySliceLength(); int sliceSize = length / querySplitQuantity; while (sliceSize < minLength && querySplitQuantity != 1) { querySplitQuantity--; sliceSize = length / querySplitQuantity; } CountDownLatch indexSearchersCountDown = new CountDownLatch(querySplitQuantity * 2); logger.info("(" + id + ") " + querySplitQuantity + " threads with slice query with " + length + " bases."); for (int i = 0; i < querySplitQuantity; i++) { int begin = (sliceSize * i); int end = (sliceSize * i) + sliceSize + (sp.getMinHspLength() - subSequenceLength); if (end > length) { end = length; } logger.info("(" + id + ") " + i + " [" + begin + " - " + end + "]."); String sliceQuery = seqString.substring(begin, end); String rcSliceQuery = rcString.substring(begin, end); submitSearch(sliceQuery, begin, query, encodedQuery, statistics, indexSearchersCountDown); submitRCSearch(rcSliceQuery, begin, rcQuery, rcEncodedQuery, statistics, indexSearchersCountDown); } indexSearchersCountDown.await(); if (fails.size() > 0) { return null; } IndexSearchResults results = new IndexSearchResults(searcher, crSearcher); int numberOfSequences = databank.getNumberOfSequences(); for (int i = 0; i < numberOfSequences; i++) { ArrayList<RetrievedArea> areas1 = retrievedAreas[i]; ArrayList<RetrievedArea> areas2 = rcRetrievedAreas[i]; if (areas1.size() > 0 || areas2.size() > 0) { @SuppressWarnings("unchecked") RetrievedSequenceAreas retrievedAreas = new RetrievedSequenceAreas(i, searcher.getDatabank(), areas1, areas2); results.add(retrievedAreas); } } logger.info("(" + id + ") " + "Index search time: " + (System.currentTimeMillis() - searchBegin)); return results; } private void submitSearch(String sliceQuery, int offset, SymbolList fullQuery, int[] encodedQuery, Statistics statistics, CountDownLatch countDown) { searcher = new IndexSearcher(id, sp, databank, sliceQuery, offset, fullQuery, encodedQuery, retrievedAreas, statistics, countDown, fails, 1); executor.submit(searcher); } private void submitRCSearch(String sliceQuery, int offset, SymbolList fullQuery, int[] encodedQuery, Statistics statistics, CountDownLatch countDown) { crSearcher = new IndexReverseComplementSearcher(id, sp, databank, sliceQuery, offset, fullQuery, encodedQuery, rcRetrievedAreas, statistics, countDown, fails, 1); executor.submit(crSearcher); } }