/*
* Genoogle: Similar DNA Sequences Searching Engine and Tools. (http://genoogle.pih.bio.br)
* Copyright (C) 2008,2009,2010,2011,2012 Felipe Fernandes Albrecht (felipe.albrecht@gmail.com)
*
* For further information check the LICENSE file.
*/
package bio.pih.genoogle.search;
import java.util.List;
import bio.pih.genoogle.index.SubSequenceIndexInfo;
import bio.pih.genoogle.util.CircularArrayList;
import bio.pih.genoogle.util.Poll;
import com.google.common.collect.Lists;
/**
* Stores the data retrieved from the Inverted Index.
*
* @author albrecht
*/
public class IndexRetrievedData {
private final List<RetrievedArea>[] retrievedAreasArray;
private final CircularArrayList[] openedAreasArray;
private final Poll poll = new Poll();
private final int minLength;
private final int subSequenceLength;
private final int maxSubSequenceDistance;
public int hits;
/**
* Constructor.
*
* @param size
* Quantity of sequences stored in the data bank.
* @param sp
* Search parameters.
* @param subSequenceLength
* Sub sequences length.
* @param searcher
* Index searcher that is used.
*/
public IndexRetrievedData(int size, SearchParams sp, int subSequenceLength, IndexSearcher searcher) {
this(size, sp, subSequenceLength, searcher, null);
}
@SuppressWarnings("unchecked")
public IndexRetrievedData(int size, SearchParams sp, int subSequenceLength, IndexSearcher searche, List<RetrievedArea>[] retrievedData) {
this.minLength = sp.getMinHspLength();
this.subSequenceLength = subSequenceLength;
this.maxSubSequenceDistance = sp.getMaxSubSequencesDistance();
if (retrievedData == null) {
this.retrievedAreasArray = new List[size];
} else {
this.retrievedAreasArray = retrievedData;
}
openedAreasArray = new CircularArrayList[size];
}
/**
* Insert a found subSequences and check if it will be merged or added as a
* new area.
*
* @param queryPos
* sub-sequence position in the query.
* @param subSequenceInfoIntRepresention
* representation of the sub sequence by
* {@link SubSequenceIndexInfo}.
*/
final void addSubSequenceInfoIntRepresention(int queryPos, long subSequenceInfoIntRepresention) {
int sequencePos = SubSequenceIndexInfo.getStart(subSequenceInfoIntRepresention);
int sequenceId = SubSequenceIndexInfo.getSequenceId(subSequenceInfoIntRepresention);
mergeOrRemoveOrNew(queryPos, sequencePos, sequenceId);
}
/**
* Merge the subsequence or create a new retrieved area.
*
* @param queryPos
* Position in the query.
* @param sequencePos
* Position in the data bank sequence.
* @param sequenceId
* Data bank sequence id.
*/
private final void mergeOrRemoveOrNew(int queryPos, int sequencePos, int sequenceId) {
CircularArrayList openedList = openedAreasArray[sequenceId];
if (openedList == null) {
openedList = poll.pop();
}
if (openedList.size() == 0) {
openedAreasArray[sequenceId] = openedList;
openedList.add(queryPos, sequencePos, subSequenceLength);
} else {
boolean merged = false;
int totalRemove = 0;
int pos = 0;
for (RetrievedArea openedArea = openedList.get(pos); (openedArea = openedList.get(pos)) != null; pos++) {
// Try merge with previous area.
if (openedArea.testAndSet(queryPos, sequencePos, maxSubSequenceDistance, subSequenceLength)) {
merged = true;
openedList.rePos(openedArea, pos);
// Check if the area end is away from the actual sequence
// position.
} else if (queryPos - openedArea.getQueryAreaEnd() > maxSubSequenceDistance) {
// Count areas to remove.
totalRemove++;
if (openedArea.length() >= minLength) {
if (retrievedAreasArray[sequenceId] == null) {
retrievedAreasArray[sequenceId] = Lists.newArrayList();
}
hits++;
retrievedAreasArray[sequenceId].add(openedArea.copy());
}
}
}
if (totalRemove != 0) {
openedList.removeElements(totalRemove);
}
if (!merged) {
openedList.add(queryPos, sequencePos, subSequenceLength);
} else {
if (openedList.size() == 0) {
poll.push(openedList);
openedAreasArray[sequenceId] = null;
}
}
}
}
/**
* Finish the index searching process. It will close all retrieved areas and
* it will check if the areas has the minumun length.
*
* @return all {@link RetrievedArea} that has at least the minimum length.
*/
public List<RetrievedArea>[] finish() {
for (int sequenceId = 0; sequenceId < openedAreasArray.length; sequenceId++) {
CircularArrayList openedList = openedAreasArray[sequenceId];
if (openedList != null) {
int pos = 0;
for (RetrievedArea openedArea = openedList.get(pos); (openedArea = openedList.get(pos)) != null; pos++) {
if (openedArea.length() >= minLength) {
if (retrievedAreasArray[sequenceId] == null) {
retrievedAreasArray[sequenceId] = Lists.newArrayList();
}
retrievedAreasArray[sequenceId].add(openedArea);
}
}
}
}
return retrievedAreasArray;
}
/**
* Get the retrieved areas.
*
* @return all {@link RetrievedArea} that has at least the minimum length.
*/
public List<RetrievedArea>[] getRetrievedAreasArray() {
return retrievedAreasArray;
}
}