package org.jactr.core.module.declarative.search.local;
/*
* default logging
*/
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.SortedSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jactr.core.chunk.IChunk;
import org.jactr.core.chunktype.IChunkType;
import org.jactr.core.module.declarative.search.filter.DelegatedFilter;
import org.jactr.core.module.declarative.search.filter.IChunkFilter;
import org.jactr.core.module.declarative.search.filter.SlotFilter;
import org.jactr.core.production.request.ChunkTypeRequest;
import org.jactr.core.slot.IConditionalSlot;
import org.jactr.core.slot.ISlot;
import org.jactr.core.utils.collections.SkipListSetFactory;
/**
 * Default exact-match search delegate. The whole search runs as a plain
 * sequential loop on the calling thread; this class itself starts no threads.
 * <p>
 * The search estimates the result size of every slot in the request, queries
 * the slots from the smallest estimate to the largest while intersecting the
 * results, and finally filters the survivors by chunk type and by the
 * supplied filters into a set ordered by the requested comparator.
 * <p>
 * Two system properties, read once per instance, tune the handling of
 * {@code NOT_EQUALS} slots: {@code jactr.search.enableNotFilters} and
 * {@code jactr.search.testNotFilters}.
 *
 * @author harrison
 */
public class ExactSingleThreadedSearchDelegate implements ISearchDelegate
{
  /**
   * Logger definition
   */
  private static final Log LOGGER = LogFactory
      .getLog(ExactSingleThreadedSearchDelegate.class);

  /**
   * when set (system property <code>jactr.search.enableNotFilters</code>),
   * slots are ordered through
   * {@link #sortPattern(IChunkType, Collection, List, DefaultSearchSystem)},
   * which may replace not-equals slots with filters.
   */
  protected final boolean _enableNotFilters = Boolean
      .getBoolean("jactr.search.enableNotFilters");

  /**
   * will do all the filter processing, but not actually swap out the filter
   * for the search. this tests the overhead of building the filters. Note
   * that when this flag is set no slot is converted, even if
   * {@link #_enableNotFilters} is set as well.
   */
  protected final boolean _testNotFilter = Boolean
      .getBoolean("jactr.search.testNotFilters");

  public ExactSingleThreadedSearchDelegate()
  {
  }

  /**
   * Find all chunks matching the request.
   * <p>
   * If the request has no slots at all, every chunk of the request's chunk
   * type is used as the candidate set; if the chunk type is also
   * <code>null</code>, all chunks of the module are fetched (and an error is
   * logged, as this requires a full traversal).
   *
   * @param pattern
   *          the request providing the chunk type and the slots to match
   * @param sortRule
   *          ordering of the returned set; when <code>null</code> the search
   *          system's chunk name comparator is used
   * @param filter
   *          additional filter applied to every surviving candidate; when
   *          <code>null</code> the search system's default filter is used
   * @param searchSystem
   *          the search system supplying indexes, comparators and collection
   *          recycling
   * @return the matching chunks. When at least one candidate survived the
   *         slot searches, this is a new set ordered by the effective sort
   *         rule; when none did, the (empty) working set is returned, which
   *         is ordered by the chunk name comparator regardless of
   *         <code>sortRule</code>.
   */
  @Override
  public SortedSet<IChunk> find(ChunkTypeRequest pattern,
      Comparator<IChunk> sortRule, IChunkFilter filter,
      DefaultSearchSystem searchSystem)
  {
    /*
     * working set for the slot searches. Its ordering is irrelevant to the
     * caller, the final ordering is applied in filterAndSort.
     */
    SortedSet<IChunk> candidates = SkipListSetFactory
        .newInstance(searchSystem._chunkNameComparator);
    IChunkType chunkType = pattern.getChunkType();

    /*
     * we optimize the following slot based searches by first sorting the
     * slots by an estimate of the result set size. This allows us to process
     * the smallest first, allowing us to bail early without processing
     * everything. We also support the conversion of not's (when possible) to
     * filters, which is often cheaper since not's are expensive in terms of
     * large set operations.
     */
    List<ISlot> sortedSlots = null;
    Collection<? extends ISlot> originalSlots = pattern
        .getConditionalAndLogicalSlots();
    IChunkFilter primaryFilter = searchSystem._defaultFilter;

    if (_enableNotFilters || _testNotFilter)
    {
      sortedSlots = new ArrayList<ISlot>(originalSlots.size());
      primaryFilter = sortPattern(chunkType, originalSlots, sortedSlots,
          searchSystem);
    }
    else
      sortedSlots = sortPatternOriginal(chunkType, originalSlots,
          searchSystem);

    collectSlotMatches(chunkType, sortedSlots, candidates, searchSystem);

    /*
     * if there were no slots specified, we need to grab all chunks,
     * preferably with the type constrained. We could do this before the slot
     * search and just add all the chunks as the candidate set - but that
     * kills performance. it's quicker to search and start with a small set.
     */
    if (sortedSlots.isEmpty())
      addUnconstrainedCandidates(pattern, chunkType, candidates, searchSystem);

    if (!candidates.isEmpty())
      candidates = filterAndSort(candidates, chunkType, primaryFilter,
          sortRule, filter, searchSystem);

    if (LOGGER.isDebugEnabled())
      LOGGER.debug("First pass candidates for " + pattern + " chunks: "
          + candidates);

    return candidates;
  }

  /**
   * Query each slot in order and intersect the results into
   * <code>candidates</code>, stopping as soon as the set runs empty.
   */
  private void collectSlotMatches(IChunkType chunkType,
      List<ISlot> sortedSlots, SortedSet<IChunk> candidates,
      DefaultSearchSystem searchSystem)
  {
    /*
     * isEmpty() is used instead of size() == 0 throughout: the result is the
     * same, and for skip list backed sets size() may require a traversal
     * (the concrete set type is decided by SkipListSetFactory).
     */
    boolean first = candidates.isEmpty();

    for (ISlot slot : sortedSlots)
    {
      Collection<IChunk> localResults = searchSystem.find(chunkType, slot,
          candidates);

      if (first)
      {
        // the first slot seeds the candidate set
        searchSystem.cleanAddAll(candidates, localResults);
        first = false;
        if (LOGGER.isDebugEnabled())
          LOGGER.debug(String.format("Populating results from %s = %s", slot,
              localResults));
      }
      else
      {
        // every later slot can only shrink it
        searchSystem.cleanRetainAll(candidates, localResults);
        if (LOGGER.isDebugEnabled())
          LOGGER.debug(String.format("Retained results from %s = %s", slot,
              localResults));
      }

      if (candidates.isEmpty())
      {
        if (LOGGER.isDebugEnabled())
          LOGGER.debug("Early eject due to empty candidate set");
        break;
      }
    }
  }

  /**
   * Seed <code>candidates</code> for a request without slots: all chunks of
   * the chunk type, or all chunks of the module when the type is null.
   */
  private void addUnconstrainedCandidates(ChunkTypeRequest pattern,
      IChunkType chunkType, SortedSet<IChunk> candidates,
      DefaultSearchSystem searchSystem)
  {
    if (chunkType != null)
    {
      candidates.addAll(chunkType.getSymbolicChunkType().getChunks());
      return;
    }

    try
    {
      // this is such a pathological case.
      candidates.addAll(searchSystem._module.getChunks().get());
      LOGGER
          .error(String
              .format(
                  "Warning: empty search specifications (%s) require full DM traversal. Please revise",
                  pattern));
    }
    catch (Exception e)
    {
      /*
       * getChunks().get() is presumably a blocking, future-style call. If
       * the wait was interrupted, restore the interrupt status so that code
       * further up the stack can still react to it; the search itself
       * carries on with whatever candidates it has, as before.
       */
      if (e instanceof InterruptedException)
        Thread.currentThread().interrupt();

      LOGGER.error("Failed to fetch all chunks for null chunktype search ",
          e);
    }
  }

  /**
   * Copy the candidates that are of the chunk type and pass both filters into
   * a new set ordered by the effective sort rule, then recycle the old set.
   */
  private SortedSet<IChunk> filterAndSort(SortedSet<IChunk> candidates,
      IChunkType chunkType, IChunkFilter primaryFilter,
      Comparator<IChunk> sortRule, IChunkFilter filter,
      DefaultSearchSystem searchSystem)
  {
    /*
     * we now need to deal with those that are actually the correct chunk
     * type. Iteration over the candidates doing an isA() test would be
     * O(candidates.size). candidates.retainAll(chunksOfType) is either
     * O(candidates.size)*O(log(chunksOfType.size)) or
     * O(chunksOfType.size)*O(log(candidates.size)). Until the Fast
     * collections come out with their predicate iterators, we will just
     * iterate raw. And use the opportunity to filter and sort.
     */
    Comparator<IChunk> comparator = searchSystem._chunkNameComparator;
    if (sortRule != null) comparator = sortRule;

    IChunkFilter chunkFilter = filter == null ? searchSystem._defaultFilter
        : filter;

    SortedSet<IChunk> returnCandidates = SkipListSetFactory
        .newInstance(comparator);

    for (IChunk candidate : candidates)
    {
      // shouldn't we actually test this against the pattern, just to be
      // sure?
      boolean accepted = (chunkType == null || candidate.isA(chunkType))
          && primaryFilter.accept(candidate)
          && chunkFilter.accept(candidate);

      if (accepted) returnCandidates.add(candidate);
    }

    // the working set is no longer needed, hand it back for reuse
    searchSystem.recycleCollection(candidates);
    return returnCandidates;
  }

  /**
   * Ask the search system for the estimated result size of each slot.
   */
  private Map<ISlot, Long> guessSizes(IChunkType chunkType,
      Collection<? extends ISlot> slots, DefaultSearchSystem searchSystem)
  {
    Map<ISlot, Long> sizeMap = new HashMap<ISlot, Long>();
    for (ISlot slot : slots)
      sizeMap.put(slot, searchSystem.guessSize(chunkType, slot));
    return sizeMap;
  }

  /**
   * sort the slots by the guessed size of the result set, and convert not's
   * into filters instead wherever possible.
   * <p>
   * After sorting, every conditional slot with the
   * <code>NOT_EQUALS</code> condition that is not in the first position is
   * removed from <code>container</code> and added to the returned filter.
   * The slot in the first position is always kept, so there is always at
   * least one real search to seed the candidate set. When
   * {@link #_testNotFilter} is set, nothing is removed and no filter is
   * built.
   *
   * @param chunkType
   *          chunk type of the request, passed to the size estimate
   * @param originalSlots
   *          the slots of the request; not modified
   * @param container
   *          receives the slots that still have to be searched, in search
   *          order
   * @param searchSystem
   *          supplies the size estimates and the default filter
   * @return a filter made of the converted slots, or the search system's
   *         default filter if no slot was converted
   */
  protected IChunkFilter sortPattern(IChunkType chunkType,
      Collection<? extends ISlot> originalSlots, List<ISlot> container,
      DefaultSearchSystem searchSystem)
  {
    container.addAll(originalSlots);
    Collections.sort(container, new PatternComparator(guessSizes(chunkType,
        originalSlots, searchSystem)));

    /*
     * after they are sorted, we iterate over the slots: the first one is
     * always searched for real, every subsequent not (conditional, not
     * logical) can be turned into a filter instead.
     */
    boolean safeToFilter = false;
    DelegatedFilter notFilter = null;
    ListIterator<ISlot> sItr = container.listIterator();

    while (sItr.hasNext())
    {
      ISlot slot = sItr.next();

      if (slot instanceof IConditionalSlot)
      {
        IConditionalSlot cSlot = (IConditionalSlot) slot;

        if (cSlot.getCondition() == IConditionalSlot.NOT_EQUALS)
        {
          if (safeToFilter)
          {
            if (LOGGER.isDebugEnabled())
              LOGGER.debug(String.format("Converting %s to a filter", cSlot));

            // in test mode we only measure the overhead up to this point
            if (!_testNotFilter)
            {
              if (notFilter == null) notFilter = new DelegatedFilter();
              notFilter.add(new SlotFilter(cSlot));
              sItr.remove();
            }
          }
          else if (LOGGER.isDebugEnabled())
            LOGGER.debug(String.format("Cannot convert %s to filter", cSlot));
        }
      }

      // anything after the first position is fair game
      safeToFilter = true;
    }

    return notFilter == null ? searchSystem._defaultFilter : notFilter;
  }

  /**
   * sort the slots by the guessed size of the result set. No slot is
   * converted or dropped.
   *
   * @param chunkType
   *          chunk type of the request, passed to the size estimate
   * @param slots
   *          the slots of the request; not modified
   * @param searchSystem
   *          supplies the size estimates
   * @return a new list holding all the slots in search order
   */
  protected List<ISlot> sortPatternOriginal(IChunkType chunkType,
      Collection<? extends ISlot> slots, DefaultSearchSystem searchSystem)
  {
    List<ISlot> sorted = new ArrayList<ISlot>(slots);
    Collections.sort(sorted, new PatternComparator(guessSizes(chunkType,
        slots, searchSystem)));
    return sorted;
  }
}