package com.limegroup.gnutella.library;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.limewire.collection.Function;
import org.limewire.collection.IdentityHashSet;
import org.limewire.collection.IntSet;
import org.limewire.collection.MultiIterator;
import org.limewire.collection.StringTrie;
import org.limewire.core.settings.SearchSettings;
import org.limewire.core.settings.SharingSettings;
import org.limewire.inject.EagerSingleton;
import org.limewire.inspection.DataCategory;
import org.limewire.inspection.InspectableForSize;
import org.limewire.lifecycle.Service;
import org.limewire.lifecycle.ServiceRegistry;
import org.limewire.listener.EventListener;
import org.limewire.listener.ListenerSupport;
import org.limewire.util.I18NConvert;
import org.limewire.util.MediaType;
import org.limewire.util.StringUtils;
import com.google.inject.Inject;
import com.google.inject.Provider;
import com.limegroup.gnutella.ActivityCallback;
import com.limegroup.gnutella.MediaTypeAggregator;
import com.limegroup.gnutella.Response;
import com.limegroup.gnutella.ResponseFactory;
import com.limegroup.gnutella.URN;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.util.QueryUtils;
import com.limegroup.gnutella.xml.LimeXMLDocument;
import com.limegroup.gnutella.xml.LimeXMLReplyCollection;
import com.limegroup.gnutella.xml.LimeXMLSchema;
import com.limegroup.gnutella.xml.LimeXMLSchemaRepository;
import com.limegroup.gnutella.xml.LimeXMLUtils;
import com.limegroup.gnutella.xml.SchemaReplyCollectionMapper;
// TODO split this up further and remove query and response from here,
// or introduce a generic indexing class that can be used
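/**
 * Keyword index over the file names of shared (and, when enabled, incomplete)
 * files. Keywords extracted from file paths are stored in two tries, one for
 * complete files and one for incomplete files, and are used to answer Gnutella
 * {@link QueryRequest}s with {@link Response}s. Metadata (XML) matching is
 * delegated to the {@link LimeXMLReplyCollection}s.
 * <p>
 * Illustrative use (sketch):
 * <pre>
 *     Response[] hits = keywordIndex.query(queryRequest);
 * </pre>
 */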
@EagerSingleton
class SharedFilesKeywordIndexImpl implements SharedFilesKeywordIndex {
    /**
     * A trie mapping keywords in complete filenames to the indices of the
     * corresponding files in the library. Keywords are the tokens produced
     * when the filename is split on the characters in
     * {@link QueryUtils#DELIMITERS}.
     *
     * Keywords of incomplete files are NOT stored here; see
     * {@link #incompleteKeywordTrie}.
     *
     * INVARIANT: for every key k in keywordTrie and every index i in the
     * IntSet keywordTrie.get(k), the path of the file with index i contains k.
     * Likewise, for every keyword k in the path of a complete (non-incomplete)
     * file with index i, keywordTrie.get(k) contains i.
     *
     * Not threadsafe; hold the lock on this field.
     */
@InspectableForSize(value = "size of keyword trie", category = DataCategory.USAGE)
private final StringTrie<IntSet> keywordTrie = new StringTrie<IntSet>(true);
    /**
     * A trie mapping keywords in incomplete filenames to the indices of the
     * corresponding files in the library. Contains ONLY keywords of incomplete
     * files.
     *
     * Not threadsafe; hold the lock on this field.
     */
@InspectableForSize(value = "size of incomplete keyword trie", category = DataCategory.USAGE)
private final StringTrie<IntSet> incompleteKeywordTrie = new StringTrie<IntSet>(true);
private final Provider<CreationTimeCache> creationTimeCache;
private final Provider<ResponseFactory> responseFactory;
private final Library library;
private final FileView gnutellaFileView;
private final FileView incompleteFileView;
private final Provider<SchemaReplyCollectionMapper> schemaReplyCollectionMapper;
private final ActivityCallback activityCallback;
private final LimeXMLSchemaRepository schemaRepository;
@Inject
public SharedFilesKeywordIndexImpl(Library library,
Provider<CreationTimeCache> creationTimeCache,
Provider<ResponseFactory> responseFactory,
Provider<SchemaReplyCollectionMapper> schemaReplyCollectionMapper,
ActivityCallback activityCallback, LimeXMLSchemaRepository schemaRepository,
@GnutellaFiles FileView gnutellaFileView,
@IncompleteFiles FileView incompleteFileView) {
this.library = library;
this.creationTimeCache = creationTimeCache;
this.responseFactory = responseFactory;
this.schemaReplyCollectionMapper = schemaReplyCollectionMapper;
this.activityCallback = activityCallback;
this.schemaRepository = schemaRepository;
this.incompleteFileView = incompleteFileView;
this.gnutellaFileView = gnutellaFileView;
}
@Inject void register(ServiceRegistry registry, final ListenerSupport<FileDescChangeEvent> fileDescSupport) {
registry.register(new Service() {
@Override
public String getServiceName() {
return "P2P Network Keyword Library";
}
@Override
public void initialize() {
fileDescSupport.addListener(new EventListener<FileDescChangeEvent>() {
@Override
public void handleEvent(FileDescChangeEvent event) {
handleFileDescEvent(event);
}
});
library.addManagedListStatusListener(new EventListener<LibraryStatusEvent>() {
@Override
public void handleEvent(LibraryStatusEvent event) {
handleManagedListStatusEvent(event);
}
});
gnutellaFileView.addListener(new EventListener<FileViewChangeEvent>() {
@Override
public void handleEvent(FileViewChangeEvent event) {
handleFileListEvent(event, true);
}
});
incompleteFileView.addListener(new EventListener<FileViewChangeEvent>() {
@Override
public void handleEvent(FileViewChangeEvent event) {
handleFileListEvent(event, false);
}
});
}
            @Override
            public void start() {
                // nothing to start; all work is driven by the listeners
                // registered in initialize()
            }
            @Override
            public void stop() {
                // nothing to stop
            }
});
}
@Override
public Response[] query(QueryRequest request) {
Set<Response> responses = QueryProcessor.processQuery(request, this);
incrementHitCount(responses);
return responses.toArray(new Response[responses.size()]);
}
    /**
     * Increments the hit count of every file that matched a query.
     *
     * @param matches the set of Response objects for the matching files
     */
private void incrementHitCount(Set<Response> matches) {
for (Response resp : matches) {
long index = resp.getIndex();
// casting to int because Response originally created with positive int
FileDesc desc = library.getFileDescForIndex((int)index);
if(desc != null) {
desc.incrementHitCount();
}
}
}
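    /**
     * Answers the metadata (XML) portion of a query. If the request carries a
     * rich (XML) query, only documents matching that query document are
     * returned; otherwise, if enabled in the settings, the plain-text keywords
     * are matched against the metadata of the requested media types.
     */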
private Set<Response> queryMetaData(QueryRequest request) {
Set<LimeXMLDocument> documents = Collections.emptySet();
LimeXMLDocument doc = request.getRichQuery();
if (doc != null) {
documents = queryMetaDataWithRequestXml(doc);
} else if (SearchSettings.INCLUDE_METADATA_IN_PLAINTEXT_SEARCH.getValue()) {
// no xml query, look if any xml field of the matching mediatype
// starts with the keywords of the request
documents = queryMetaDataWithPlaintext(request);
}
return createResponses(documents);
}
    /**
     * Searches the filename keyword index (and, when partial results are
     * allowed, the incomplete file index) for matches to the plain-text query
     * and any query URNs, and builds the corresponding responses.
     */
private Set<Response> queryFileNames(QueryRequest request) {
String str = request.getQuery();
boolean includeXML = request.shouldIncludeXMLInResponse();
// Normal case: query the index to find all matches. TODO: this
// sometimes returns more results (>255) than we actually send out.
// That's wasted work.
// Trie requires that getPrefixedBy(String, int, int) passes
// an already case-changed string. Both search & urnSearch
// do this kind of match, so we canonicalize the case for them.
str = keywordTrie.canonicalCase(str);
IntSet matches = search(str, null, request.desiresPartialResults());
if (request.getQueryUrns().size() > 0)
matches = urnSearch(request.getQueryUrns(), matches);
if (matches == null)
return Collections.emptySet();
Set<Response> responses = new HashSet<Response>();
final MediaTypeAggregator.Aggregator filter = MediaTypeAggregator.getAggregator(request);
LimeXMLDocument doc = request.getRichQuery();
// Iterate through our hit indices to create a list of results.
for (IntSet.IntSetIterator iter = matches.iterator(); iter.hasNext();) {
int i = iter.next();
FileDesc desc = gnutellaFileView.getFileDescForIndex(i);
if(desc == null) {
desc = incompleteFileView.getFileDescForIndex(i);
}
if(desc != null) {
                // desc can be null if items were removed after the IntSet matches were built
if ((filter != null) && !filter.allow(desc.getFileName()))
continue;
activityCallback.handleSharedFileUpdate(desc.getFile());
Response resp = responseFactory.get().createResponse(desc);
if (includeXML) {
if (doc != null && resp.getDocument() != null && !isValidXMLMatch(resp, doc))
continue;
} else {
//remove xml doc to save bandwidth
resp.setDocument(null);
}
responses.add(resp);
}
}
if (responses.size() == 0)
return Collections.emptySet();
return responses;
}
private static boolean isValidXMLMatch(Response r, LimeXMLDocument doc) {
return LimeXMLUtils.match(r.getDocument(), doc, true);
}
/**
     * Finds all files with matching full URNs.
*/
private IntSet urnSearch(Iterable<URN> urnsIter, IntSet priors) {
IntSet ret = priors;
for (URN urn : urnsIter) {
List<FileDesc> fds = gnutellaFileView.getFileDescsMatching(urn);
for(FileDesc fd : fds) {
if(ret == null) {
ret = new IntSet();
}
ret.add(fd.getIndex());
}
}
return ret;
}
/**
* Responds to a what is new request.
*/
private Set<Response> queryWhatsNew(QueryRequest request) {
boolean includeXML = request.shouldIncludeXMLInResponse();
// see if there are any files to send....
// NOTE: we only request up to 3 urns. we don't need to worry
// about partial files because we don't add them to the cache.
// NOTE: this doesn't return Store files. getNewestUrns only
// returns the top 3 shared files
Collection<URN> urnList = creationTimeCache.get().getFiles(request, 3);
if (urnList.size() == 0)
return Collections.emptySet();
// get the appropriate responses
Set<Response> resps = new HashSet<Response>(urnList.size());
for (URN urn : urnList) {
FileDesc desc = gnutellaFileView.getFileDesc(urn);
// should never happen since we don't add times for IFDs and
// we clear removed files...
if ((desc == null) || (desc instanceof IncompleteFileDesc))
throw new RuntimeException("Bad Rep - No IFDs allowed!");
// Formulate the response
Response r = responseFactory.get().createResponse(desc);
if(!includeXML) {
r.setDocument(null);
}
// Cache it
resps.add(r);
}
return resps;
}
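    /**
     * Clears the keyword trie for complete files if {@code complete} is true,
     * otherwise clears the trie for incomplete files.
     */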
private void clear(boolean complete) {
if(complete) {
keywordTrie.clear();
} else {
incompleteKeywordTrie.clear();
}
}
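    /**
     * Updates the index in response to a change in one of the file views.
     *
     * @param complete true if the event came from the gnutella (complete) file
     *        view, false if it came from the incomplete file view
     */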
private void handleFileListEvent(FileViewChangeEvent evt, boolean complete) {
switch(evt.getType()) {
case FILE_ADDED:
addFileDesc(evt.getFileDesc(), complete);
break;
case FILE_CHANGED:
removeFileDesc(evt.getOldValue(), complete);
addFileDesc(evt.getFileDesc(), complete);
break;
case FILE_REMOVED:
removeFileDesc(evt.getFileDesc(), complete);
break;
case FILES_CLEARED:
clear(complete);
break;
case FILE_META_CHANGED:
// purposely do nothing!
// meta change on a view means metadata changed,
// and we don't process metadata here!
break;
}
}
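    /** Trims the keyword tries once the library has finished loading. */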
private void handleManagedListStatusEvent(LibraryStatusEvent evt) {
switch (evt.getType()) {
case LOAD_COMPLETE:
trim();
break;
}
}
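    /**
     * Indexes an incomplete file once its tigertree root becomes available,
     * provided partial sharing and partial keyword loading are enabled and the
     * file has URNs and partial data.
     */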
private void handleFileDescEvent(FileDescChangeEvent evt) {
FileDesc fd = evt.getSource();
switch(evt.getType()) {
case TT_ROOT_ADDED:
if(fd instanceof IncompleteFileDesc) {
IncompleteFileDesc ifd = (IncompleteFileDesc) fd;
if (SharingSettings.ALLOW_PARTIAL_SHARING.getValue() &&
SharingSettings.LOAD_PARTIAL_KEYWORDS.getValue() &&
ifd.hasUrnsAndPartialData()) {
addFileDesc(fd, false);
}
}
break;
}
}
private void removeFileDesc(FileDesc fileDesc, boolean complete) {
if(complete) {
removeKeywords(keywordTrie, fileDesc);
} else {
removeKeywords(incompleteKeywordTrie, fileDesc);
}
}
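    /**
     * Adds the keywords of the given file to the appropriate trie. Incomplete
     * files are only indexed when partial sharing and partial keyword loading
     * are enabled and the file has URNs and partial data.
     */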
private void addFileDesc(FileDesc fileDesc, boolean complete) {
if(!complete) {
boolean indexIncompleteFiles = SharingSettings.ALLOW_PARTIAL_SHARING.getValue()
&& SharingSettings.LOAD_PARTIAL_KEYWORDS.getValue();
IncompleteFileDesc ifd = (IncompleteFileDesc) fileDesc;
if (indexIncompleteFiles && ifd.hasUrnsAndPartialData()) {
loadKeywords(incompleteKeywordTrie, fileDesc);
}
} else {
loadKeywords(keywordTrie, fileDesc);
}
}
    /**
     * Loads the keywords extracted from the given FileDesc's path into the
     * given trie, associating each keyword with the file's index.
     *
     * @param trie the trie to update
     * @param fd the FileDesc to load keywords from
     */
private void loadKeywords(StringTrie<IntSet> trie, FileDesc fd) {
// Index the filename. For each keyword...
String[] keywords = extractKeywords(fd);
for (String keyword : keywords) {
synchronized (trie) {
// Ensure the _keywordTrie has a set of indices associated with
// keyword.
IntSet indices = trie.get(keyword);
if (indices == null) {
indices = new IntSet();
trie.add(keyword, indices);
}
// Add fileIndex to the set.
indices.add(fd.getIndex());
}
}
}
private void removeKeywords(StringTrie<IntSet> trie, FileDesc fd) {
// Remove references to this from index.
String[] keywords = extractKeywords(fd);
for (String keyword : keywords) {
synchronized (trie) {
IntSet indices = trie.get(keyword);
if (indices != null) {
indices.remove(fd.getIndex());
if (indices.size() == 0)
trie.remove(keyword);
}
}
}
}
/**
* Returns a set of indices of files matching <code>query</code>, or null
* if there are no matches. Subclasses may override to provide different
* notions of matching. The caller of this method must not mutate the
* returned value.
*/
protected IntSet search(String query, IntSet priors, boolean partial) {
// As an optimization, we lazily allocate all sets in case there are no
// matches. TODO2: we can avoid allocating sets when getPrefixedBy
// returns an iterator of one element and there is only one keyword.
IntSet ret = priors;
// For each keyword in the query.... (Note that we avoid calling
// StringUtils.split and take advantage of Trie's offset/limit feature.)
for (int i = 0; i < query.length();) {
if (QueryUtils.isDelimiter(query.charAt(i))) {
i++;
continue;
}
int j;
for (j = i + 1; j < query.length(); j++) {
if (QueryUtils.isDelimiter(query.charAt(j)))
break;
}
// Search for keyword, i.e., keywords[i...j-1].
Iterator<IntSet> iter;
synchronized (keywordTrie) {
iter = keywordTrie.getPrefixedBy(query, i, j);
}
if (SharingSettings.ALLOW_PARTIAL_SHARING.getValue()
&& SharingSettings.ALLOW_PARTIAL_RESPONSES.getValue() && partial) {
Iterator<IntSet> incompleteIndices;
synchronized (incompleteKeywordTrie) {
incompleteIndices = incompleteKeywordTrie.getPrefixedBy(query, i, j);
}
iter = new MultiIterator<IntSet>(iter, incompleteIndices);
}
synchronized (keywordTrie) {
synchronized (incompleteKeywordTrie) {
if (iter.hasNext()) {
                        // Got a match. Union the contents of the iterator and
                        // store the result in matches. As an optimization, if
                        // this is the only keyword and only one set was
                        // returned, return that set without copying.
IntSet matches = null;
while (iter.hasNext()) {
IntSet s = iter.next();
if (matches == null) {
if (i == 0 && j == query.length() && !(iter.hasNext()))
return s;
matches = new IntSet();
}
matches.addAll(s);
}
// Intersect matches with ret. If ret isn't allocated,
// initialize to matches.
if (ret == null)
ret = matches;
else
ret.retainAll(matches);
} else {
                        // No match. Optimization: no matches for this keyword => failure.
return null;
}
// Optimization: no matches after intersect => failure
if (ret.size() == 0)
return null;
i = j;
}
}
}
if (ret == null || ret.size() == 0)
return null;
return ret;
}
/**
* Utility method to perform standardized keyword extraction for the given
* <tt>FileDesc</tt>. This handles extracting keywords according to
* locale-specific rules.
*
* @param fd the <tt>FileDesc</tt> containing a file system path with
     *        keywords to extract
* @return an array of keyword strings for the given file
*/
private static String[] extractKeywords(FileDesc fd) {
return StringUtils.split(I18NConvert.instance().getNorm(fd.getPath()),
QueryUtils.DELIMITERS);
}
/**
     * Ensures that the index takes the minimum amount of space. Only affects
* performance, not correctness; hence no modifies clause.
*/
private void trim() {
for (StringTrie<IntSet> trie : new StringTrie[] { keywordTrie, incompleteKeywordTrie }) {
synchronized (trie) {
trie.trim(new Function<IntSet, IntSet>() {
public IntSet apply(IntSet intSet) {
intSet.trim();
return intSet;
}
});
}
}
}
    /**
     * Returns the set of XML documents that match the given query document,
     * using the reply collection registered for the query's schema.
     */
private Set<LimeXMLDocument> queryMetaDataWithRequestXml(LimeXMLDocument queryDoc) {
String schema = queryDoc.getSchemaURI();
LimeXMLReplyCollection replyCol = schemaReplyCollectionMapper.get().getReplyCollection(
schema);
if (replyCol == null)// no matching reply collection for schema
return Collections.emptySet();
return replyCol.getMatchingDocuments(queryDoc);
}
    /**
     * Queries the metadata of shared files with the plain-text query. Only the
     * media types specified in the request are searched; if the request does
     * not specify any, all metadata is searched.
     *
     * @param request determines what to search and which metadata is searched
     * @return the set of matching XML documents
     */
private Set<LimeXMLDocument> queryMetaDataWithPlaintext(QueryRequest request) {
Collection<LimeXMLReplyCollection> schemas = getReplyCollections(request);
Set<LimeXMLDocument> documents = new IdentityHashSet<LimeXMLDocument>();
for (LimeXMLReplyCollection schemaCol : schemas) {
documents.addAll(schemaCol.getMatchingDocuments(request.getQuery()));
}
return documents;
}
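    /**
     * Returns the XML reply collections to search for the given request: all
     * collections if the request does not restrict media types, otherwise only
     * the collections whose schemas correspond to the requested media types.
     */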
private Collection<LimeXMLReplyCollection> getReplyCollections(QueryRequest request) {
MediaTypeAggregator.Aggregator filter = MediaTypeAggregator.getAggregator(request);
SchemaReplyCollectionMapper mapper = schemaReplyCollectionMapper.get();
if (filter == null) {
return mapper.getCollections();
}
Collection<MediaType> mediaTypes = filter.getMediaTypes();
List<LimeXMLReplyCollection> collections = new ArrayList<LimeXMLReplyCollection>(mediaTypes
.size());
for (MediaType mt : mediaTypes) {
// get schema uri from media type
LimeXMLReplyCollection col = mapper.getReplyCollection(getSchemaUriFromMimeType(mt
.getSchema()));
if (col != null) {
collections.add(col);
}
}
return collections;
}
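    /**
     * Returns the URI of the schema whose description matches the given name,
     * or the empty string if no such schema is available.
     */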
private String getSchemaUriFromMimeType(String mimeType) {
Collection<LimeXMLSchema> schemas = schemaRepository.getAvailableSchemas();
for (LimeXMLSchema schema : schemas) {
if (schema.getDescription().equals(mimeType)) {
return schema.getSchemaURI();
}
}
return "";
}
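    /**
     * Creates a response for each XML document whose file is present in the
     * gnutella file view and has a SHA-1 URN, attaching the document to the
     * response.
     */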
private Set<Response> createResponses(Set<LimeXMLDocument> documents) {
Set<Response> responses = new HashSet<Response>(documents.size());
for (LimeXMLDocument currDoc : documents) {
            File file = currDoc.getIdentifier(); // returns null if none
            assert file != null;
FileDesc fd = gnutellaFileView.getFileDesc(file);
if (fd == null || fd.getSHA1Urn() == null) {
                // fd == null is bad -- it would mean the index is out of sync
                // with the file view; a missing SHA1 means we can't return a
                // valid hit. Skip either way.
continue;
}
// we found a file with the right name
            Response res = responseFactory.get().createResponse(fd);
activityCallback.handleSharedFileUpdate(fd.getFile());
res.setDocument(currDoc);
responses.add(res);
}
return responses;
}
    /**
     * Enum type and context object used to organize the various steps that go
     * into processing a query.
     * <p>
     * Each QueryProcessor constant represents one step in processing a query.
     * <p>
     * The purposes of the context object are:
     * <ol>
     * <li>to keep track of whether query processing is done, and</li>
     * <li>to keep track of the Responses found by the searches so far.</li>
     * </ol>
     * <p>
     * For each QueryProcessor constant, query processing:
     * <ol>
     * <li>checks the context object to see whether query processing is done,
     *     and if so stops,</li>
     * <li>decides whether this particular query type should be run,</li>
     * <li>if it should, performs the query and adds the resulting Response
     *     objects to the context object, and</li>
     * <li>if query processing is done, sets that status in the context
     *     object.</li>
     * </ol>
     */
private enum QueryProcessor {
/**
* "What is new" search. Get up to 3 of your "youngest" files.
*/
WHATS_NEW {
@Override
void processQueryStage(QueryRequest request, QueryProcessingContext context,
SharedFilesKeywordIndexImpl keywordIndex) {
Set<Response> responses = keywordIndex.queryWhatsNew(request);
context.addQueryResponses(responses);
context.setFinishedProcessing();
}
@Override
boolean shouldProcess(QueryRequest request) {
return request.isWhatIsNewRequest();
}
},
/**
* Special case: return everything for Clip2 indexing query (" ") and
* browse queries ("*.*"). If these messages had initial TTLs too high,
* StandardMessageRouter will clip the number of results sent on the
* network. Note that some initial TTLs are filterd by GreedyQuery
* before they ever reach this point.
*/
SPECIAL_CASE_EMPTY_RESPONSE {
@Override
void processQueryStage(QueryRequest request, QueryProcessingContext context,
SharedFilesKeywordIndexImpl keywordIndex) {
context.setFinishedProcessing();
}
@Override
boolean shouldProcess(QueryRequest request) {
String str = request.getQuery();
return str.equals(QueryRequest.INDEXING_QUERY)
|| str.equals(QueryRequest.BROWSE_QUERY);
}
},
/**
* Search file name
*/
FILE_SEARCH {
@Override
void processQueryStage(QueryRequest request, QueryProcessingContext context,
SharedFilesKeywordIndexImpl keywordIndex) {
Set<Response> responses = keywordIndex.queryFileNames(request);
context.addQueryResponses(responses);
}
@Override
boolean shouldProcess(QueryRequest request) {
return true;
}
},
/**
* Search meta data
*/
METADATA_SEARCH {
@Override
void processQueryStage(QueryRequest request, QueryProcessingContext context,
SharedFilesKeywordIndexImpl keywordIndex) {
Set<Response> responses = keywordIndex.queryMetaData(request);
context.addQueryResponses(responses);
context.setFinishedProcessing();
}
@Override
boolean shouldProcess(QueryRequest request) {
return request.shouldIncludeXMLInResponse();
}
};
abstract void processQueryStage(QueryRequest request, QueryProcessingContext context,
SharedFilesKeywordIndexImpl keywordIndex);
abstract boolean shouldProcess(QueryRequest request);
public static Set<Response> processQuery(QueryRequest request,
SharedFilesKeywordIndexImpl keywordIndex) {
QueryProcessingContext contextObj = new QueryProcessingContext();
for (QueryProcessor queryProcessor : QueryProcessor.values()) {
if (queryProcessor.shouldProcess(request)) {
queryProcessor.processQueryStage(request, contextObj, keywordIndex);
if (contextObj.isFinishedProcessing()) {
break;
}
}
}
return contextObj.getResponses();
}
}
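    /**
     * Mutable context shared by the {@link QueryProcessor} stages: accumulates
     * the responses found so far and records whether processing is finished.
     */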
private static class QueryProcessingContext {
private boolean isTerminal;
private final Set<Response> responses;
QueryProcessingContext() {
this.responses = new HashSet<Response>();
this.isTerminal = false;
}
boolean isFinishedProcessing() {
return isTerminal;
}
Set<Response> getResponses() {
return responses;
}
void addQueryResponses(Set<Response> responses) {
this.responses.addAll(responses);
}
void setFinishedProcessing() {
isTerminal = true;
}
}
}