/*******************************************************************************
* Copyright (c) 2012-2017 Codenvy, S.A.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Codenvy, S.A. - initial API and implementation
*******************************************************************************/
package org.eclipse.che.api.vfs.search.impl;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.eclipse.che.api.core.ForbiddenException;
import org.eclipse.che.api.core.ServerException;
import org.eclipse.che.api.vfs.VirtualFile;
import org.eclipse.che.api.vfs.VirtualFileFilter;
import org.eclipse.che.api.vfs.VirtualFileFilters;
import org.eclipse.che.api.vfs.VirtualFileSystem;
import org.eclipse.che.api.vfs.search.MediaTypeFilter;
import org.eclipse.che.api.vfs.search.QueryExpression;
import org.eclipse.che.api.vfs.search.SearchResult;
import org.eclipse.che.api.vfs.search.SearchResultEntry;
import org.eclipse.che.api.vfs.search.Searcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import static com.google.common.collect.Lists.newArrayList;
/**
 * Lucene based searcher.
 *
 * <p>Every indexed file is represented by one Lucene document holding a stored {@code path} field, a stored
 * {@code name} field and, unless one of the registered exclude filters accepts the file, a {@code text} field
 * that is fed from the file content.
 *
 * <p>The searcher is created in the "closed" state; one of {@link #init(VirtualFileSystem)},
 * {@link #initAsynchronously(ExecutorService, VirtualFileSystem)} or {@link #doInit()} must be called before it
 * is used.
 *
 * @author andrew00x
 */
public abstract class LuceneSearcher implements Searcher {
    private static final Logger LOG = LoggerFactory.getLogger(LuceneSearcher.class);

    /** Upper bound for the number of documents requested from Lucene in a single call. */
    private static final int    RESULT_LIMIT = 1000;
    private static final String PATH_FIELD   = "path";
    private static final String NAME_FIELD   = "name";
    private static final String TEXT_FIELD   = "text";

    /** Content of a file is NOT indexed if at least one of these filters accepts the file. */
    private final List<VirtualFileFilter>                      excludeFileIndexFilters;
    /** Optional callback notified after the index has been closed; may be {@code null}. */
    private final AbstractLuceneSearcherProvider.CloseCallback closeCallback;

    private IndexWriter     luceneIndexWriter;
    private SearcherManager searcherManager;

    private boolean closed = true;

    /** Creates a searcher that uses {@link MediaTypeFilter} as exclude filter and has no close callback. */
    protected LuceneSearcher() {
        this(new MediaTypeFilter(), null);
    }

    /**
     * Creates a searcher that uses {@link MediaTypeFilter} as exclude filter.
     *
     * @param closeCallback
     *         callback invoked after the searcher is closed, may be {@code null}
     */
    protected LuceneSearcher(AbstractLuceneSearcherProvider.CloseCallback closeCallback) {
        this(new MediaTypeFilter(), closeCallback);
    }

    /**
     * @param excludeFileIndexFilter
     *         common filter for files that should not be indexed. If complex excluding rules needed then few filters might be combined
     *         with {@link VirtualFileFilters#createAndFilter} or {@link VirtualFileFilters#createOrFilter} methods
     * @param closeCallback
     *         callback invoked after the searcher is closed, may be {@code null}
     */
    protected LuceneSearcher(VirtualFileFilter excludeFileIndexFilter, AbstractLuceneSearcherProvider.CloseCallback closeCallback) {
        this.closeCallback = closeCallback;
        excludeFileIndexFilters = new CopyOnWriteArrayList<>();
        excludeFileIndexFilters.add(excludeFileIndexFilter);
    }

    /**
     * Registers an additional exclude filter.
     *
     * @return result of adding the filter to the internal filter list
     */
    @Override
    public boolean addIndexFilter(VirtualFileFilter indexFilter) {
        return excludeFileIndexFilters.add(indexFilter);
    }

    /**
     * Unregisters a previously added exclude filter.
     *
     * @return {@code true} if the filter was registered and has been removed
     */
    @Override
    public boolean removeIndexFilter(VirtualFileFilter indexFilter) {
        return excludeFileIndexFilters.remove(indexFilter);
    }

    /**
     * Creates the analyzer used both for indexing and for parsing queries: input is split on whitespace and
     * every token is lower-cased.
     */
    protected Analyzer makeAnalyzer() {
        return new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new WhitespaceTokenizer();
                TokenStream filter = new LowerCaseFilter(tokenizer);
                return new TokenStreamComponents(tokenizer, filter);
            }
        };
    }

    /**
     * Creates the Lucene directory that backs the index.
     *
     * @throws ServerException
     *         if the directory cannot be created
     */
    protected abstract Directory makeDirectory() throws ServerException;

    /**
     * Init lucene index. Need call this method if index directory is clean. Scan all files in virtual filesystem and add to index.
     *
     * @param virtualFileSystem
     *         VirtualFileSystem
     * @throws ServerException
     *         if any virtual filesystem error occurs
     */
    public void init(VirtualFileSystem virtualFileSystem) throws ServerException {
        doInit();
        addTree(virtualFileSystem.getRoot());
    }

    /**
     * Opens the index synchronously and then schedules indexing of the whole virtual filesystem on the given
     * executor. If the executor is already shut down the index is opened but nothing is scheduled.
     *
     * @param executor
     *         executor that runs the indexing task
     * @param virtualFileSystem
     *         VirtualFileSystem
     * @throws ServerException
     *         if the index cannot be opened
     */
    public void initAsynchronously(ExecutorService executor, VirtualFileSystem virtualFileSystem) throws ServerException {
        doInit();
        if (!executor.isShutdown()) {
            executor.execute(() -> {
                try {
                    LuceneSearcher.this.addTree(virtualFileSystem.getRoot());
                } catch (ServerException e) {
                    // Pass the throwable as well, otherwise the stack trace of a background failure is lost.
                    LOG.error(e.getMessage(), e);
                }
            });
        }
    }

    /**
     * Opens the index writer and the searcher manager and marks this searcher as open.
     *
     * @throws ServerException
     *         if the directory cannot be created or the index cannot be opened
     */
    protected final synchronized void doInit() throws ServerException {
        final Directory directory = makeDirectory();
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(makeAnalyzer()));
            final SearcherManager manager = new SearcherManager(writer, true, new SearcherFactory());
            // Publish the state only when everything has been created successfully.
            luceneIndexWriter = writer;
            searcherManager = manager;
            closed = false;
        } catch (IOException e) {
            // Do not leak a half-opened index: release whatever has been created so far (nulls are skipped).
            IOUtils.closeWhileHandlingException(writer, directory);
            throw new ServerException(e);
        }
    }

    /**
     * Closes the index writer, its directory and the searcher manager, then invokes {@link #afterClose()}.
     * I/O errors are logged, not propagated; the searcher is marked as closed in any case. Calling this method
     * on an already closed searcher does nothing.
     */
    public final synchronized void close() {
        if (!closed) {
            try {
                IOUtils.close(getIndexWriter(), getIndexWriter().getDirectory(), searcherManager);
                afterClose();
            } catch (IOException e) {
                LOG.error(e.getMessage(), e);
            }
            closed = true;
        }
    }

    /**
     * Hook invoked by {@link #close()} after the Lucene resources have been closed successfully. The default
     * implementation notifies the close callback, if there is one.
     *
     * @throws IOException
     *         if an I/O error occurs
     */
    protected void afterClose() throws IOException {
        if (closeCallback != null) {
            closeCallback.onClose();
        }
    }

    @Override
    public synchronized boolean isClosed() {
        return closed;
    }

    /** Returns the index writer, or {@code null} if the index has never been opened. */
    public synchronized IndexWriter getIndexWriter() {
        return luceneIndexWriter;
    }

    /**
     * Executes the query and returns one page of results.
     *
     * <p>At most {@code query.getMaxItems()} entries are returned, and never more than {@value #RESULT_LIMIT};
     * a non-positive max items value means {@value #RESULT_LIMIT}. The first {@code query.getSkipCount()} hits
     * are skipped. If there are hits left after the returned page, the result carries a query expression
     * that fetches the next page.
     *
     * @param query
     *         search criteria
     * @return search result
     * @throws ServerException
     *         if the query cannot be parsed or an index error occurs
     */
    @Override
    public SearchResult search(QueryExpression query) throws ServerException {
        IndexSearcher luceneSearcher = null;
        try {
            final long startTime = System.currentTimeMillis();
            searcherManager.maybeRefresh();
            luceneSearcher = searcherManager.acquire();

            final Query luceneQuery = createLuceneQuery(query);

            ScoreDoc after = null;
            final int numSkipDocs = Math.max(0, query.getSkipCount());
            if (numSkipDocs > 0) {
                after = skipScoreDocs(luceneSearcher, luceneQuery, numSkipDocs);
            }

            final int numDocs = query.getMaxItems() > 0 ? Math.min(query.getMaxItems(), RESULT_LIMIT) : RESULT_LIMIT;
            final TopDocs topDocs = luceneSearcher.searchAfter(after, luceneQuery, numDocs);
            final int totalHitsNum = topDocs.totalHits;

            final List<SearchResultEntry> results = newArrayList();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                String filePath = luceneSearcher.doc(scoreDoc.doc).getField(PATH_FIELD).stringValue();
                results.add(new SearchResultEntry(filePath));
            }

            final long elapsedTimeMillis = System.currentTimeMillis() - startTime;

            // Number of hits consumed so far (skipped + returned); anything beyond it is still to be retrieved.
            final int consumedHits = numSkipDocs + topDocs.scoreDocs.length;
            final boolean hasMoreToRetrieve = consumedHits < totalHitsNum;
            QueryExpression nextPageQueryExpression = null;
            if (hasMoreToRetrieve) {
                nextPageQueryExpression = createNextPageQuery(query, consumedHits);
            }

            return SearchResult.aSearchResult()
                               .withResults(results)
                               .withTotalHits(totalHitsNum)
                               .withNextPageQueryExpression(nextPageQueryExpression)
                               .withElapsedTimeMillis(elapsedTimeMillis)
                               .build();
        } catch (IOException | ParseException e) {
            throw new ServerException(e.getMessage(), e);
        } finally {
            // The searcher is null when refresh/acquire failed; releasing null would mask the original error.
            if (luceneSearcher != null) {
                try {
                    searcherManager.release(luceneSearcher);
                } catch (IOException e) {
                    LOG.error(e.getMessage(), e);
                }
            }
        }
    }

    /**
     * Translates the query expression to a Lucene query. Every non-null criterion (path prefix, name, text)
     * becomes a mandatory clause; name and text are parsed with the classic query parser with leading
     * wildcards allowed.
     */
    private Query createLuceneQuery(QueryExpression query) throws ParseException {
        final BooleanQuery luceneQuery = new BooleanQuery();
        final String name = query.getName();
        final String path = query.getPath();
        final String text = query.getText();
        if (path != null) {
            luceneQuery.add(new PrefixQuery(new Term(PATH_FIELD, path)), BooleanClause.Occur.MUST);
        }
        if (name != null) {
            QueryParser qParser = new QueryParser(NAME_FIELD, makeAnalyzer());
            qParser.setAllowLeadingWildcard(true);
            luceneQuery.add(qParser.parse(name), BooleanClause.Occur.MUST);
        }
        if (text != null) {
            QueryParser qParser = new QueryParser(TEXT_FIELD, makeAnalyzer());
            qParser.setAllowLeadingWildcard(true);
            luceneQuery.add(qParser.parse(text), BooleanClause.Occur.MUST);
        }
        return luceneQuery;
    }

    /**
     * Walks over the first {@code numSkipDocs} hits of the query, reading at most {@value #RESULT_LIMIT} hits
     * per request.
     *
     * @return the last skipped hit, suitable as the {@code after} argument of
     * {@link IndexSearcher#searchAfter(ScoreDoc, Query, int)}; the last available hit if the query has fewer than
     * {@code numSkipDocs} hits; {@code null} if the query has no hits at all
     */
    private ScoreDoc skipScoreDocs(IndexSearcher luceneSearcher, Query luceneQuery, int numSkipDocs) throws IOException {
        ScoreDoc lastSkipped = null;
        int skipped = 0;
        while (skipped < numSkipDocs) {
            // Never ask for more than what is left to skip, so the last frame ends exactly on the last skipped hit.
            final int frameSize = Math.min(numSkipDocs - skipped, RESULT_LIMIT);
            final TopDocs frame = luceneSearcher.searchAfter(lastSkipped, luceneQuery, frameSize);
            final int frameLength = frame.scoreDocs.length;
            if (frameLength == 0) {
                // Fewer hits than requested to skip.
                break;
            }
            lastSkipped = frame.scoreDocs[frameLength - 1];
            skipped += frameLength;
        }
        return lastSkipped;
    }

    /** Copies text, name, path and max items of the original query and sets the new skip count. */
    private QueryExpression createNextPageQuery(QueryExpression originalQuery, int newSkipCount) {
        return new QueryExpression().setText(originalQuery.getText())
                                    .setName(originalQuery.getName())
                                    .setPath(originalQuery.getPath())
                                    .setSkipCount(newSkipCount)
                                    .setMaxItems(originalQuery.getMaxItems());
    }

    /**
     * Adds a file, or all files under a folder, to the index.
     *
     * @throws ServerException
     *         if an error occurs while reading or indexing
     */
    @Override
    public final void add(VirtualFile virtualFile) throws ServerException {
        doAdd(virtualFile);
    }

    /** Dispatches to {@link #addTree(VirtualFile)} for folders and to {@link #addFile(VirtualFile)} otherwise. */
    protected void doAdd(VirtualFile virtualFile) throws ServerException {
        if (virtualFile.isFolder()) {
            addTree(virtualFile);
        } else {
            addFile(virtualFile);
        }
    }

    /**
     * Indexes all files found under the given folder, descending into sub-folders. Folders that do not exist
     * at the moment they are visited are skipped.
     *
     * @param tree
     *         root folder of the tree to index
     * @throws ServerException
     *         if an error occurs while reading or indexing
     */
    protected void addTree(VirtualFile tree) throws ServerException {
        final long start = System.currentTimeMillis();
        // Used as a stack of folders that still have to be visited.
        final LinkedList<VirtualFile> q = new LinkedList<>();
        q.add(tree);
        int indexedFiles = 0;
        while (!q.isEmpty()) {
            final VirtualFile folder = q.pop();
            if (folder.exists()) {
                for (VirtualFile child : folder.getChildren()) {
                    if (child.isFolder()) {
                        q.push(child);
                    } else {
                        addFile(child);
                        indexedFiles++;
                    }
                }
            }
        }
        final long end = System.currentTimeMillis();
        LOG.debug("Indexed {} files from {}, time: {} ms", indexedFiles, tree.getPath(), (end - start));
    }

    /**
     * Adds the file to the index, replacing the document previously indexed for the same path. Does nothing if
     * the file does not exist.
     *
     * @throws ServerException
     *         if an error occurs while reading or indexing
     */
    protected void addFile(VirtualFile virtualFile) throws ServerException {
        if (virtualFile.exists()) {
            writeDocument(new Term(PATH_FIELD, virtualFile.getPath().toString()), virtualFile);
        }
    }

    /**
     * Removes a file, or everything under a folder, from the index.
     *
     * @param path
     *         path of the removed item
     * @param isFile
     *         {@code true} to delete the single document with exactly this path, {@code false} to delete all
     *         documents whose path starts with {@code path + '/'}
     * @throws ServerException
     *         if an index error occurs
     */
    @Override
    public final void delete(String path, boolean isFile) throws ServerException {
        try {
            if (isFile) {
                Term term = new Term(PATH_FIELD, path);
                getIndexWriter().deleteDocuments(term);
            } else {
                Term term = new Term(PATH_FIELD, path + '/');
                getIndexWriter().deleteDocuments(new PrefixQuery(term));
            }
        } catch (OutOfMemoryError oome) {
            close();
            throw oome;
        } catch (IOException e) {
            throw new ServerException(e.getMessage(), e);
        }
    }

    /**
     * Re-indexes the file, replacing the document previously indexed for the same path.
     *
     * @throws ServerException
     *         if an error occurs while reading or indexing
     */
    @Override
    public final void update(VirtualFile virtualFile) throws ServerException {
        doUpdate(new Term(PATH_FIELD, virtualFile.getPath().toString()), virtualFile);
    }

    /**
     * Replaces all documents matching {@code deleteTerm} with a document created for the given file.
     *
     * @param deleteTerm
     *         term identifying the documents to replace
     * @param virtualFile
     *         file to index
     * @throws ServerException
     *         if an error occurs while reading or indexing
     */
    protected void doUpdate(Term deleteTerm, VirtualFile virtualFile) throws ServerException {
        writeDocument(deleteTerm, virtualFile);
    }

    /**
     * Shared implementation of {@link #addFile(VirtualFile)} and {@link #doUpdate(Term, VirtualFile)}: opens the
     * content reader when the content should be indexed, replaces the documents matching {@code deleteTerm}
     * and translates failures. On {@link OutOfMemoryError} the searcher is closed before the error is rethrown.
     */
    private void writeDocument(Term deleteTerm, VirtualFile virtualFile) throws ServerException {
        try (Reader fContentReader = shouldIndexContent(virtualFile)
                                     ? new BufferedReader(new InputStreamReader(virtualFile.getContent()))
                                     : null) {
            getIndexWriter().updateDocument(deleteTerm, createDocument(virtualFile, fContentReader));
        } catch (OutOfMemoryError oome) {
            close();
            throw oome;
        } catch (IOException e) {
            throw new ServerException(e.getMessage(), e);
        } catch (ForbiddenException e) {
            throw new ServerException(e.getServiceError());
        }
    }

    /**
     * Creates the Lucene document for the file.
     *
     * @param virtualFile
     *         file to create the document for
     * @param reader
     *         reader of the file content, or {@code null} if the content must not be indexed
     * @return document with path and name fields and, if {@code reader} is not {@code null}, the text field
     * @throws ServerException
     *         declared for overriding implementations
     */
    protected Document createDocument(VirtualFile virtualFile, Reader reader) throws ServerException {
        final Document doc = new Document();
        doc.add(new StringField(PATH_FIELD, virtualFile.getPath().toString(), Field.Store.YES));
        doc.add(new TextField(NAME_FIELD, virtualFile.getName(), Field.Store.YES));
        if (reader != null) {
            doc.add(new TextField(TEXT_FIELD, reader));
        }
        return doc;
    }

    /** Returns {@code false} if at least one exclude filter accepts the file, {@code true} otherwise. */
    private boolean shouldIndexContent(VirtualFile virtualFile) {
        for (VirtualFileFilter indexFilter : excludeFileIndexFilters) {
            if (indexFilter.accept(virtualFile)) {
                return false;
            }
        }
        return true;
    }
}