/**
* OLAT - Online Learning and Training<br>
* http://www.olat.org
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
* University of Zurich, Switzerland.
* <hr>
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* This file has been modified by the OpenOLAT community. Changes are licensed
* under the Apache 2.0 license as the original file.
*/
package org.olat.search.service.indexer;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import java.util.Hashtable;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.LucenePackage;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.olat.core.commons.persistence.DBFactory;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.olat.core.util.coordinate.CoordinatorManager;
import org.olat.search.QueryException;
import org.olat.search.SearchModule;
import org.olat.search.SearchService;
import org.olat.search.ServiceNotAvailableException;
import org.olat.search.model.OlatDocument;
import org.olat.search.service.SearchResourceContext;
/**
* Controls the hole generation of a full-index. It run in its own thread the main index.
* The sub-indexers can use a thread pool to parallelize the works.
*
* @author Christian Guretzki
*/
public class OlatFullIndexer {
private static final OLog log = Tracing.createLoggerFor(OlatFullIndexer.class);
private static final int INDEX_MERGE_FACTOR = 1000;
private static final int MAX_WAITING_COUNT = 600;// = 10Min
private static final IndexerThreadFactory indexWriterThreadFactory = new IndexerThreadFactory("writer");
private static final IndexerThreadFactory indexWorkersThreadFactory = new IndexerThreadFactory("worker");
private String indexPath;
private String tempIndexPath;
/**
* Reference to indexer for done callback.
*/
private Index index;
private IndexWriter indexWriter;
/** Flag to stop indexing. */
private boolean stopIndexing;
/** When restartIndexingWhenFinished is true, the restart interval in ms can be set. */
private long indexInterval = 500;
private double ramBufferSizeMB;
private final int indexerPoolSize;
/** Current status of full-indexer. */
private FullIndexerStatus fullIndexerStatus;
/** Used to build number of indexed documents per minute. */
private long lastMinute;
private int currentMinuteCounter;
/* Define number of documents which will be added befor sleeping (indexInterval for CPU load). */
int documentsPerInterval;
/* Counts added documents in indexInterval. */
private int sleepDocumentCounter = 0;
/* List of Integer objects to count number of docs for each type. Key = document-type. */
private Map<String,Integer> documentCounters;
private Map<String,Integer> fileTypeCounters;
private final MainIndexer mainIndexer;
private final SearchService searchService;
private final CoordinatorManager coordinatorManager;
private static final Object indexerWriterBlock = new Object();
private ThreadPoolExecutor indexerExecutor;
private ThreadPoolExecutor indexerWriterExecutor;
/**
*
* @param tempIndexPath Absolute file path to temporary index directory.
* @param index Reference to index object.
* @param restartInterval Restart interval in milliseconds.
* @param indexInterval Sleep time in milliseconds between adding documents.
*/
public OlatFullIndexer(Index index, SearchModule searchModule, SearchService searchService,
MainIndexer mainIndexer, CoordinatorManager coordinatorManager) {
this.index = index;
this.mainIndexer = mainIndexer;
this.searchService = searchService;
this.coordinatorManager = coordinatorManager;
// -1 because the thread pool used a CallerRunPolicy, which means the main thread
// will do the work if the queue of the poll is full.
if(searchModule.getFolderPoolSize() <= 2) {
indexerPoolSize = 1;
} else {
indexerPoolSize = searchModule.getFolderPoolSize() - 1;
}
indexPath = searchModule.getFullIndexPath();
tempIndexPath = searchModule.getFullTempIndexPath();
indexInterval = searchModule.getIndexInterval();
documentsPerInterval = searchModule.getDocumentsPerInterval();
ramBufferSizeMB = searchModule.getRAMBufferSizeMB();
fullIndexerStatus = new FullIndexerStatus(1);
stopIndexing = true;
initStatus();
resetDocumentCounters();
}
private void initStatus() {
File indexDir = new File(indexPath);
if (indexDir.exists()) {
final AtomicLong last = new AtomicLong(1);
try {
Files.walkFileTree(indexDir.toPath(), new SimpleFileVisitor<Path>(){
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
if(attrs.isRegularFile()) {
FileTime time = attrs.lastModifiedTime();
long timeInMillis = time.toMillis();
if(timeInMillis > 0 && last.longValue() < timeInMillis) {
last.set(timeInMillis);
}
}
return FileVisitResult.CONTINUE;
}
});
} catch (IOException e) {
log.error("", e);
}
fullIndexerStatus.setLastFullIndexTime(last.get());
} else {
fullIndexerStatus.setLastFullIndexTime(1);
}
}
/**
* Start full indexer thread.
*/
public void startIndexing() {
// Start updateThread
if (stopIndexing) {
log.info("start full indexing thread...");
stopIndexing = false;
resetDocumentCounters();
run();
}
}
/**
* Stop full indexer thread asynchron.
*/
public void stopIndexing() {
stopIndexing = true;
if (log.isDebug()) log.debug("stop current indexing when");
}
public LogMergePolicy newLogMergePolicy() {
LogMergePolicy logmp = new LogDocMergePolicy();
logmp.setCalibrateSizeByDeletes(true);
logmp.setMergeFactor(INDEX_MERGE_FACTOR);
return logmp;
}
public IndexWriterConfig newIndexWriterConfig() {
Analyzer analyzer = new StandardAnalyzer(SearchService.OO_LUCENE_VERSION);
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
indexWriterConfig.setMergePolicy(newLogMergePolicy());
indexWriterConfig.setRAMBufferSizeMB(ramBufferSizeMB);// for better performance set to 48MB (see lucene docu 'how to make indexing faster")
indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
return indexWriterConfig;
}
/**
* Create index-writer object. In multi-threaded mode ctreates an array of index-workers.
* Start indexing with main-index as root object. Index recursive all elements.
* At the end optimize and close new index.
* The new index is stored in [temporary-index-path]/main
* @throws InterruptedException
*/
private void doIndex() throws InterruptedException{
try {
if(indexerExecutor == null) {
BlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>(2);
indexerExecutor = new ThreadPoolExecutor(indexerPoolSize, indexerPoolSize, 0L, TimeUnit.MILLISECONDS,
queue, indexWorkersThreadFactory, new ThreadPoolExecutor.CallerRunsPolicy());
}
if(indexerWriterExecutor == null) {
BlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>(2);
indexerWriterExecutor = new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, queue, indexWriterThreadFactory);
}
File tempIndexDir = new File(tempIndexPath);
Directory tmpIndexPath = FSDirectory.open(new File(tempIndexDir, "main"));
indexWriter = new IndexWriter(tmpIndexPath, newIndexWriterConfig());// analyzer, true, IndexWriter.MAX_TERM_LENGTH.UNLIMITED);
indexWriter.deleteAll();
SearchResourceContext searchResourceContext = new SearchResourceContext();
log.info("doIndex start. OlatFullIndexer with Debug output");
mainIndexer.doIndex(searchResourceContext, null /*no parent*/, this);
DBFactory.getInstance().commitAndCloseSession();
log.info("Wait until every folder indexer is finished");
indexerExecutor.shutdown();
indexerExecutor.awaitTermination(10, TimeUnit.MINUTES);
DBFactory.getInstance().commitAndCloseSession();
log.info("Wait until index writer executor is finished");
int waitWriter = 0;
while (indexerWriterExecutor.getActiveCount() > 0 && (waitWriter++ < MAX_WAITING_COUNT)) {
Thread.sleep(1000);
}
log.info("Close index writer executor");
fullIndexerStatus.setIndexSize(indexWriter.maxDoc());
//shutdown the index writer thread
indexerWriterExecutor.submit(new CloseIndexCallable());
indexerWriterExecutor.shutdown();
indexerWriterExecutor.awaitTermination(1, TimeUnit.MINUTES);
} catch (IOException e) {
log.warn("Can not create IndexWriter, indexname=" + tempIndexPath, e);
} finally {
DBFactory.getInstance().commitAndCloseSession();
log.debug("doIndex: commit & close session");
if(indexerExecutor != null) {
indexerExecutor.shutdownNow();
indexerExecutor = null;
}
if(indexerWriterExecutor != null) {
indexerWriterExecutor.shutdownNow();
indexerWriterExecutor = null;
}
}
}
public Future<Boolean> submit(Callable<Boolean> task) {
if(indexerExecutor != null && !indexerExecutor.isShutdown()) {
return indexerExecutor.submit(task);
} else {
log.error("Try to submit a task to index executor but it's closed.");
return null;
}
}
/**
*
* @see java.lang.Runnable#run()
*/
public void run() {
try {
log.info("full indexing starts... Lucene-version:" + LucenePackage.get().getImplementationVersion());
fullIndexerStatus.indexingStarted();
doIndex();
index.indexingIsDone();
fullIndexerStatus.indexingFinished();
log.info("full indexing done in " + fullIndexerStatus.getIndexingTime() + "ms");
//created because the index is deleted and copied
IndexerEvent event = new IndexerEvent(IndexerEvent.INDEX_CREATED);
coordinatorManager.getCoordinator().getEventBus().fireEventToListenersOf(event, IndexerEvent.INDEX_ORES);
//OLAT-5630 - dump more infos about the indexer run - for analysis later
FullIndexerStatus status = getStatus();
log.info("full indexing summary: started: "+status.getFullIndexStartedAt());
log.info("full indexing summary: counter: "+status.getDocumentCount());
log.info("full indexing summary: index.per.minute: "+status.getIndexPerMinute());
log.info("full indexing summary: finished: "+status.getLastFullIndexDateString());
log.info("full indexing summary: time: "+status.getIndexingTime()+" ms");
log.info("full indexing summary: size: "+status.getIndexSize());
log.info("full indexing summary: document counters: "+status.getDocumentCounters());
log.info("full indexing summary: file type counters:"+status.getFileTypeCounters());
log.info("full indexing summary: excluded counter: "+status.getExcludedDocumentCount());
} catch(InterruptedException iex) {
log.info("FullIndexer was interrupted ;" + iex.getMessage());
} catch(Throwable ex) {
try {
log.error("Error during full-indexing:" + ex.getMessage() , ex);
} catch (NullPointerException nex) {
// no logging available (shut down) => do nothing
}
}
fullIndexerStatus.setStatus(FullIndexerStatus.STATUS_STOPPED);
stopIndexing = true;
try {
log.info("quit indexing run.");
} catch (NullPointerException nex) {
// no logging available (shut down)=> do nothing
}
}
public Document getDocument(String businessPath) {
try {
return searchService.doSearch(businessPath);
} catch (ServiceNotAvailableException | ParseException | QueryException e) {
return null;
}
}
/**
* Add a document to the index writer. The document is indexed by a single threaded executor,
* Lucene want that write operations happen within a single thread. The access is synchronized
* to block concurrent access to the executor. It blocks the text extractors and allow a
* ridiculously small queue but memory efficient.
*
* @param document
* @throws IOException
*/
public void addDocument(Document document) throws IOException,InterruptedException {
DBFactory.getInstance().commitAndCloseSession();
if (!stopIndexing && indexerWriterExecutor != null && !indexerWriterExecutor.isShutdown()) {
synchronized(indexerWriterBlock) {//once at a time please, wait, you have enough time
Future<Boolean> future = indexerWriterExecutor.submit(new AddDocumentCallable(document));
try {
future.get();
} catch (ExecutionException e) {
log.error("", e);
}
}
}
incrementDocumentTypeCounter(document);
incrementFileTypeCounter(document);
fullIndexerStatus.setNumberAvailableFolderIndexer(indexerExecutor.getPoolSize());
fullIndexerStatus.setNumberRunningFolderIndexer(indexerExecutor.getActiveCount());
}
private void incrementFileTypeCounter(Document document) {
String fileType = document.get(OlatDocument.FILETYPE_FIELD_NAME);
if ( (fileType != null) && (!fileType.equals(""))) {
int intValue = 0;
if (fileTypeCounters.containsKey(fileType)) {
Integer fileCounter = fileTypeCounters.get(fileType);
intValue = fileCounter.intValue();
}
intValue++;
fileTypeCounters.put(fileType, new Integer(intValue));
}
}
private void incrementDocumentTypeCounter(Document document) {
String documentType = document.get(OlatDocument.DOCUMENTTYPE_FIELD_NAME);
int intValue = 0;
if (documentCounters.containsKey(documentType)) {
Integer docCounter = documentCounters.get(documentType);
intValue = docCounter.intValue();
}
intValue++;
documentCounters.put(documentType, new Integer(intValue));
}
private void countIndexPerMinute() {
long currentTime = System.currentTimeMillis();
if (lastMinute+60000 > currentTime) {
// it is teh same minute
currentMinuteCounter++;
} else {
fullIndexerStatus.setIndexPerMinute(currentMinuteCounter);
currentMinuteCounter = 0;
if (lastMinute+120000 > currentTime) {
lastMinute = lastMinute+60000;
} else {
lastMinute = currentTime;
}
}
}
/**
* @return Return current full-indexer status.
*/
public FullIndexerStatus getStatus() {
fullIndexerStatus.setDocumentCounters(documentCounters);
fullIndexerStatus.setFileTypeCounters(fileTypeCounters);
fullIndexerStatus.setDocumentQueueSize(0);
return fullIndexerStatus;
}
public long getIndexInterval() {
return indexInterval;
}
/**
* @param indexInterval The indexInterval to set.
*/
public void setIndexInterval(long indexInterval) {
this.indexInterval = indexInterval;
}
/**
* Check if the indexing process is interrupted.
* @return TRUE: indexing process is interrupted.
*/
public boolean isInterupted() {
return stopIndexing;
}
private void resetDocumentCounters() {
documentCounters = new Hashtable<String,Integer>();
fileTypeCounters = new Hashtable<String,Integer>();
}
private class CloseIndexCallable implements Callable<Boolean> {
@Override
public Boolean call() throws Exception {
indexWriter.commit();
indexWriter.close();
indexWriter = null;
return Boolean.TRUE;
}
}
private class AddDocumentCallable implements Callable<Boolean> {
private final Document document;
public AddDocumentCallable(Document document) {
this.document = document;
}
@Override
public Boolean call() throws Exception {
indexWriter.addDocument(document);
fullIndexerStatus.incrementDocumentCount();
if (indexInterval != 0 && sleepDocumentCounter++ >= documentsPerInterval) {
sleepDocumentCounter = 0;
Thread.sleep(indexInterval);
} else if (stopIndexing) {
throw new InterruptedException("Do stop indexing at element=" + indexWriter.maxDoc());
}
countIndexPerMinute();
return Boolean.TRUE;
}
}
private static class IndexerThreadFactory implements ThreadFactory {
private static final AtomicInteger poolNumber = new AtomicInteger(1);
private final ThreadGroup group;
private final AtomicInteger threadNumber = new AtomicInteger(1);
private final String namePrefix;
IndexerThreadFactory(String prefix) {
SecurityManager s = System.getSecurityManager();
group = (s != null) ? s.getThreadGroup() : Thread.currentThread().getThreadGroup();
namePrefix = "index-" + prefix + "-" +
poolNumber.getAndIncrement() +
"-thread-";
}
public Thread newThread(Runnable r) {
Thread t = new Thread(group, r, namePrefix + threadNumber.getAndIncrement(), 0);
if (t.isDaemon()) {
t.setDaemon(false);
}
if (t.getPriority() != Thread.MIN_PRIORITY) {
t.setPriority(Thread.MIN_PRIORITY);
}
return t;
}
}
}