package proj.zoie.impl.indexing.internal;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import proj.zoie.api.DataConsumer;
import proj.zoie.api.LifeCycleCotrolledDataConsumer;
import proj.zoie.api.ZoieException;
import proj.zoie.api.ZoieHealth;
import proj.zoie.api.indexing.AbstractZoieIndexable;
import proj.zoie.api.indexing.IndexingEventListener;
import proj.zoie.api.indexing.IndexingEventListener.IndexingEvent;
import proj.zoie.api.indexing.ZoieIndexable;
import proj.zoie.api.indexing.ZoieIndexableInterpreter;
import proj.zoie.impl.indexing.IndexUpdatedEvent;
import proj.zoie.impl.indexing.IndexingThread;
/**
* Runs a background thread that flushes incoming data events in batch to the background DataConsumer.
* Incoming data is buffered first.
 * A flush is carried out when the batch size is significant,
 * when a client requests a flush, or when a significant amount of time has passed.
* The data is flushed to the underlying dataloader, which is a DataConsumer.
* When incoming data comes in too fast, the thread sending data will be put on hold.
* This acts as incoming data throttling.
*
* @param <R>
* @param <D>
*/
public class BatchedIndexDataLoader<R extends IndexReader, D> implements
    LifeCycleCotrolledDataConsumer<D> {
  /** Flush threshold: the loader thread flushes once the buffered batch reaches this size. Guarded by {@code synchronized(this)}. */
  protected int _batchSize;
  /** Maximum time (ms) the loader thread waits between flushes; a flush happens even if the batch is small. */
  protected long _delay;
  /** Downstream consumer the buffered events are flushed to (the disk indexer's loader). */
  protected final DataConsumer<ZoieIndexable> _dataLoader;
  /** Buffer of pending, already-interpreted events. Guarded by {@code synchronized(this)}. */
  protected List<DataEvent<ZoieIndexable>> _batchList;
  /** Background thread that repeatedly runs {@link #processBatch()} until {@link #stop()}. */
  protected final LoaderThread _loadMgrThread;
  /** Timestamp (ms) of the last flush attempt; used to enforce {@link #_delay}. Guarded by {@code synchronized(this)}. */
  protected long _lastFlushTime;
  /** Number of events accepted but not yet flushed downstream. Guarded by {@code synchronized(this)}. */
  protected int _eventCount;
  /** Hard cap: {@link #consume} blocks the producer thread while the buffer exceeds this size (throttling). */
  protected int _maxBatchSize;
  /** Set by {@link #stop()}; volatile so the loader thread observes it without holding the monitor. */
  protected volatile boolean _stop;
  /** Set by {@link #flushEvents} to request an immediate flush; cleared by the loader thread. Guarded by {@code synchronized(this)}. */
  protected boolean _flush;
  /** Manages the disk index; supplies the version comparator and current disk version. */
  protected final SearchIndexManager<R> _idxMgr;
  /** Converts raw incoming data of type D into {@link ZoieIndexable} instances. */
  protected final ZoieIndexableInterpreter<D> _interpreter;
  /** Listeners notified of flush and disk-version events; iteration is synchronized on the queue itself. */
  private final Queue<IndexingEventListener> _lsnrList;
  // NOTE(review): could be declared final; it is assigned exactly once here.
  private static Logger log = Logger.getLogger(BatchedIndexDataLoader.class);
  /**
   * Creates a batching loader. The loader thread is constructed but NOT started;
   * call {@link #start()} to begin background flushing.
   *
   * @param dataLoader downstream consumer that receives flushed batches
   * @param batchSize flush threshold (clamped to be no larger than maxBatchSize)
   * @param maxBatchSize producer-throttling cap (raised to batchSize if smaller)
   * @param delay maximum time in ms between flushes
   * @param idxMgr disk index manager used for status and version bookkeeping
   * @param interpreter converts incoming D instances into ZoieIndexables
   * @param lsnrList the list of IndexingEventListeners. This should be a <b>Synchronized</b> list if the content of this list is mutable.
   */
  public BatchedIndexDataLoader(DataConsumer<ZoieIndexable> dataLoader, int batchSize,
      int maxBatchSize, long delay, SearchIndexManager<R> idxMgr,
      ZoieIndexableInterpreter<D> interpreter, Queue<IndexingEventListener> lsnrList) {
    // Normalize so that _batchSize <= _maxBatchSize regardless of the argument order given.
    _maxBatchSize = Math.max(maxBatchSize, batchSize);
    _batchSize = Math.min(batchSize, _maxBatchSize);
    _delay = delay;
    _dataLoader = dataLoader;
    _batchList = new LinkedList<DataEvent<ZoieIndexable>>();
    _lastFlushTime = 0L;
    _eventCount = 0;
    _loadMgrThread = new LoaderThread();
    _loadMgrThread.setName("disk indexer data loader");
    _stop = false;
    _flush = false;
    _idxMgr = idxMgr;
    _interpreter = interpreter;
    _lsnrList = lsnrList;
    log.info("constructor: _maxBatchSize: " + _maxBatchSize + " _batchSize: " + _batchSize
        + " _delay: " + _delay);
  }
  /**
   * Delivers evt to every registered listener; a listener failure is logged
   * and does not prevent delivery to the remaining listeners.
   */
  protected final void fireIndexingEvent(IndexingEvent evt) {
    if (_lsnrList != null && _lsnrList.size() > 0) {
      // Iteration over the (possibly mutable) queue must be externally synchronized.
      synchronized (_lsnrList) {
        for (IndexingEventListener lsnr : _lsnrList) {
          try {
            lsnr.handleIndexingEvent(evt);
          } catch (Exception e) {
            log.error(e.getMessage(), e);
          }
        }
      }
    }
  }
  /**
   * Notifies every registered listener that the disk index version advanced to newVersion.
   * Listener failures are logged and skipped, same policy as {@link #fireIndexingEvent}.
   */
  protected final void fireNewVersionEvent(String newVersion) {
    if (_lsnrList != null && _lsnrList.size() > 0) {
      synchronized (_lsnrList) {
        for (IndexingEventListener lsnr : _lsnrList) {
          try {
            lsnr.handleUpdatedDiskVersion(newVersion);
          } catch (Exception e) {
            log.error(e.getMessage(), e);
          }
        }
      }
    }
  }
  public synchronized int getMaxBatchSize() {
    return _maxBatchSize;
  }
  /** Updates the throttling cap, preserving the invariant _batchSize &lt;= _maxBatchSize. */
  public synchronized void setMaxBatchSize(int maxBatchSize) {
    _maxBatchSize = Math.max(maxBatchSize, _batchSize);
    _batchSize = Math.min(_batchSize, _maxBatchSize);
    log.info("setMaxBatchSize: " + _maxBatchSize);
  }
  public synchronized int getBatchSize() {
    return _batchSize;
  }
  /** Updates the flush threshold, clamped to [1, _maxBatchSize]. */
  public synchronized void setBatchSize(int batchSize) {
    _batchSize = Math.min(Math.max(1, batchSize), _maxBatchSize);
    log.info("setBatchSize: " + _batchSize);
  }
  public synchronized long getDelay() {
    return _delay;
  }
  public synchronized void setDelay(long delay) {
    _delay = delay;
    log.info("setDelay: " + _delay);
  }
  /** @return the number of events accepted but not yet flushed downstream. */
  public synchronized int getEventCount() {
    return _eventCount;
  }
  /**
   * Interprets each incoming event into a {@link ZoieIndexable} and appends it to the
   * internal buffer. If the buffer is over {@link #_maxBatchSize}, the calling (producer)
   * thread is blocked until the loader thread drains it — this is the incoming-data
   * throttle described in the class javadoc.
   *
   * @throws ZoieException if the loader thread is no longer alive while the buffer is full
   * @see proj.zoie.api.DataConsumer#consume(java.util.Collection)
   */
  @Override
  public void consume(Collection<DataEvent<D>> events) throws ZoieException {
    if (events != null) {
      ArrayList<DataEvent<ZoieIndexable>> indexableList = new ArrayList<DataEvent<ZoieIndexable>>(
          events.size());
      Iterator<DataEvent<D>> iter = events.iterator();
      while (iter.hasNext()) {
        try {
          DataEvent<D> event = iter.next();
          ZoieIndexable indexable = _interpreter
              .convertAndInterpret(event.getData());
          DataEvent<ZoieIndexable> newEvent = new DataEvent<ZoieIndexable>(indexable,
              event.getVersion(), event.isDelete());
          indexableList.add(newEvent);
        } catch (Exception e) {
          // An event that fails interpretation is dropped; the system is marked fatal.
          ZoieHealth.setFatal();
          log.error(e.getMessage(), e);
        }
      }
      synchronized (this) // holding 'this' also keeps the loader thread out of processBatch() while we append
      {
        // Throttle: block the producer while the buffer exceeds the cap. The loader
        // thread's notifyAll() in processBatch() wakes us once it has drained the batch.
        while (_batchList.size() > _maxBatchSize) {
          // check if load manager thread is alive
          if (_loadMgrThread == null || !_loadMgrThread.isAlive()) {
            throw new ZoieException("fatal: indexing thread loader manager has stopped");
          }
          try {
            this.wait(60000); // 1 min
          } catch (InterruptedException e) {
            // NOTE(review): interrupt status is swallowed here and the wait loop
            // continues; the producer thread cannot be interrupted out of consume().
            continue;
          }
        }
        _eventCount += indexableList.size();
        _batchList.addAll(indexableList);
        // Wake the loader thread (and any flushEvents() waiters) — new data is available.
        this.notifyAll();
      }
    }
  }
  /** @return the current number of buffered (unflushed) events in the batch list. */
  public synchronized int getCurrentBatchSize() {
    return (_batchList != null ? _batchList.size() : 0);
  }
  /**
   * Swaps out the current buffer for a fresh empty one.
   * This method needs to be called within a synchronized block on 'this'.
   * @return the list of data events already received. A new list is created to receive new data events.
   */
  protected List<DataEvent<ZoieIndexable>> getBatchList() {
    List<DataEvent<ZoieIndexable>> tmpList = _batchList;
    _batchList = new LinkedList<DataEvent<ZoieIndexable>>();
    return tmpList;
  }
  /**
   * Wait for timeOut amount of time for the indexing thread to process data events.
   * If there are still remaining unprocessed events by the end of timeOut duration,
   * a ZoieException is thrown. Polls in wait slices of at most 200 ms, re-requesting
   * a flush each round until _eventCount drops to zero.
   * @param timeOut a timeout value in milliseconds.
   * @throws ZoieException if the timeout expires or the wait is interrupted
   */
  public void flushEvents(long timeOut) throws ZoieException {
    synchronized (this) {
      while (_eventCount > 0) {
        // Ask the loader thread to flush now rather than waiting for _batchSize/_delay.
        _flush = true;
        this.notifyAll();
        long now1 = System.currentTimeMillis();
        if (timeOut <= 0) {
          log.error("sync timed out");
          throw new ZoieException("timed out");
        }
        try {
          // Short wait slice so we re-check _eventCount and the remaining timeout often.
          long waittime = Math.min(200, timeOut);
          this.wait(waittime);
        } catch (InterruptedException e) {
          // NOTE(review): the interrupt status is not restored and the cause is dropped
          // (only the message is kept) — confirm ZoieException has a (String, Throwable) ctor.
          throw new ZoieException(e.getMessage());
        }
        long now2 = System.currentTimeMillis();
        // Charge the elapsed time (including spurious/early wakeups) against the budget.
        timeOut -= (now2 - now1);
      }
    }
  }
  /**
   * Used by the indexing thread to flush incoming data events in batch.
   * A flush is carried out when the batch size is significant, a client requests
   * a flush, or a significant amount of time has passed.
   * The data is flushed to the underlying dataloader, which is a DataConsumer.
   */
  protected void processBatch() {
    List<DataEvent<ZoieIndexable>> tmpList = null;
    long now = System.currentTimeMillis();
    long duration = now - _lastFlushTime;
    String currentVersion;
    try {
      currentVersion = _idxMgr.getCurrentDiskVersion();
    } catch (IOException ioe) {
      // Fall back to deriving the version purely from the events in this batch.
      currentVersion = null;
    }
    synchronized (this) {
      // Sleep until any flush trigger fires: batch big enough, stop requested,
      // explicit flush requested, or the max delay between flushes has elapsed.
      while (_batchList.size() < _batchSize && !_stop && !_flush && duration < _delay) {
        try {
          this.wait(_delay - duration);
        } catch (InterruptedException e) {
          log.warn(e.getMessage());
        }
        now = System.currentTimeMillis();
        duration = now - _lastFlushTime;
      }
      // Consume the flush request (if any) and stamp this flush attempt, even if empty.
      _flush = false;
      _lastFlushTime = now;
      if (_batchList.size() > 0) {
        // change the status and get the batch list
        // this has to be done in the block synchronized on BatchIndexDataLoader
        _idxMgr.setDiskIndexerStatus(SearchIndexManager.Status.Working);
        tmpList = getBatchList();
      }
    }
    if (tmpList != null) {
      long t1 = System.currentTimeMillis();
      int eventCount = tmpList.size();
      Comparator<String> versioComparator = _idxMgr.getVersionComparator();
      // Track the highest event version in this batch per the manager's comparator.
      for (DataEvent<ZoieIndexable> evt : tmpList) {
        String newVersion = evt.getVersion();
        if (currentVersion == null) {
          currentVersion = newVersion;
        } else {
          if (versioComparator.compare(currentVersion, newVersion) < 0) {
            currentVersion = newVersion;
          }
        }
      }
      try {
        _dataLoader.consume(tmpList);
      } catch (ZoieException e) {
        // The batch was already removed from _batchList; on failure the system is
        // marked fatal and the events are not re-queued.
        ZoieHealth.setFatal();
        log.error(e.getMessage(), e);
      } finally {
        long t2 = System.currentTimeMillis();
        synchronized (this) {
          // Decremented even when consume() failed, so flushEvents()/consume() waiters
          // are always released.
          _eventCount -= eventCount;
          this.notifyAll();
          log.info(this + " flushed batch of " + eventCount + " events to disk indexer, took: "
              + (t2 - t1) + " current event count: " + _eventCount);
          IndexUpdatedEvent evt = new IndexUpdatedEvent(eventCount, t1, t2, _eventCount);
          fireIndexingEvent(evt);
          try {
            // Only announce a version change if the disk version actually moved.
            String oldVersion = _idxMgr.getCurrentDiskVersion();
            if (currentVersion != null && !currentVersion.equals(oldVersion)) {
              fireNewVersionEvent(currentVersion);
            }
          } catch (IOException ioe) {
            log.error(ioe.getMessage(), ioe);
          }
        }
      }
    } else {
      log.debug("batch size is 0");
    }
  }
  /**
   * Background thread that repeatedly calls {@link #processBatch()} until
   * {@link #stop()} sets the volatile _stop flag. Blocking between flushes
   * happens inside processBatch() itself.
   */
  protected class LoaderThread extends IndexingThread {
    LoaderThread() {
      super("disk indexer data loader");
    }
    @Override
    public void run() {
      while (!_stop) {
        processBatch();
      }
    }
  }
  /**
   * Starts the built-in indexing thread.
   */
  @Override
  public void start() {
    // Renames the thread (overriding the name set in the constructor) before starting it.
    _loadMgrThread.setName(String.valueOf(this));
    _loadMgrThread.start();
  }
  /**
   * Shutdown the built-in indexing thread and wait until it dies.
   */
  @Override
  public void stop() {
    synchronized (this) {
      _stop = true;
      // Wake the loader thread out of its wait in processBatch() so it can observe _stop.
      this.notifyAll();
    }
    try {
      _loadMgrThread.join();
    } catch (InterruptedException e) {
      log.error(e.getMessage(), e);
    }
  }
  /**
   * Wraps a ZoieIndexable and forwards the core indexing callbacks to it.
   * {@link #decorate} is null-safe: a null inner indexable yields null.
   */
  protected static class ZoieIndexableDecorator extends AbstractZoieIndexable {
    private final ZoieIndexable _inner;
    private ZoieIndexableDecorator(ZoieIndexable inner) {
      _inner = inner;
    }
    public static ZoieIndexableDecorator decorate(ZoieIndexable inner) {
      return (inner == null ? null : new ZoieIndexableDecorator(inner));
    }
    @Override
    public IndexingReq[] buildIndexingReqs() {
      return _inner.buildIndexingReqs();
    }
    @Override
    public long getUID() {
      return _inner.getUID();
    }
    @Override
    public boolean isDeleted() {
      return _inner.isDeleted();
    }
    @Override
    public boolean isSkip() {
      return _inner.isSkip();
    }
  }
  /** Not supported by this consumer; version tracking is delegated to the index manager. */
  @Override
  public String getVersion() {
    throw new UnsupportedOperationException();
  }
  /** Not supported by this consumer; use {@code _idxMgr.getVersionComparator()} instead. */
  @Override
  public Comparator<String> getVersionComparator() {
    throw new UnsupportedOperationException();
  }
}