package proj.zoie.impl.indexing.internal; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Queue; import org.apache.log4j.Logger; import org.apache.lucene.index.IndexReader; import proj.zoie.api.DataConsumer; import proj.zoie.api.LifeCycleCotrolledDataConsumer; import proj.zoie.api.ZoieException; import proj.zoie.api.ZoieHealth; import proj.zoie.api.indexing.AbstractZoieIndexable; import proj.zoie.api.indexing.IndexingEventListener; import proj.zoie.api.indexing.IndexingEventListener.IndexingEvent; import proj.zoie.api.indexing.ZoieIndexable; import proj.zoie.api.indexing.ZoieIndexableInterpreter; import proj.zoie.impl.indexing.IndexUpdatedEvent; import proj.zoie.impl.indexing.IndexingThread; /** * Runs a background thread that flushes incoming data events in batch to the background DataConsumer. * Incoming data is buffered first. * A flush is carried out when the batch size is significant, * a client requesting a flush, or significant amount of time has passed. * The data is flushed to the underlying dataloader, which is a DataConsumer. * When incoming data comes in too fast, the thread sending data will be put on hold. * This acts as incoming data throttling. * * @param <R> * @param <D> */ public class BatchedIndexDataLoader<R extends IndexReader, D> implements LifeCycleCotrolledDataConsumer<D> { protected int _batchSize; protected long _delay; protected final DataConsumer<ZoieIndexable> _dataLoader; protected List<DataEvent<ZoieIndexable>> _batchList; protected final LoaderThread _loadMgrThread; protected long _lastFlushTime; protected int _eventCount; protected int _maxBatchSize; protected volatile boolean _stop; protected boolean _flush; protected final SearchIndexManager<R> _idxMgr; protected final ZoieIndexableInterpreter<D> _interpreter; private final Queue<IndexingEventListener> _lsnrList; private static Logger log = Logger.getLogger(BatchedIndexDataLoader.class); /** * @param dataLoader * @param batchSize * @param maxBatchSize * @param delay * @param idxMgr * @param lsnrList the list of IndexingEventListeners. This should be a <b>Synchronized</b> list if the content of this list is mutable. */ public BatchedIndexDataLoader(DataConsumer<ZoieIndexable> dataLoader, int batchSize, int maxBatchSize, long delay, SearchIndexManager<R> idxMgr, ZoieIndexableInterpreter<D> interpreter, Queue<IndexingEventListener> lsnrList) { _maxBatchSize = Math.max(maxBatchSize, batchSize); _batchSize = Math.min(batchSize, _maxBatchSize); _delay = delay; _dataLoader = dataLoader; _batchList = new LinkedList<DataEvent<ZoieIndexable>>(); _lastFlushTime = 0L; _eventCount = 0; _loadMgrThread = new LoaderThread(); _loadMgrThread.setName("disk indexer data loader"); _stop = false; _flush = false; _idxMgr = idxMgr; _interpreter = interpreter; _lsnrList = lsnrList; log.info("constructor: _maxBatchSize: " + _maxBatchSize + " _batchSize: " + _batchSize + " _delay: " + _delay); } protected final void fireIndexingEvent(IndexingEvent evt) { if (_lsnrList != null && _lsnrList.size() > 0) { synchronized (_lsnrList) { for (IndexingEventListener lsnr : _lsnrList) { try { lsnr.handleIndexingEvent(evt); } catch (Exception e) { log.error(e.getMessage(), e); } } } } } protected final void fireNewVersionEvent(String newVersion) { if (_lsnrList != null && _lsnrList.size() > 0) { synchronized (_lsnrList) { for (IndexingEventListener lsnr : _lsnrList) { try { lsnr.handleUpdatedDiskVersion(newVersion); } catch (Exception e) { log.error(e.getMessage(), e); } } } } } public synchronized int getMaxBatchSize() { return _maxBatchSize; } public synchronized void setMaxBatchSize(int maxBatchSize) { _maxBatchSize = Math.max(maxBatchSize, _batchSize); _batchSize = Math.min(_batchSize, _maxBatchSize); log.info("setMaxBatchSize: " + _maxBatchSize); } public synchronized int getBatchSize() { return _batchSize; } public synchronized void setBatchSize(int batchSize) { _batchSize = Math.min(Math.max(1, batchSize), _maxBatchSize); log.info("setBatchSize: " + _batchSize); } public synchronized long getDelay() { return _delay; } public synchronized void setDelay(long delay) { _delay = delay; log.info("setDelay: " + _delay); } public synchronized int getEventCount() { return _eventCount; } /** * * @see proj.zoie.api.DataConsumer#consume(java.util.Collection) */ @Override public void consume(Collection<DataEvent<D>> events) throws ZoieException { if (events != null) { ArrayList<DataEvent<ZoieIndexable>> indexableList = new ArrayList<DataEvent<ZoieIndexable>>( events.size()); Iterator<DataEvent<D>> iter = events.iterator(); while (iter.hasNext()) { try { DataEvent<D> event = iter.next(); ZoieIndexable indexable = _interpreter .convertAndInterpret(event.getData()); DataEvent<ZoieIndexable> newEvent = new DataEvent<ZoieIndexable>(indexable, event.getVersion(), event.isDelete()); indexableList.add(newEvent); } catch (Exception e) { ZoieHealth.setFatal(); log.error(e.getMessage(), e); } } synchronized (this) // this blocks the batch disk loader thread while indexing to RAM { while (_batchList.size() > _maxBatchSize) { // check if load manager thread is alive if (_loadMgrThread == null || !_loadMgrThread.isAlive()) { throw new ZoieException("fatal: indexing thread loader manager has stopped"); } try { this.wait(60000); // 1 min } catch (InterruptedException e) { continue; } } _eventCount += indexableList.size(); _batchList.addAll(indexableList); this.notifyAll(); } } } public synchronized int getCurrentBatchSize() { return (_batchList != null ? _batchList.size() : 0); } /** * This method needs to be called within a synchronized block on 'this'. * @return the list of data events already received. A new list is created to receive new data events. */ protected List<DataEvent<ZoieIndexable>> getBatchList() { List<DataEvent<ZoieIndexable>> tmpList = _batchList; _batchList = new LinkedList<DataEvent<ZoieIndexable>>(); return tmpList; } /** * Wait for timeOut amount of time for the indexing thread to process data events. * If there are still remaining unprocessed events by the end of timeOut duration, * a ZoieException is thrown. * @param timeOut a timeout value in milliseconds. * @throws ZoieException */ public void flushEvents(long timeOut) throws ZoieException { synchronized (this) { while (_eventCount > 0) { _flush = true; this.notifyAll(); long now1 = System.currentTimeMillis(); if (timeOut <= 0) { log.error("sync timed out"); throw new ZoieException("timed out"); } try { long waittime = Math.min(200, timeOut); this.wait(waittime); } catch (InterruptedException e) { throw new ZoieException(e.getMessage()); } long now2 = System.currentTimeMillis(); timeOut -= (now2 - now1); } } } /** * Used by the indexing thread to flush incoming data events in batch. * A flush is carried out when the batch size is significant, * a client requesting a flush, or significant amount of time has passed. * The data is flushed to the underlying dataloader, which is a DataConsumer. */ protected void processBatch() { List<DataEvent<ZoieIndexable>> tmpList = null; long now = System.currentTimeMillis(); long duration = now - _lastFlushTime; String currentVersion; try { currentVersion = _idxMgr.getCurrentDiskVersion(); } catch (IOException ioe) { currentVersion = null; } synchronized (this) { while (_batchList.size() < _batchSize && !_stop && !_flush && duration < _delay) { try { this.wait(_delay - duration); } catch (InterruptedException e) { log.warn(e.getMessage()); } now = System.currentTimeMillis(); duration = now - _lastFlushTime; } _flush = false; _lastFlushTime = now; if (_batchList.size() > 0) { // change the status and get the batch list // this has to be done in the block synchronized on BatchIndexDataLoader _idxMgr.setDiskIndexerStatus(SearchIndexManager.Status.Working); tmpList = getBatchList(); } } if (tmpList != null) { long t1 = System.currentTimeMillis(); int eventCount = tmpList.size(); Comparator<String> versioComparator = _idxMgr.getVersionComparator(); for (DataEvent<ZoieIndexable> evt : tmpList) { String newVersion = evt.getVersion(); if (currentVersion == null) { currentVersion = newVersion; } else { if (versioComparator.compare(currentVersion, newVersion) < 0) { currentVersion = newVersion; } } } try { _dataLoader.consume(tmpList); } catch (ZoieException e) { ZoieHealth.setFatal(); log.error(e.getMessage(), e); } finally { long t2 = System.currentTimeMillis(); synchronized (this) { _eventCount -= eventCount; this.notifyAll(); log.info(this + " flushed batch of " + eventCount + " events to disk indexer, took: " + (t2 - t1) + " current event count: " + _eventCount); IndexUpdatedEvent evt = new IndexUpdatedEvent(eventCount, t1, t2, _eventCount); fireIndexingEvent(evt); try { String oldVersion = _idxMgr.getCurrentDiskVersion(); if (currentVersion != null && !currentVersion.equals(oldVersion)) { fireNewVersionEvent(currentVersion); } } catch (IOException ioe) { log.error(ioe.getMessage(), ioe); } } } } else { log.debug("batch size is 0"); } } protected class LoaderThread extends IndexingThread { LoaderThread() { super("disk indexer data loader"); } @Override public void run() { while (!_stop) { processBatch(); } } } /** * Starts the build-in indexing thread. */ @Override public void start() { _loadMgrThread.setName(String.valueOf(this)); _loadMgrThread.start(); } /** * Shutdown the build-in indexing thread and wait until it dies. */ @Override public void stop() { synchronized (this) { _stop = true; this.notifyAll(); } try { _loadMgrThread.join(); } catch (InterruptedException e) { log.error(e.getMessage(), e); } } protected static class ZoieIndexableDecorator extends AbstractZoieIndexable { private final ZoieIndexable _inner; private ZoieIndexableDecorator(ZoieIndexable inner) { _inner = inner; } public static ZoieIndexableDecorator decorate(ZoieIndexable inner) { return (inner == null ? null : new ZoieIndexableDecorator(inner)); } @Override public IndexingReq[] buildIndexingReqs() { return _inner.buildIndexingReqs(); } @Override public long getUID() { return _inner.getUID(); } @Override public boolean isDeleted() { return _inner.isDeleted(); } @Override public boolean isSkip() { return _inner.isSkip(); } } @Override public String getVersion() { throw new UnsupportedOperationException(); } @Override public Comparator<String> getVersionComparator() { throw new UnsupportedOperationException(); } }