package proj.zoie.impl.indexing.internal;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.similarities.Similarity;
import proj.zoie.api.DataConsumer;
import proj.zoie.api.ZoieException;
import proj.zoie.api.ZoieHealth;
import proj.zoie.api.ZoieSegmentReader;
import proj.zoie.api.indexing.AbstractZoieIndexable;
import proj.zoie.api.indexing.ZoieIndexable;
import proj.zoie.api.indexing.ZoieIndexable.IndexingReq;
import proj.zoie.impl.indexing.internal.SearchIndexManager.Status;
/**
 * Base class for loaders that apply batches of {@link DataEvent}s to a Lucene-backed
 * search index. Subclasses supply the concrete target index via {@link #getSearchIndex()}
 * and the delete-propagation strategy for any downstream indexes.
 *
 * <p>Reader refreshes and delete commits are serialized on the owning
 * {@link SearchIndexManager} instance.
 */
public abstract class LuceneIndexDataLoader<R extends IndexReader> implements
    DataConsumer<ZoieIndexable> {

  private static final Logger log = Logger.getLogger(LuceneIndexDataLoader.class);

  protected final Analyzer _analyzer;
  protected final Similarity _similarity;
  protected final SearchIndexManager<R> _idxMgr;
  protected final Comparator<String> _versionComparator;

  // Optional filter selecting documents to be permanently removed before each
  // batch/merge is applied; null means no purging.
  private Filter _purgeFilter;

  /**
   * @param analyzer analyzer used when writing documents
   * @param similarity similarity used when writing documents
   * @param idxMgr manager owning the target index; also used as the refresh/commit lock
   * @param versionComparator comparator defining the ordering of version strings
   */
  protected LuceneIndexDataLoader(Analyzer analyzer, Similarity similarity,
      SearchIndexManager<R> idxMgr, Comparator<String> versionComparator) {
    _analyzer = analyzer;
    _similarity = similarity;
    _idxMgr = idxMgr;
    _versionComparator = versionComparator;
    _purgeFilter = null;
  }

  /**
   * Sets the filter identifying documents to purge before each batch/merge;
   * pass {@code null} to disable purging.
   */
  public void setPurgeFilter(Filter purgeFilter) {
    _purgeFilter = purgeFilter;
  }

  /** @return the index this loader writes to. */
  protected abstract BaseSearchIndex<R> getSearchIndex();

  /** Propagates the given UID delete set to any downstream indexes. */
  protected abstract void propagateDeletes(LongSet delDocs) throws IOException;

  /** Commits deletes previously handed to {@link #propagateDeletes(LongSet)}. */
  protected abstract void commitPropagatedDeletes() throws IOException;

  /**
   * Deletes all documents matching the purge filter, if one is set.
   * Errors are logged and swallowed so a bad filter cannot abort indexing.
   */
  private final void purgeDocuments() {
    if (_purgeFilter == null) {
      return;
    }
    BaseSearchIndex<R> idx = getSearchIndex();
    log.info("purging docs started...");
    long start = System.currentTimeMillis();
    try {
      IndexWriter writer = idx.openIndexWriter(null, null);
      writer.deleteDocuments(new ConstantScoreQuery(_purgeFilter));
      writer.commit();
    } catch (Throwable th) {
      // Fix: the previous message blamed "creating purge filter" even when the
      // delete or commit itself failed.
      log.error("problem purging documents: " + th.getMessage(), th);
    } finally {
      idx.closeIndexWriter();
    }
    long end = System.currentTimeMillis();
    // Fix: removed the "total docs purged" log line — the counter backing it was
    // never incremented, so it always (misleadingly) reported 0.
    log.info("purging docs completed in " + (end - start) + "ms");
  }

  /**
   * Applies a batch of data events to the index: every event's UID is first marked
   * for deletion (so updates replace any prior copy), then non-delete events are
   * rebuilt into Lucene documents and added back.
   *
   * <p>The index version is advanced to the highest event version seen (per
   * {@code _versionComparator}) and the event count is incremented even when the
   * batch fails, so upstream bookkeeping stays consistent.
   *
   * @param events incoming events sorted by version number
   * <br>every event in the events collection must be non-null
   *
   * @see proj.zoie.api.DataConsumer#consume(java.util.Collection)
   */
  @Override
  public void consume(Collection<DataEvent<ZoieIndexable>> events) throws ZoieException {
    if (events == null || events.isEmpty()) {
      return;
    }
    int eventCount = events.size();
    BaseSearchIndex<R> idx = getSearchIndex();
    if (idx == null) {
      throw new ZoieException("trying to consume to null index");
    }
    Long2ObjectMap<List<IndexingReq>> addList = new Long2ObjectOpenHashMap<List<IndexingReq>>();
    String version = idx.getVersion(); // current version
    LongSet delSet = new LongOpenHashSet();
    try {
      for (DataEvent<ZoieIndexable> evt : events) {
        if (evt == null) {
          continue;
        }
        version = maxVersion(version, evt.getVersion());
        // interpret and get the indexable instance
        ZoieIndexable indexable = evt.getData();
        if (indexable == null || indexable.isSkip()) {
          continue;
        }
        long uid = indexable.getUID();
        // Every event implies removing any previous copy of the document first.
        delSet.add(uid);
        addList.remove(uid);
        if (!(indexable.isDeleted() || evt.isDelete())) { // update event
          collectIndexingReqs(indexable, uid, addList);
        }
      }
      // Flatten the per-UID request lists into the batch insert list.
      List<IndexingReq> docList = new ArrayList<IndexingReq>(addList.size());
      for (List<IndexingReq> tmpList : addList.values()) {
        docList.addAll(tmpList);
      }
      purgeDocuments();
      idx.updateIndex(delSet, docList, _analyzer, _similarity);
      propagateDeletes(delSet);
      synchronized (_idxMgr) {
        idx.refresh();
        commitPropagatedDeletes();
      }
    } catch (IOException ioe) {
      // NOTE(review): the batch failure is logged but not rethrown — presumably
      // deliberate so one bad batch does not kill the indexing thread; confirm.
      ZoieHealth.setFatal();
      log.error("Problem indexing batch: " + ioe.getMessage(), ioe);
    } finally {
      try {
        if (idx != null) {
          idx.setVersion(version);
          idx.incrementEventCount(eventCount);
        }
      } catch (Exception e) {
        // catch all exceptions, or it would screw up the jobs framework
        // Fix: keep the stack trace instead of logging only the (possibly null) message.
        log.warn("problem updating index bookkeeping: " + e.getMessage(), e);
      } finally {
        if (idx instanceof DiskSearchIndex<?>) {
          log.info("disk indexing requests flushed.");
        }
      }
    }
  }

  /**
   * @return the later of the two version strings according to {@code _versionComparator};
   *         {@code candidate} when {@code current} is null.
   */
  private String maxVersion(String current, String candidate) {
    if (current == null) {
      return candidate;
    }
    return _versionComparator.compare(current, candidate) < 0 ? candidate : current;
  }

  /**
   * Builds the Lucene documents for one updated entity and queues them in
   * {@code addList} under its UID. A failure for one entity is logged and
   * skipped so it cannot abort the whole batch.
   */
  private void collectIndexingReqs(ZoieIndexable indexable, long uid,
      Long2ObjectMap<List<IndexingReq>> addList) {
    try {
      IndexingReq[] reqs = indexable.buildIndexingReqs();
      for (IndexingReq req : reqs) {
        if (req == null) {
          // if no doc is provided, interpret as a delete, e.g. update with nothing
          continue;
        }
        Document doc = req.getDocument();
        if (doc != null) {
          ZoieSegmentReader.fillDocumentID(doc, uid);
          if (indexable.isStorable()) {
            byte[] bytes = indexable.getStoreValue();
            if (bytes != null) {
              doc.add(new StoredField(AbstractZoieIndexable.DOCUMENT_STORE_FIELD, bytes));
            }
          }
        }
        // add to the insert list
        List<IndexingReq> docList = addList.get(uid);
        if (docList == null) {
          docList = new LinkedList<IndexingReq>();
          addList.put(uid, docList);
        }
        docList.add(req);
      }
    } catch (Exception ex) {
      log.error("Couldn't index the event with uid - " + uid, ex);
    }
  }

  /**
   * Merges a read-only RAM index into this loader's index, inheriting its deletes
   * and advancing the version to the later of the two indexes' versions.
   *
   * @throws ZoieException wrapping any {@link IOException} during the merge
   */
  public void loadFromIndex(RAMSearchIndex<R> ramIndex) throws ZoieException {
    try {
      // get the disk search index
      BaseSearchIndex<R> idx = getSearchIndex();
      // merge the read-only RAM index with the disk index
      idx.loadFromIndex(ramIndex);
      // set new version: the later of the disk and RAM versions
      idx.setVersion(maxVersion(idx.getVersion(), ramIndex.getVersion()));
      synchronized (_idxMgr) {
        // update the disk index reader
        idx.refresh();
        purgeDocuments();
        // inherit deletes from the RAM index
        idx.markDeletes(ramIndex.getDelDocs());
        idx.commitDeletes();
        idx.incrementEventCount(ramIndex.getEventsHandled());
        _idxMgr.setDiskIndexerStatus(Status.Sleep);
      }
    } catch (IOException ioe) {
      ZoieHealth.setFatal();
      log.error("Problem copying segments: " + ioe.getMessage(), ioe);
      throw new ZoieException(ioe);
    }
  }

  /**
   * @return the version number of the search index, or null if no index is available.
   */
  @Override
  public String getVersion() {
    BaseSearchIndex<R> idx = getSearchIndex();
    return idx == null ? null : idx.getVersion();
  }

  /**
   * @return the version comparator.
   */
  @Override
  public Comparator<String> getVersionComparator() {
    return _versionComparator;
  }
}