package proj.zoie.impl.indexing.internal; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import it.unimi.dsi.fastutil.longs.Long2ObjectMap; import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import it.unimi.dsi.fastutil.longs.LongSet; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedList; import java.util.List; import org.apache.log4j.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Similarity; import proj.zoie.api.DataConsumer; import proj.zoie.api.ZoieException; import proj.zoie.api.ZoieSegmentReader; import proj.zoie.api.indexing.ZoieIndexable; import proj.zoie.api.indexing.ZoieIndexable.IndexingReq; public abstract class LuceneIndexDataLoader<R extends IndexReader> implements DataConsumer<ZoieIndexable> { private static final Logger log = Logger.getLogger(LuceneIndexDataLoader.class); protected final Analyzer _analyzer; protected final Similarity _similarity; protected final SearchIndexManager<R> _idxMgr; protected LuceneIndexDataLoader(Analyzer analyzer, Similarity similarity,SearchIndexManager<R> idxMgr) { _analyzer = analyzer; _similarity = similarity; _idxMgr=idxMgr; } protected abstract BaseSearchIndex<R> getSearchIndex(); protected abstract void propagateDeletes(LongSet delDocs) throws IOException; protected abstract void commitPropagatedDeletes() throws IOException; /** * @Precondition incoming events sorted by version number * <br>every event in the events collection must be non-null * * @see proj.zoie.api.DataConsumer#consume(java.util.Collection) * */ public void consume(Collection<DataEvent<ZoieIndexable>> events) throws ZoieException { int eventCount = events.size(); if (events == null || eventCount == 0) return; BaseSearchIndex<R> idx = getSearchIndex(); Long2ObjectMap<List<IndexingReq>> addList = new Long2ObjectOpenHashMap<List<IndexingReq>>(); long version = idx.getVersion(); // current version LongSet delSet =new LongOpenHashSet(); try { for(DataEvent<ZoieIndexable> evt : events) { if (evt == null) continue; version = Math.max(version, evt.getVersion()); // interpret and get get the indexable instance ZoieIndexable indexable = evt.getData(); if (indexable == null || indexable.isSkip()) continue; long uid = indexable.getUID(); delSet.add(uid); addList.remove(uid); if (!indexable.isDeleted()) // update event { IndexingReq[] reqs = indexable.buildIndexingReqs(); for (IndexingReq req : reqs) { if (req != null) // if doc is provided, interpret as // a delete, e.g. update with // nothing { Document doc = req.getDocument(); if (doc!=null){ ZoieSegmentReader.fillDocumentID(doc, uid); } // add to the insert list List<IndexingReq> docList = addList.get(uid); if (docList == null) { docList = new LinkedList<IndexingReq>(); addList.put(uid, docList); } docList.add(req); } } } else { addList.remove(uid); } } List<IndexingReq> docList = new ArrayList<IndexingReq>(addList.size()); for (List<IndexingReq> tmpList : addList.values()) { docList.addAll(tmpList); } idx.updateIndex(delSet, docList, _analyzer,_similarity); propagateDeletes(delSet); synchronized(_idxMgr) { idx.refresh(); commitPropagatedDeletes(); } } catch (IOException ioe) { log.error("Problem indexing batch: " + ioe.getMessage(), ioe); } finally { try { if (idx != null) { idx.incrementEventCount(eventCount); idx.setVersion(version); // update the version of the // index } } catch (Exception e) // catch all exceptions, or it would screw // up jobs framework { log.warn(e.getMessage()); } finally { if (idx instanceof DiskSearchIndex<?>) { log.info("disk indexing requests flushed."); } } } } public void loadFromIndex(RAMSearchIndex<R> ramIndex) throws ZoieException { try { BaseSearchIndex<R> idx = getSearchIndex(); idx.loadFromIndex(ramIndex); idx.clearDeletes(); // clear old deletes as deletes are written to the lucene index idx.refresh(); // load the index reader idx.markDeletes(ramIndex.getDelDocs()); // inherit deletes idx.incrementEventCount(ramIndex.getEventsHandled()); idx.setVersion(Math.max(idx.getVersion(), ramIndex.getVersion())); } catch(IOException ioe) { log.error("Problem copying segments: " + ioe.getMessage(), ioe); throw new ZoieException(ioe); } } }