/* * Copyright (c) 2011 LinkedIn, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.flaptor.indextank.index.lsi; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.log4j.Logger; import com.flaptor.indextank.Indexer; import com.flaptor.indextank.index.DocId; import com.flaptor.indextank.index.Document; import com.flaptor.indextank.index.QueryMatcher; import com.flaptor.indextank.index.TopMatches; import com.flaptor.indextank.index.scorer.FacetingManager; import com.flaptor.indextank.index.scorer.Scorer; import com.flaptor.indextank.query.IndexEngineParser; import com.flaptor.indextank.query.Query; import com.flaptor.util.Execute; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Maps; public class LargeScaleIndex implements QueryMatcher, Indexer { private static final Logger logger = Logger.getLogger(Execute.whoAmI()); private static final String INDEX_DIRECTORY = "index"; protected LsiIndex index; protected LsiIndexer indexer; //private LsiSearcher searcher; private QueryMatcher searcher; private BlockingQueue<Operation> queue; @SuppressWarnings("unused") private Scorer scorer; private File baseDir; private ReadWriteLock rwl; private Lock r; private Lock w; private boolean checkpoint; LargeScaleIndex() { // empty constructor for test stubs } /** * Create an LSI and its components * * @param scorer The scorer to use when ranking results * @param parser * @param basePath The base path (a directory) from the which all the LSI directories will be found. */ public LargeScaleIndex(Scorer scorer, IndexEngineParser parser, File baseDir, FacetingManager facetingManager) { Preconditions.checkNotNull(scorer); Preconditions.checkNotNull(parser); Preconditions.checkNotNull(baseDir); this.baseDir = baseDir; if (!baseDir.exists() || !baseDir.isDirectory()) { throw new IllegalArgumentException("The basePath must be an existing directory"); } File indexDir= new File(baseDir,INDEX_DIRECTORY); if (!indexDir.exists()) { logger.info("Starting with a FRESH, BRAND NEW index."); indexDir.mkdir(); } try { index = new LsiIndex(parser, indexDir.getAbsolutePath(), scorer, facetingManager); } catch (IOException e) { throw new IllegalArgumentException("IOException when trying to use the directory set in the index.directory property.", e); } this.scorer = scorer; this.indexer = new LsiIndexer(index); this.searcher = new LsiSearcher(index); this.queue = new ArrayBlockingQueue<Operation>(1000); this.rwl = new ReentrantReadWriteLock(); this.r = rwl.readLock(); this.w = rwl.writeLock(); this.checkpoint = false; } @Override public boolean hasChanges(DocId docid) throws InterruptedException { return searcher.hasChanges(docid); } @Override public TopMatches findMatches(Query query, int limit, int scoringFunctionIndex) throws InterruptedException { return searcher.findMatches(query, limit, scoringFunctionIndex); } @Override public TopMatches findMatches(Query query, Predicate<DocId> docFilter, int limit, int scoringFunctionIndex) throws InterruptedException { return searcher.findMatches(query, docFilter, limit, scoringFunctionIndex); } /* * (non-Javadoc) * @see com.flaptor.indextank.index.IIndexer#add(java.lang.String, com.flaptor.indextank.index.Document) */ public void add(String docid, Document doc) { r.lock(); boolean enqueue = this.checkpoint; r.unlock(); if (enqueue) { try { logger.debug("enqueueing " + docid + " for later indexing" ); this.queue.put(new AddOperation(docid,doc)); } catch (InterruptedException ie){ // TODO log it here, document dumped / lost } } else { indexer.add(docid,doc); } } /* * (non-Javadoc) * @see com.flaptor.indextank.index.IIndexer#del(java.lang.String) */ public void del(String docid) { r.lock(); boolean enqueue = this.checkpoint; r.unlock(); if (enqueue) { try { this.queue.put(new DelOperation(docid)); } catch (InterruptedException ie){ // TODO log it here, document dumped / lost } } else { indexer.del(docid); } } public void startDump(DumpCompletionListener listener){ w.lock(); if (this.checkpoint) { throw new IllegalStateException("2 simultaneous dumps");// TODO 2 checkpoints simultaneous?; } this.checkpoint = true; w.unlock(); logger.debug("About to start a directory checkpoint"); indexer.makeDirectoryCheckpoint(); logger.debug("Directory checkpoint done. Telling listener about it."); listener.dumpCompleted(); logger.debug("Consuming queue of pending operations for next segment..."); OperationWorker worker = new OperationWorker(this.indexer,this.queue); worker.start(); try { worker.join(); } catch (InterruptedException ie){ // TODO log ie } // TODO buggy. something may get into the queue after the worker loop. logger.debug("Done consuming the queue. Ready to accept direct operations"); w.lock(); this.checkpoint = false; w.unlock(); } public File getBaseDir() { return baseDir; } /** * INNER CLASSES */ private interface Operation { public void execute(LsiIndexer indexer); } private class AddOperation implements Operation { Document doc; String docid; AddOperation(String docid, Document doc){ this.docid = docid; this.doc = doc; } public void execute(LsiIndexer indexer){ indexer.add(this.docid,this.doc); } } private class DelOperation implements Operation { String docid; DelOperation(String docid){ this.docid = docid; } public void execute(LsiIndexer indexer){ indexer.del(this.docid); } } private class OperationWorker extends Thread { BlockingQueue<Operation> queue; LsiIndexer indexer; OperationWorker(LsiIndexer indexer, BlockingQueue<Operation> queue){ this.queue = queue; this.indexer = indexer; } public void run(){ while (true){ Operation op = this.queue.poll(); if (null != op) { op.execute(this.indexer); } else { break; } } } } @Override public int countMatches(Query query) throws InterruptedException { return searcher.countMatches(query); } @Override public int countMatches(Query query, Predicate<DocId> idFilter) throws InterruptedException { return searcher.countMatches(query, idFilter); } public Map<String, String> getStats() { HashMap<String, String> stats = Maps.newHashMap(index.getStats()); stats.put("lsi_queue_size", String.valueOf(queue.size())); return stats; } }