package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.store.Directory; import org.apache.lucene.util.ThreadInterruptedException; import java.io.IOException; import java.util.List; import java.util.ArrayList; import java.util.Comparator; import java.util.Collections; /** A {@link MergeScheduler} that runs each merge using a * separate thread. * * <p>Specify the max number of threads that may run at * once with {@link #setMaxThreadCount}.</p> * * <p>Separately specify the maximum number of simultaneous * merges with {@link #setMaxMergeCount}. If the number of * merges exceeds the max number of threads then the * largest merges are paused until one of the smaller * merges completes.</p> * * <p>If more than {@link #getMaxMergeCount} merges are * requested then this class will forcefully throttle the * incoming threads by pausing until one more more merges * complete.</p> */ public class ConcurrentMergeScheduler extends MergeScheduler { private int mergeThreadPriority = -1; protected List<MergeThread> mergeThreads = new ArrayList<MergeThread>(); // Max number of merge threads allowed to be running at // once. When there are more merges then this, we // forcefully pause the larger ones, letting the smaller // ones run, up until maxMergeCount merges at which point // we forcefully pause incoming threads (that presumably // are the ones causing so much merging). We dynamically // default this from 1 to 3, depending on how many cores // you have: private int maxThreadCount = Math.max(1, Math.min(3, Runtime.getRuntime().availableProcessors()/2)); // Max number of merges we accept before forcefully // throttling the incoming threads private int maxMergeCount = maxThreadCount+2; protected Directory dir; private boolean closed; protected IndexWriter writer; protected int mergeThreadCount; public ConcurrentMergeScheduler() { if (allInstances != null) { // Only for testing addMyself(); } } /** Sets the max # simultaneous merge threads that should * be running at once. This must be <= {@link * #setMaxMergeCount}. */ public void setMaxThreadCount(int count) { if (count < 1) { throw new IllegalArgumentException("count should be at least 1"); } if (count > maxMergeCount) { throw new IllegalArgumentException("count should be <= maxMergeCount (= " + maxMergeCount + ")"); } maxThreadCount = count; } /** @see #setMaxThreadCount(int) */ public int getMaxThreadCount() { return maxThreadCount; } /** Sets the max # simultaneous merges that are allowed. * If a merge is necessary yet we already have this many * threads running, the incoming thread (that is calling * add/updateDocument) will block until a merge thread * has completed. Note that we will only run the * smallest {@link #setMaxThreadCount} merges at a time. */ public void setMaxMergeCount(int count) { if (count < 1) { throw new IllegalArgumentException("count should be at least 1"); } if (count < maxThreadCount) { throw new IllegalArgumentException("count should be >= maxThreadCount (= " + maxThreadCount + ")"); } maxMergeCount = count; } /** See {@link #setMaxMergeCount}. */ public int getMaxMergeCount() { return maxMergeCount; } /** Return the priority that merge threads run at. By * default the priority is 1 plus the priority of (ie, * slightly higher priority than) the first thread that * calls merge. */ public synchronized int getMergeThreadPriority() { initMergeThreadPriority(); return mergeThreadPriority; } /** Set the base priority that merge threads run at. * Note that CMS may increase priority of some merge * threads beyond this base priority. It's best not to * set this any higher than * Thread.MAX_PRIORITY-maxThreadCount, so that CMS has * room to set relative priority among threads. */ public synchronized void setMergeThreadPriority(int pri) { if (pri > Thread.MAX_PRIORITY || pri < Thread.MIN_PRIORITY) throw new IllegalArgumentException("priority must be in range " + Thread.MIN_PRIORITY + " .. " + Thread.MAX_PRIORITY + " inclusive"); mergeThreadPriority = pri; updateMergeThreads(); } // Larger merges come first protected static class CompareByMergeDocCount implements Comparator<MergeThread> { public int compare(MergeThread t1, MergeThread t2) { final MergePolicy.OneMerge m1 = t1.getCurrentMerge(); final MergePolicy.OneMerge m2 = t2.getCurrentMerge(); final int c1 = m1 == null ? Integer.MAX_VALUE : m1.segments.totalDocCount(); final int c2 = m2 == null ? Integer.MAX_VALUE : m2.segments.totalDocCount(); return c2 - c1; } } /** Called whenever the running merges have changed, to * pause & unpause threads. */ protected synchronized void updateMergeThreads() { Collections.sort(mergeThreads, new CompareByMergeDocCount()); final int count = mergeThreads.size(); int pri = mergeThreadPriority; for(int i=0;i<count;i++) { final MergeThread mergeThread = mergeThreads.get(i); final MergePolicy.OneMerge merge = mergeThread.getCurrentMerge(); if (merge == null) { continue; } final boolean doPause; if (i < count-maxThreadCount) { doPause = true; } else { doPause = false; } if (verbose()) { if (doPause != merge.getPause()) { if (doPause) { message("pause thread " + mergeThread.getName()); } else { message("unpause thread " + mergeThread.getName()); } } } if (doPause != merge.getPause()) { merge.setPause(doPause); } if (!doPause) { if (verbose()) { message("set priority of merge thread " + mergeThread.getName() + " to " + pri); } mergeThread.setThreadPriority(pri); pri = Math.min(Thread.MAX_PRIORITY, 1+pri); } } } private boolean verbose() { return writer != null && writer.verbose(); } private void message(String message) { if (verbose()) writer.message("CMS: " + message); } private synchronized void initMergeThreadPriority() { if (mergeThreadPriority == -1) { // Default to slightly higher priority than our // calling thread mergeThreadPriority = 1+Thread.currentThread().getPriority(); if (mergeThreadPriority > Thread.MAX_PRIORITY) mergeThreadPriority = Thread.MAX_PRIORITY; } } @Override public void close() { closed = true; } public synchronized void sync() { while(mergeThreadCount() > 0) { if (verbose()) message("now wait for threads; currently " + mergeThreads.size() + " still running"); final int count = mergeThreads.size(); if (verbose()) { for(int i=0;i<count;i++) message(" " + i + ": " + mergeThreads.get(i)); } try { wait(); } catch (InterruptedException ie) { throw new ThreadInterruptedException(ie); } } } private synchronized int mergeThreadCount() { int count = 0; final int numThreads = mergeThreads.size(); for(int i=0;i<numThreads;i++) if (mergeThreads.get(i).isAlive()) count++; return count; } @Override public void merge(IndexWriter writer) throws CorruptIndexException, IOException { assert !Thread.holdsLock(writer); this.writer = writer; initMergeThreadPriority(); dir = writer.getDirectory(); // First, quickly run through the newly proposed merges // and add any orthogonal merges (ie a merge not // involving segments already pending to be merged) to // the queue. If we are way behind on merging, many of // these newly proposed merges will likely already be // registered. if (verbose()) { message("now merge"); message(" index: " + writer.segString()); } // Iterate, pulling from the IndexWriter's queue of // pending merges, until it's empty: while(true) { // TODO: we could be careful about which merges to do in // the BG (eg maybe the "biggest" ones) vs FG, which // merges to do first (the easiest ones?), etc. MergePolicy.OneMerge merge = writer.getNextMerge(); if (merge == null) { if (verbose()) message(" no more merges pending; now return"); return; } // We do this w/ the primary thread to keep // deterministic assignment of segment names writer.mergeInit(merge); boolean success = false; try { synchronized(this) { final MergeThread merger; long startStallTime = 0; while (mergeThreadCount() >= maxMergeCount) { startStallTime = System.currentTimeMillis(); if (verbose()) { message(" too many merges; stalling..."); } try { wait(); } catch (InterruptedException ie) { throw new ThreadInterruptedException(ie); } } if (verbose()) { if (startStallTime != 0) { message(" stalled for " + (System.currentTimeMillis()-startStallTime) + " msec"); } message(" consider merge " + merge.segString(dir)); } assert mergeThreadCount() < maxMergeCount; // OK to spawn a new merge thread to handle this // merge: merger = getMergeThread(writer, merge); mergeThreads.add(merger); updateMergeThreads(); if (verbose()) message(" launch new thread [" + merger.getName() + "]"); merger.start(); success = true; } } finally { if (!success) { writer.mergeFinish(merge); } } } } /** Does the actual merge, by calling {@link IndexWriter#merge} */ protected void doMerge(MergePolicy.OneMerge merge) throws IOException { writer.merge(merge); } /** Create and return a new MergeThread */ protected synchronized MergeThread getMergeThread(IndexWriter writer, MergePolicy.OneMerge merge) throws IOException { final MergeThread thread = new MergeThread(writer, merge); thread.setThreadPriority(mergeThreadPriority); thread.setDaemon(true); thread.setName("Lucene Merge Thread #" + mergeThreadCount++); return thread; } protected class MergeThread extends Thread { IndexWriter tWriter; MergePolicy.OneMerge startMerge; MergePolicy.OneMerge runningMerge; private volatile boolean done; public MergeThread(IndexWriter writer, MergePolicy.OneMerge startMerge) throws IOException { this.tWriter = writer; this.startMerge = startMerge; } public synchronized void setRunningMerge(MergePolicy.OneMerge merge) { runningMerge = merge; } public synchronized MergePolicy.OneMerge getRunningMerge() { return runningMerge; } public synchronized MergePolicy.OneMerge getCurrentMerge() { if (done) { return null; } else if (runningMerge != null) { return runningMerge; } else { return startMerge; } } public void setThreadPriority(int pri) { try { setPriority(pri); } catch (NullPointerException npe) { // Strangely, Sun's JDK 1.5 on Linux sometimes // throws NPE out of here... } catch (SecurityException se) { // Ignore this because we will still run fine with // normal thread priority } } @Override public void run() { // First time through the while loop we do the merge // that we were started with: MergePolicy.OneMerge merge = this.startMerge; try { if (verbose()) message(" merge thread: start"); while(true) { setRunningMerge(merge); doMerge(merge); // Subsequent times through the loop we do any new // merge that writer says is necessary: merge = tWriter.getNextMerge(); if (merge != null) { tWriter.mergeInit(merge); updateMergeThreads(); if (verbose()) message(" merge thread: do another merge " + merge.segString(dir)); } else { done = true; updateMergeThreads(); break; } } if (verbose()) message(" merge thread: done"); } catch (Throwable exc) { // Ignore the exception if it was due to abort: if (!(exc instanceof MergePolicy.MergeAbortedException)) { if (!suppressExceptions) { // suppressExceptions is normally only set during // testing. anyExceptions = true; handleMergeException(exc); } } } finally { synchronized(ConcurrentMergeScheduler.this) { ConcurrentMergeScheduler.this.notifyAll(); boolean removed = mergeThreads.remove(this); assert removed; updateMergeThreads(); } } } @Override public String toString() { MergePolicy.OneMerge merge = getRunningMerge(); if (merge == null) merge = startMerge; return "merge thread: " + merge.segString(dir); } } /** Called when an exception is hit in a background merge * thread */ protected void handleMergeException(Throwable exc) { try { // When an exception is hit during merge, IndexWriter // removes any partial files and then allows another // merge to run. If whatever caused the error is not // transient then the exception will keep happening, // so, we sleep here to avoid saturating CPU in such // cases: Thread.sleep(250); } catch (InterruptedException ie) { throw new ThreadInterruptedException(ie); } throw new MergePolicy.MergeException(exc, dir); } static boolean anyExceptions = false; /** Used for testing */ public static boolean anyUnhandledExceptions() { if (allInstances == null) { throw new RuntimeException("setTestMode() was not called; often this is because your test case's setUp method fails to call super.setUp in LuceneTestCase"); } synchronized(allInstances) { final int count = allInstances.size(); // Make sure all outstanding threads are done so we see // any exceptions they may produce: for(int i=0;i<count;i++) allInstances.get(i).sync(); boolean v = anyExceptions; anyExceptions = false; return v; } } public static void clearUnhandledExceptions() { synchronized(allInstances) { anyExceptions = false; } } /** Used for testing */ private void addMyself() { synchronized(allInstances) { final int size = allInstances.size(); int upto = 0; for(int i=0;i<size;i++) { final ConcurrentMergeScheduler other = allInstances.get(i); if (!(other.closed && 0 == other.mergeThreadCount())) // Keep this one for now: it still has threads or // may spawn new threads allInstances.set(upto++, other); } allInstances.subList(upto, allInstances.size()).clear(); allInstances.add(this); } } private boolean suppressExceptions; /** Used for testing */ void setSuppressExceptions() { suppressExceptions = true; } /** Used for testing */ void clearSuppressExceptions() { suppressExceptions = false; } /** Used for testing */ private static List<ConcurrentMergeScheduler> allInstances; public static void setTestMode() { allInstances = new ArrayList<ConcurrentMergeScheduler>(); } }