package org.apache.blur.manager.writer;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import static org.apache.blur.metrics.MetricsConstants.LUCENE;
import static org.apache.blur.metrics.MetricsConstants.MERGE_THROUGHPUT_BYTES;
import static org.apache.blur.metrics.MetricsConstants.ORG_APACHE_BLUR;
import static org.apache.blur.utils.BlurConstants.SHARED_MERGE_SCHEDULER_PREFIX;

import java.io.Closeable;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.blur.concurrent.Executors;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergePolicy.OneMerge;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfoPerCommit;
import org.apache.lucene.store.Directory;

import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Gauge;
import com.yammer.metrics.core.Meter;
import com.yammer.metrics.core.MetricName;

/**
 * A merge scheduler that is shared across all index writers in the process.
 * Pending merges are split by total input size into a "small" and a "large"
 * priority queue, each drained by its own shared thread pool, so a stream of
 * small merges cannot starve large ones and vice versa. Queue depths and
 * merge throughput are exposed as metrics.
 */
public class SharedMergeScheduler implements Closeable {

  private static final String LARGE_QUEUE_DEPTH_IN_BYTES = "Large Queue Depth In Bytes";
  private static final String LARGE_QUEUE_DEPTH = "Large Queue Depth";
  private static final String SMALL_QUEUE_DEPTH_IN_BYTES = "Small Queue Depth In Bytes";
  private static final String SMALL_QUEUE_DEPTH = "Small Queue Depth";

  private static final Log LOG = LogFactory.getLog(SharedMergeScheduler.class);
  private static final Meter _throughputBytes;

  static {
    MetricName mergeThroughputBytes = new MetricName(ORG_APACHE_BLUR, LUCENE, MERGE_THROUGHPUT_BYTES);
    _throughputBytes = Metrics.newMeter(mergeThroughputBytes, MERGE_THROUGHPUT_BYTES, TimeUnit.SECONDS);
  }

  private final AtomicBoolean _running = new AtomicBoolean(true);
  private final ExecutorService _smallMergeService;
  private final ExecutorService _largeMergeService;
  private final PriorityBlockingQueue<MergeWork> _smallMergeQueue = new PriorityBlockingQueue<MergeWork>();
  private final PriorityBlockingQueue<MergeWork> _largeMergeQueue = new PriorityBlockingQueue<MergeWork>();
  private final long _smallMergeThreshold;

  static class MergeWork implements Comparable<MergeWork> {

    private final String _id;
    private final MergePolicy.OneMerge _merge;
    private final IndexWriter _writer;
    private final long _size;

    public MergeWork(String id, OneMerge merge, IndexWriter writer) throws IOException {
      _id = id;
      _merge = merge;
      _writer = writer;
      _size = merge.totalBytesSize();
    }

    @Override
    public int compareTo(MergeWork o) {
      // Order by total input size so the priority queues run smaller merges first.
      if (_size == o._size) {
        return 0;
      }
      return _size < o._size ? -1 : 1;
    }

    public void merge() throws IOException {
      MergeStatus mergeStatus = new MergeStatus(_merge, _writer.getDirectory(), _size, _merge.segments);
      // Trace.setupTrace(BlurConstants.SHARED_MERGE_SCHEDULER_PREFIX + "-" + System.nanoTime());
      startWatching(mergeStatus);
      try {
        _writer.merge(_merge);
      } finally {
        stopWatching(mergeStatus);
        // Trace.tearDownTrace();
      }
      _throughputBytes.mark(_size);
    }

    public String getId() {
      return _id;
    }

    public long getSize() {
      return _size;
    }
  }

  static class MergeStatus {

    final String _id;
    final Directory _directory;
    final long _start;
    final long _size;
    final OneMerge _oneMerge;
    final List<SegmentInfoPerCommit> _segments;

    MergeStatus(OneMerge oneMerge, Directory directory, long size, List<SegmentInfoPerCommit> segments) {
      _id = UUID.randomUUID().toString();
      _directory = directory;
      _start = System.nanoTime();
      _size = size;
      _oneMerge = oneMerge;
      _segments = segments;
    }

    void finalReport() throws IOException {
      long e = System.nanoTime();
      double time = (e - _start) / 1000000000.0;
      double rate = (_size / 1000.0 / 1000.0) / time;
      SegmentInfo segmentInfo = getSegmentInfo(_oneMerge);
      long segmentSize = getSegmentSize(segmentInfo, _directory);
      LOG.info(
          "Merge took [{0} s] to complete at rate of [{1} MB/s], input bytes [{2}], output bytes [{4}], segments merged {3}",
          time, rate, _size, _segments, segmentSize);
    }

    void report() throws IOException {
      long e = System.nanoTime();
      double time = (e - _start) / 1000000000.0;
      SegmentInfo segmentInfo = getSegmentInfo(_oneMerge);
      long segmentSize = getSegmentSize(segmentInfo, _directory);
      double rate = (segmentSize / 1000.0 / 1000.0) / time;
      LOG.info(
          "Merge running for [{0} s] at rate of [{1} MB/s], input bytes [{2}], output bytes [{4}], segments being merged {3}",
          time, rate, _size, _segments, segmentSize);
    }
  }

  public SharedMergeScheduler(int threads) {
    // Default split point: merges up to 128,000,000 bytes (decimal 128 MB) are "small".
    this(threads, 128 * 1000 * 1000);
  }

  private static ConcurrentMap<String, MergeStatus> _mergeStatusMap = new ConcurrentHashMap<String, MergeStatus>();

  protected static void stopWatching(MergeStatus mergeStatus) throws IOException {
    MergeStatus status = _mergeStatusMap.remove(mergeStatus._id);
    status.finalReport();
  }

  protected static void startWatching(MergeStatus mergeStatus) {
    _mergeStatusMap.put(mergeStatus._id, mergeStatus);
  }

  public SharedMergeScheduler(int threads, long smallMergeThreshold) {
    MetricName mergeSmallQueueDepth = new MetricName(ORG_APACHE_BLUR, LUCENE, SMALL_QUEUE_DEPTH);
    MetricName mergeSmallQueueDepthInBytes = new MetricName(ORG_APACHE_BLUR, LUCENE, SMALL_QUEUE_DEPTH_IN_BYTES);
    MetricName mergeLargeQueueDepth = new MetricName(ORG_APACHE_BLUR, LUCENE, LARGE_QUEUE_DEPTH);
    MetricName mergeLargeQueueDepthInBytes = new MetricName(ORG_APACHE_BLUR, LUCENE, LARGE_QUEUE_DEPTH_IN_BYTES);
    _smallMergeThreshold = smallMergeThreshold;
    _smallMergeService = Executors.newThreadPool(SHARED_MERGE_SCHEDULER_PREFIX + "-small", threads, false);
    _largeMergeService = Executors.newThreadPool(SHARED_MERGE_SCHEDULER_PREFIX + "-large", threads, false);
    for (int i = 0; i < threads; i++) {
      _smallMergeService.submit(getMergerRunnable(_smallMergeQueue));
      _largeMergeService.submit(getMergerRunnable(_largeMergeQueue));
    }
    Metrics.newGauge(mergeSmallQueueDepth, new Gauge<Long>() {
      @Override
      public Long value() {
        return (long) _smallMergeQueue.size();
      }
    });
    Metrics.newGauge(mergeSmallQueueDepthInBytes, new Gauge<Long>() {
      @Override
      public Long value() {
        return getSizeInBytes(_smallMergeQueue);
      }
    });
    Metrics.newGauge(mergeLargeQueueDepth, new Gauge<Long>() {
      @Override
      public Long value() {
        return (long) _largeMergeQueue.size();
      }
    });
    Metrics.newGauge(mergeLargeQueueDepthInBytes, new Gauge<Long>() {
      @Override
      public Long value() {
        return getSizeInBytes(_largeMergeQueue);
      }
    });
  }

  protected long getSizeInBytes(PriorityBlockingQueue<MergeWork> queue) {
    long total = 0;
    for (MergeWork mergeWork : queue) {
      total += mergeWork.getSize();
    }
    return total;
  }

  private Runnable getMergerRunnable(final PriorityBlockingQueue<MergeWork> queue) {
    return new Runnable() {
      @Override
      public void run() {
        // Worker loop: pull the smallest pending merge off the queue and run it
        // until the scheduler is closed.
        while (_running.get()) {
          try {
            MergeWork mergeWork = queue.take();
            try {
              mergeWork.merge();
            } catch (Throwable t) {
              LOG.error("Unknown error while trying to perform merge on [{0}]", t, mergeWork);
            }
          } catch (InterruptedException e) {
            if (_running.get()) {
              LOG.error("Unknown error", e);
            }
            return;
          }
        }
      }
    };
  }

  public MergeScheduler getMergeScheduler() {
    // Each writer gets a lightweight per-writer facade; the actual work runs on
    // the shared queues and thread pools.
    return new MergeScheduler() {

      private final String _id = UUID.randomUUID().toString();

      @Override
      public void merge(IndexWriter writer) throws IOException {
        addMerges(_id, writer);
      }

      @Override
      public void close() throws IOException {
        remove(_id);
      }

      @Override
      public MergeScheduler clone() {
        return getMergeScheduler();
      }
    };
  }

  protected void addMerges(String id, IndexWriter writer) throws IOException {
    OneMerge merge;
    while ((merge = writer.getNextMerge()) != null) {
      addMerge(id, writer, merge);
    }
  }

  private void addMerge(String id, IndexWriter writer, OneMerge merge) throws IOException {
    MergeWork mergeWork = new MergeWork(id, merge, writer);
    if (isLargeMerge(merge)) {
      _largeMergeQueue.add(mergeWork);
    } else {
      _smallMergeQueue.add(mergeWork);
    }
  }

  private boolean isLargeMerge(OneMerge merge) throws IOException {
    long totalBytesSize = merge.totalBytesSize();
    return totalBytesSize > _smallMergeThreshold;
  }

  protected void remove(String id) {
    remove(_smallMergeQueue, id);
    remove(_largeMergeQueue, id);
  }

  private void remove(PriorityBlockingQueue<MergeWork> queue, String id) {
    Iterator<MergeWork> iterator = queue.iterator();
    while (iterator.hasNext()) {
      MergeWork mergeWork = iterator.next();
      if (id.equals(mergeWork.getId())) {
        iterator.remove();
      }
    }
  }

  @Override
  public void close() throws IOException {
    _running.set(false);
    _smallMergeService.shutdownNow();
    _largeMergeService.shutdownNow();
  }

  protected static long getSegmentSize(SegmentInfo newSegmentInfo, Directory directory) throws IOException {
    if (newSegmentInfo == null) {
      return -1L;
    }
    String prefix = newSegmentInfo.name;
    long total = 0;
    for (String name : directory.listAll()) {
      if (name.startsWith(prefix)) {
        total += directory.fileLength(name);
      }
    }
    return total;
  }

  protected static SegmentInfo getSegmentInfo(OneMerge oneMerge) {
    // Reaches into Lucene internals via reflection: OneMerge.info is a
    // SegmentInfoPerCommit whose "info" field is the SegmentInfo being written.
    Object segmentInfoPerCommit = getFieldObject(oneMerge, "info");
    if (segmentInfoPerCommit == null) {
      return null;
    }
    return (SegmentInfo) getFieldObject(segmentInfoPerCommit, "info");
  }

  protected static Object getFieldObject(Object o, String fieldName) {
    try {
      Field field = o.getClass().getDeclaredField(fieldName);
      field.setAccessible(true);
      return field.get(o);
    } catch (NoSuchFieldException e) {
      return null;
    } catch (SecurityException e) {
      return null;
    } catch (IllegalArgumentException e) {
      return null;
    } catch (IllegalAccessException e) {
      return null;
    }
  }
}
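
// Usage sketch (illustrative only, not part of the original source): one
// SharedMergeScheduler is typically created per process, and each IndexWriter
// is handed a per-writer facade from getMergeScheduler(). The Version constant
// and KeywordAnalyzer below are assumptions matching the Lucene 4.x-era API
// (SegmentInfoPerCommit, IndexWriter.getNextMerge) this class is written against.
//
//   SharedMergeScheduler shared = new SharedMergeScheduler(3);
//
//   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
//   conf.setMergeScheduler(shared.getMergeScheduler());
//   IndexWriter writer = new IndexWriter(directory, conf);
//
//   // ... index documents; merges are queued on the shared small/large pools,
//   // sized against the 128 MB default threshold ...
//
//   writer.close();  // closes the per-writer facade, removing its queued merges
//   shared.close();  // stops the shared merge threads for the whole process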