CompactionPipeline.java example

Explorer
hbase-master
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;

/**
 * The compaction pipeline of a {@link CompactingMemStore}, is a FIFO queue of segments.
 * It supports pushing a segment at the head of the pipeline and removing a segment from the
 * tail when it is flushed to disk.
 * It also supports swap method to allow the in-memory compaction swap a subset of the segments
 * at the tail of the pipeline with a new (compacted) one. This swap succeeds only if the version
 * number passed with the list of segments to swap is the same as the current version of the
 * pipeline.
 * Essentially, there are two methods which can change the structure of the pipeline: pushHead()
 * and swap(), the later is used both by a flush to disk and by an in-memory compaction.
 * The pipeline version is updated by swap(); it allows to identify conflicting operations at the
 * suffix of the pipeline.
 *
 * The synchronization model is copy-on-write. Methods which change the structure of the
 * pipeline (pushHead() and swap()) apply their changes in the context of a lock. They also make
 * a read-only copy of the pipeline's list. Read methods read from a read-only copy. If a read
 * method accesses the read-only copy more than once it makes a local copy of it
 * to ensure it accesses the same copy.
 *
 * The methods getVersionedList(), getVersionedTail(), and flattenYoungestSegment() are also
 * protected by a lock since they need to have a consistent (atomic) view of the pipeline list
 * and version number.
 */
@InterfaceAudience.Private
public class CompactionPipeline {
  private static final Log LOG = LogFactory.getLog(CompactionPipeline.class);

  public final static long FIXED_OVERHEAD = ClassSize
      .align(ClassSize.OBJECT + (3 * ClassSize.REFERENCE) + Bytes.SIZEOF_LONG);
  public final static long DEEP_OVERHEAD = FIXED_OVERHEAD + (2 * ClassSize.LINKEDLIST);

  private final RegionServicesForStores region;
  private final LinkedList<ImmutableSegment> pipeline = new LinkedList<>();
  // The list is volatile to avoid reading a new allocated reference before the c'tor is executed
  private volatile LinkedList<ImmutableSegment> readOnlyCopy = new LinkedList<>();
  // Version is volatile to ensure it is atomically read when not using a lock
  private volatile long version = 0;

  public CompactionPipeline(RegionServicesForStores region) {
    this.region = region;
  }

  public boolean pushHead(MutableSegment segment) {
    ImmutableSegment immutableSegment = SegmentFactory.instance().
        createImmutableSegment(segment);
    synchronized (pipeline){
      boolean res = addFirst(immutableSegment);
      readOnlyCopy = new LinkedList<>(pipeline);
      return res;
    }
  }

  public VersionedSegmentsList getVersionedList() {
    synchronized (pipeline){
      return new VersionedSegmentsList(readOnlyCopy, version);
    }
  }

  public VersionedSegmentsList getVersionedTail() {
    synchronized (pipeline){
      List<ImmutableSegment> segmentList = new ArrayList<>();
      if(!pipeline.isEmpty()) {
        segmentList.add(0, pipeline.getLast());
      }
      return new VersionedSegmentsList(segmentList, version);
    }
  }

  /**
   * Swaps the versioned list at the tail of the pipeline with a new segment.
   * Swapping only if there were no changes to the suffix of the list since the version list was
   * created.
   * @param versionedList suffix of the pipeline to be replaced can be tail or all the pipeline
   * @param segment new segment to replace the suffix. Can be null if the suffix just needs to be
   *                removed.
   * @param closeSuffix whether to close the suffix (to release memory), as part of swapping it out
   *        During index merge op this will be false and for compaction it will be true.
   * @param updateRegionSize whether to update the region size. Update the region size,
   *                         when the pipeline is swapped as part of in-memory-flush and
   *                         further merge/compaction. Don't update the region size when the
   *                         swap is result of the snapshot (flush-to-disk).
   * @return true iff swapped tail with new segment
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="VO_VOLATILE_INCREMENT",
      justification="Increment is done under a synchronize block so safe")
  public boolean swap(VersionedSegmentsList versionedList, ImmutableSegment segment,
      boolean closeSuffix, boolean updateRegionSize) {
    if (versionedList.getVersion() != version) {
      return false;
    }
    List<ImmutableSegment> suffix;
    synchronized (pipeline){
      if(versionedList.getVersion() != version) {
        return false;
      }
      suffix = versionedList.getStoreSegments();
      if (LOG.isDebugEnabled()) {
        int count = 0;
        if(segment != null) {
          count = segment.getCellsCount();
        }
        LOG.debug("Swapping pipeline suffix. "
            + "Just before the swap the number of segments in pipeline is:"
            + versionedList.getStoreSegments().size()
            + ", and the number of cells in new segment is:" + count);
      }
      swapSuffix(suffix, segment, closeSuffix);
      readOnlyCopy = new LinkedList<>(pipeline);
      version++;
    }
    if (updateRegionSize && region != null) {
      // update the global memstore size counter
      long suffixDataSize = getSegmentsKeySize(suffix);
      long newDataSize = 0;
      if(segment != null) newDataSize = segment.keySize();
      long dataSizeDelta = suffixDataSize - newDataSize;
      long suffixHeapSize = getSegmentsHeapSize(suffix);
      long newHeapSize = 0;
      if(segment != null) newHeapSize = segment.heapSize();
      long heapSizeDelta = suffixHeapSize - newHeapSize;
      region.addMemstoreSize(new MemstoreSize(-dataSizeDelta, -heapSizeDelta));
      if (LOG.isDebugEnabled()) {
        LOG.debug("Suffix data size: " + suffixDataSize + " new segment data size: "
            + newDataSize + ". Suffix heap size: " + suffixHeapSize
            + " new segment heap size: " + newHeapSize);
      }
    }
    return true;
  }

  private static long getSegmentsHeapSize(List<? extends Segment> list) {
    long res = 0;
    for (Segment segment : list) {
      res += segment.heapSize();
    }
    return res;
  }

  private static long getSegmentsKeySize(List<? extends Segment> list) {
    long res = 0;
    for (Segment segment : list) {
      res += segment.keySize();
    }
    return res;
  }

  /**
   * If the caller holds the current version, go over the the pipeline and try to flatten each
   * segment. Flattening is replacing the ConcurrentSkipListMap based CellSet to CellArrayMap based.
   * Flattening of the segment that initially is not based on ConcurrentSkipListMap has no effect.
   * Return after one segment is successfully flatten.
   *
   * @return true iff a segment was successfully flattened
   */
  public boolean flattenYoungestSegment(long requesterVersion) {

    if(requesterVersion != version) {
      LOG.warn("Segment flattening failed, because versions do not match. Requester version: "
          + requesterVersion + ", actual version: " + version);
      return false;
    }

    synchronized (pipeline){
      if(requesterVersion != version) {
        LOG.warn("Segment flattening failed, because versions do not match");
        return false;
      }

      for (ImmutableSegment s : pipeline) {
        // remember the old size in case this segment is going to be flatten
        MemstoreSize memstoreSize = new MemstoreSize();
        if (s.flatten(memstoreSize)) {
          if(region != null) {
            region.addMemstoreSize(memstoreSize);
          }
          LOG.debug("Compaction pipeline segment " + s + " was flattened");
          return true;
        }
      }

    }
    // do not update the global memstore size counter and do not increase the version,
    // because all the cells remain in place
    return false;
  }

  public boolean isEmpty() {
    return readOnlyCopy.isEmpty();
  }

  public List<? extends Segment> getSegments() {
    return readOnlyCopy;
  }

  public long size() {
    return readOnlyCopy.size();
  }

  public long getMinSequenceId() {
    long minSequenceId = Long.MAX_VALUE;
    LinkedList<? extends Segment> localCopy = readOnlyCopy;
    if (!localCopy.isEmpty()) {
      minSequenceId = localCopy.getLast().getMinSequenceId();
    }
    return minSequenceId;
  }

  public MemstoreSize getTailSize() {
    LinkedList<? extends Segment> localCopy = readOnlyCopy;
    if (localCopy.isEmpty()) return new MemstoreSize(true);
    return new MemstoreSize(localCopy.peekLast().keySize(), localCopy.peekLast().heapSize());
  }

  public MemstoreSize getPipelineSize() {
    long keySize = 0;
    long heapSize = 0;
    LinkedList<? extends Segment> localCopy = readOnlyCopy;
    if (localCopy.isEmpty()) return new MemstoreSize(true);
    for (Segment segment : localCopy) {
      keySize += segment.keySize();
      heapSize += segment.heapSize();
    }
    return new MemstoreSize(keySize, heapSize);
  }

  private void swapSuffix(List<? extends Segment> suffix, ImmutableSegment segment,
      boolean closeSegmentsInSuffix) {
    // During index merge we won't be closing the segments undergoing the merge. Segment#close()
    // will release the MSLAB chunks to pool. But in case of index merge there wont be any data copy
    // from old MSLABs. So the new cells in new segment also refers to same chunks. In case of data
    // compaction, we would have copied the cells data from old MSLAB chunks into a new chunk
    // created for the result segment. So we can release the chunks associated with the compacted
    // segments.
    if (closeSegmentsInSuffix) {
      for (Segment itemInSuffix : suffix) {
        itemInSuffix.close();
      }
    }
    pipeline.removeAll(suffix);
    if(segment != null) pipeline.addLast(segment);
  }

  public Segment getTail() {
    List<? extends Segment> localCopy = getSegments();
    if(localCopy.isEmpty()) {
      return null;
    }
    return localCopy.get(localCopy.size()-1);
  }

  private boolean addFirst(ImmutableSegment segment) {
    pipeline.addFirst(segment);
    return true;
  }

  // debug method
  private boolean validateSuffixList(LinkedList<ImmutableSegment> suffix) {
    if(suffix.isEmpty()) {
      // empty suffix is always valid
      return true;
    }
    Iterator<ImmutableSegment> pipelineBackwardIterator = pipeline.descendingIterator();
    Iterator<ImmutableSegment> suffixBackwardIterator = suffix.descendingIterator();
    ImmutableSegment suffixCurrent;
    ImmutableSegment pipelineCurrent;
    for( ; suffixBackwardIterator.hasNext(); ) {
      if(!pipelineBackwardIterator.hasNext()) {
        // a suffix longer than pipeline is invalid
        return false;
      }
      suffixCurrent = suffixBackwardIterator.next();
      pipelineCurrent = pipelineBackwardIterator.next();
      if(suffixCurrent != pipelineCurrent) {
        // non-matching suffix
        return false;
      }
    }
    // suffix matches pipeline suffix
    return true;
  }

}