// StoreFileReader.java example
//
// Explorer
// hbase-master
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import com.google.common.annotations.VisibleForTesting;

import java.io.DataInput;
import java.io.IOException;
import java.util.Map;
import java.util.SortedSet;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Reader for a StoreFile.
 * <p>
 * Wraps an {@link HFile.Reader} and keeps the store-file level state that is derived from it:
 * the general and delete-family Bloom filters, the Bloom filter type, the last Bloom key, the
 * delete-family count, the sequence id, the time range and the bulk-load flag. It also offers the
 * time-range, key-range and Bloom filter checks that decide whether a scan needs this file.
 */
@InterfaceAudience.Private
public class StoreFileReader {
  private static final Log LOG = LogFactory.getLog(StoreFileReader.class.getName());

  protected BloomFilter generalBloomFilter = null;
  protected BloomFilter deleteFamilyBloomFilter = null;
  protected BloomType bloomFilterType;
  private final HFile.Reader reader;
  protected long sequenceID = -1;
  protected TimeRange timeRange = null;
  // Raw value of the LAST_BLOOM_KEY file info entry; null when the entry is absent.
  private byte[] lastBloomKey;
  // Value of the DELETE_FAMILY_COUNT file info entry; stays -1 until it has been loaded.
  private long deleteFamilyCnt = -1;
  private boolean bulkLoadResult = false;
  // Cell view over lastBloomKey, only built for ROWCOL Bloom filters.
  private KeyValue.KeyOnlyKeyValue lastBloomKeyOnlyKV = null;
  private boolean skipResetSeqId = true;

  // Counter that is incremented every time a scanner is created on the
  // store file. It is decremented when the scan on the store file is
  // done. All StoreFileReader for the same StoreFile will share this counter.
  private final AtomicInteger refCount;

  // Indicates whether this StoreFileReader is shared, i.e., used for pread. If not, we will
  // close the internal reader when readCompleted is called.
  @VisibleForTesting
  final boolean shared;

  private StoreFileReader(HFile.Reader reader, AtomicInteger refCount, boolean shared) {
    this.reader = reader;
    bloomFilterType = BloomType.NONE;
    this.refCount = refCount;
    this.shared = shared;
  }

  /**
   * Creates a reader by opening the HFile at {@code path} through
   * {@code HFile.createReader(fs, path, cacheConf, primaryReplicaStoreFile, conf)}.
   * @param refCount scanner reference counter used by this reader
   * @param shared whether this reader is shared; a non-shared reader closes the underlying HFile
   *          reader in {@link #readCompleted()}
   * @throws IOException if the underlying HFile reader cannot be created
   */
  public StoreFileReader(FileSystem fs, Path path, CacheConfig cacheConf,
      boolean primaryReplicaStoreFile, AtomicInteger refCount, boolean shared, Configuration conf)
      throws IOException {
    this(HFile.createReader(fs, path, cacheConf, primaryReplicaStoreFile, conf), refCount, shared);
  }

  /**
   * Creates a reader on top of an already opened input stream wrapper through
   * {@code HFile.createReader(fs, path, in, size, cacheConf, primaryReplicaStoreFile, conf)}.
   * @param refCount scanner reference counter used by this reader
   * @param shared whether this reader is shared; a non-shared reader closes the underlying HFile
   *          reader in {@link #readCompleted()}
   * @throws IOException if the underlying HFile reader cannot be created
   */
  public StoreFileReader(FileSystem fs, Path path, FSDataInputStreamWrapper in, long size,
      CacheConfig cacheConf, boolean primaryReplicaStoreFile, AtomicInteger refCount,
      boolean shared, Configuration conf) throws IOException {
    this(HFile.createReader(fs, path, in, size, cacheConf, primaryReplicaStoreFile, conf), refCount,
        shared);
  }

  /**
   * Copies the Bloom filter references and the file-info derived state of {@code reader} into
   * this reader, so that this reader does not have to load them again.
   * @param reader the reader to copy the state from
   */
  void copyFields(StoreFileReader reader) {
    this.generalBloomFilter = reader.generalBloomFilter;
    this.deleteFamilyBloomFilter = reader.deleteFamilyBloomFilter;
    this.bloomFilterType = reader.bloomFilterType;
    this.sequenceID = reader.sequenceID;
    this.timeRange = reader.timeRange;
    this.lastBloomKey = reader.lastBloomKey;
    // Without the count the copy would never take the "no delete family at all" shortcut in
    // passesDeleteFamilyBloomFilter and getDeleteFamilyCnt() would report -1.
    this.deleteFamilyCnt = reader.deleteFamilyCnt;
    this.bulkLoadResult = reader.bulkLoadResult;
    this.lastBloomKeyOnlyKV = reader.lastBloomKeyOnlyKV;
    this.skipResetSeqId = reader.skipResetSeqId;
  }

  /**
   * @return the value reported by the underlying HFile reader's {@code isPrimaryReplicaReader()}
   */
  public boolean isPrimaryReplicaReader() {
    return reader.isPrimaryReplicaReader();
  }

  /**
   * ONLY USE DEFAULT CONSTRUCTOR FOR UNIT TESTS
   * <p>
   * The underlying HFile reader is {@code null}, so every method that delegates to it fails.
   */
  @VisibleForTesting
  StoreFileReader() {
    this.refCount = new AtomicInteger(0);
    this.reader = null;
    this.shared = false;
  }

  /**
   * @return the comparator of the underlying HFile reader
   */
  public CellComparator getComparator() {
    return reader.getComparator();
  }

  /**
   * Get a scanner to scan over this StoreFile. The shared reference counter is incremented for
   * the returned scanner; it is decremented again in {@link #readCompleted()}.
   * @param cacheBlocks should this scanner cache blocks?
   * @param pread use pread (for highly concurrent small readers)
   * @param isCompaction is scanner being used for compaction?
   * @param readPt read point handed to the scanner (presumably the MVCC read point)
   * @param scannerOrder Order of this scanner relative to other scanners. See
   *          {@link KeyValueScanner#getScannerOrder()}.
   * @param canOptimizeForNonNullColumn {@code true} if we can make sure there is no null column,
   *          otherwise {@code false}. This is a hint for optimization.
   * @return a scanner
   */
  public StoreFileScanner getStoreFileScanner(boolean cacheBlocks, boolean pread,
      boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
    // Increment the ref count before the scanner is built.
    refCount.incrementAndGet();
    boolean created = false;
    try {
      HFileScanner hfileScanner = getScanner(cacheBlocks, pread, isCompaction);
      StoreFileScanner scanner = new StoreFileScanner(this, hfileScanner, !isCompaction,
          reader.hasMVCCInfo(), readPt, scannerOrder, canOptimizeForNonNullColumn);
      created = true;
      return scanner;
    } finally {
      if (!created) {
        // No scanner exists that could report readCompleted(), so undo the increment here;
        // otherwise the reference would leak.
        refCount.decrementAndGet();
      }
    }
  }

  /**
   * Indicate that the scanner has finished reading with this reader. We need to decrement the ref
   * count, and also, if this is not the common pread reader, we should close it. A failure to
   * close is logged and not propagated.
   */
  void readCompleted() {
    refCount.decrementAndGet();
    if (!shared) {
      try {
        reader.close(false);
      } catch (IOException e) {
        LOG.warn("failed to close stream reader", e);
      }
    }
  }

  /**
   * @deprecated Do not write further code which depends on this call. Instead
   *   use getStoreFileScanner() which uses the StoreFileScanner class/interface
   *   which is the preferred way to scan a store with higher level concepts.
   *
   * @param cacheBlocks should we cache the blocks?
   * @param pread use pread (for concurrent small readers)
   * @return the underlying HFileScanner
   */
  @Deprecated
  public HFileScanner getScanner(boolean cacheBlocks, boolean pread) {
    return getScanner(cacheBlocks, pread, false);
  }

  /**
   * @deprecated Do not write further code which depends on this call. Instead
   *   use getStoreFileScanner() which uses the StoreFileScanner class/interface
   *   which is the preferred way to scan a store with higher level concepts.
   *
   * @param cacheBlocks
   *          should we cache the blocks?
   * @param pread
   *          use pread (for concurrent small readers)
   * @param isCompaction
   *          is scanner being used for compaction?
   * @return the underlying HFileScanner
   */
  @Deprecated
  public HFileScanner getScanner(boolean cacheBlocks, boolean pread,
      boolean isCompaction) {
    return reader.getScanner(cacheBlocks, pread, isCompaction);
  }

  /**
   * Closes the underlying HFile reader.
   * @param evictOnClose passed through to the underlying reader's {@code close}
   * @throws IOException if the underlying reader fails to close
   */
  public void close(boolean evictOnClose) throws IOException {
    reader.close(evictOnClose);
  }

  /**
   * Check if this storeFile may contain keys within the TimeRange that
   * have not expired (i.e. not older than oldestUnexpiredTS).
   * @param tr the timeRange to restrict
   * @param oldestUnexpiredTS the oldest timestamp that is not expired, as
   *          determined by the column family's TTL
   * @return false if queried keys definitely don't exist in this StoreFile; always true when this
   *         reader has no time range
   */
  boolean passesTimerangeFilter(TimeRange tr, long oldestUnexpiredTS) {
    return this.timeRange == null
        || (this.timeRange.includesTimeRange(tr) && this.timeRange.getMax() >= oldestUnexpiredTS);
  }

  /**
   * Checks whether the given scan passes the Bloom filter (if present). Only
   * checks Bloom filters for single-row or single-row-column scans. Bloom
   * filter checking for multi-gets is implemented as part of the store
   * scanner system (see {@link StoreFileScanner#seek(Cell)} and uses
   * the lower-level API {@link #passesGeneralRowBloomFilter(byte[], int, int)}
   * and {@link #passesGeneralRowColBloomFilter(Cell)}.
   *
   * @param scan the scan specification. Used to determine the row, and to
   *          check whether this is a single-row ("get") scan.
   * @param columns the set of columns. Only used for row-column Bloom
   *          filters.
   * @return true if the scan with the given column set passes the Bloom
   *         filter, or if the Bloom filter is not applicable for the scan.
   *         False if the Bloom filter is applicable and the scan fails it.
   */
  boolean passesBloomFilter(Scan scan, final SortedSet<byte[]> columns) {
    // Multi-column non-get scans will use Bloom filters through the
    // lower-level API function that this function calls.
    if (!scan.isGetScan()) {
      return true;
    }

    byte[] row = scan.getStartRow();
    switch (this.bloomFilterType) {
      case ROW:
        return passesGeneralRowBloomFilter(row, 0, row.length);

      case ROWCOL:
        if (columns != null && columns.size() == 1) {
          byte[] column = columns.first();
          // create the required fake key
          Cell kvKey = CellUtil.createFirstOnRow(row, HConstants.EMPTY_BYTE_ARRAY, column);
          return passesGeneralRowColBloomFilter(kvKey);
        }

        // For multi-column queries the Bloom filter is checked from the
        // seekExact operation.
        return true;

      default:
        return true;
    }
  }

  /**
   * Checks the delete-family Bloom filter for the given row.
   *
   * @param row array holding the row
   * @param rowOffset offset of the row inside {@code row}
   * @param rowLen length of the row
   * @return false if the file is empty, if its delete-family count is zero, or if the Bloom
   *         filter does not contain the row; true otherwise, including when no usable
   *         delete-family Bloom filter is available
   */
  public boolean passesDeleteFamilyBloomFilter(byte[] row, int rowOffset,
      int rowLen) {
    // Cache Bloom filter as a local variable in case it is set to null by
    // another thread on an IO error.
    BloomFilter bloomFilter = this.deleteFamilyBloomFilter;

    // Empty file or there is no delete family at all
    if (reader.getTrailer().getEntryCount() == 0 || deleteFamilyCnt == 0) {
      return false;
    }

    if (bloomFilter == null) {
      return true;
    }

    try {
      if (!bloomFilter.supportsAutoLoading()) {
        return true;
      }
      return bloomFilter.contains(row, rowOffset, rowLen, null);
    } catch (IllegalArgumentException e) {
      LOG.error("Bad Delete Family bloom filter data -- proceeding without",
          e);
      // Drop the filter so that later calls no longer consult it.
      setDeleteFamilyBloomFilterFaulty();
    }

    return true;
  }

  /**
   * A method for checking Bloom filters. Called directly from
   * StoreFileScanner in case of a multi-column query.
   *
   * @param row the row to check; must occupy the whole array
   * @param rowOffset must be 0
   * @param rowLen must be {@code row.length}
   * @return True if passes
   * @throws AssertionError if a general Bloom filter is loaded and the row does not occupy the
   *           whole array
   */
  public boolean passesGeneralRowBloomFilter(byte[] row, int rowOffset, int rowLen) {
    BloomFilter bloomFilter = this.generalBloomFilter;
    if (bloomFilter == null) {
      return true;
    }

    // Used in ROW bloom: the whole array is the Bloom key.
    if (rowOffset != 0 || rowLen != row.length) {
      throw new AssertionError(
          "For row-only Bloom filters the row must occupy the whole array");
    }
    return checkGeneralBloomFilter(row, null, bloomFilter);
  }

  /**
   * A method for checking Bloom filters. Called directly from
   * StoreFileScanner in case of a multi-column query.
   *
   * @param cell
   *          the cell to check if present in BloomFilter
   * @return True if passes
   */
  public boolean passesGeneralRowColBloomFilter(Cell cell) {
    BloomFilter bloomFilter = this.generalBloomFilter;
    if (bloomFilter == null) {
      return true;
    }
    // Used in ROW_COL bloom
    Cell kvKey;
    // If the incoming key already is a fake rowcol key then use it as it is
    if (cell.getTypeByte() == KeyValue.Type.Maximum.getCode() && cell.getFamilyLength() == 0) {
      kvKey = cell;
    } else {
      kvKey = CellUtil.createFirstOnRowCol(cell);
    }
    return checkGeneralBloomFilter(null, kvKey, bloomFilter);
  }

  /**
   * Runs the actual general Bloom filter check. Exactly one of {@code key} and {@code kvKey} is
   * used: {@code key} for {@code BloomType.ROW}, {@code kvKey} for {@code BloomType.ROWCOL}.
   *
   * @param key the row Bloom key, used when the Bloom type is not ROWCOL
   * @param kvKey the row-column Bloom key, used when the Bloom type is ROWCOL
   * @param bloomFilter the filter to consult, never null
   * @return false if the file is empty or the filter rules the key out; true if the key may be
   *         present or the Bloom data could not be read (the filter is then dropped)
   */
  private boolean checkGeneralBloomFilter(byte[] key, Cell kvKey, BloomFilter bloomFilter) {
    // Empty file
    if (reader.getTrailer().getEntryCount() == 0) {
      return false;
    }
    HFileBlock bloomBlock = null;
    try {
      boolean shouldCheckBloom;
      ByteBuff bloom;
      if (bloomFilter.supportsAutoLoading()) {
        bloom = null;
        shouldCheckBloom = true;
      } else {
        bloomBlock = reader.getMetaBlock(HFile.BLOOM_FILTER_DATA_KEY, true);
        // Treat a missing meta block like missing Bloom data instead of failing with an NPE.
        bloom = bloomBlock == null ? null : bloomBlock.getBufferWithoutHeader();
        shouldCheckBloom = bloom != null;
      }

      if (shouldCheckBloom) {
        boolean exists;

        // Whether the primary Bloom key is greater than the last Bloom key
        // from the file info. For row-column Bloom filters this is not yet
        // a sufficient condition to return false.
        boolean keyIsAfterLast = (lastBloomKey != null);
        // hbase:meta does not have blooms. So we need not have special interpretation
        // of the hbase:meta cells.  We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
        if (keyIsAfterLast) {
          if (bloomFilterType == BloomType.ROW) {
            keyIsAfterLast = (Bytes.BYTES_RAWCOMPARATOR.compare(key, lastBloomKey) > 0);
          } else {
            keyIsAfterLast = (CellComparator.COMPARATOR.compare(kvKey, lastBloomKeyOnlyKV)) > 0;
          }
        }

        if (bloomFilterType == BloomType.ROWCOL) {
          // Since a Row Delete is essentially a DeleteFamily applied to all
          // columns, a file might be skipped if using row+col Bloom filter.
          // In order to ensure this file is included an additional check is
          // required looking only for a row bloom.
          Cell rowBloomKey = CellUtil.createFirstOnRow(kvKey);
          // The file can only be ruled out by the last Bloom key when both the row-column key
          // and the row-only key sort after it.
          if (keyIsAfterLast
              && (CellComparator.COMPARATOR.compare(rowBloomKey, lastBloomKeyOnlyKV)) > 0) {
            exists = false;
          } else {
            exists =
                bloomFilter.contains(kvKey, bloom, BloomType.ROWCOL) ||
                bloomFilter.contains(rowBloomKey, bloom, BloomType.ROWCOL);
          }
        } else {
          exists = !keyIsAfterLast
              && bloomFilter.contains(key, 0, key.length, bloom);
        }

        return exists;
      }
    } catch (IOException e) {
      LOG.error("Error reading bloom filter data -- proceeding without",
          e);
      setGeneralBloomFilterFaulty();
    } catch (IllegalArgumentException e) {
      LOG.error("Bad bloom filter data -- proceeding without", e);
      setGeneralBloomFilterFaulty();
    } finally {
      // Return the bloom block so that its ref count can be decremented.
      reader.returnBlock(bloomBlock);
    }
    return true;
  }

  /**
   * Checks whether the given scan rowkey range overlaps with the rowkey range of the current
   * storefile, i.e. the range between its first and its last key.
   * @param scan the scan specification. Used to determine the rowkey range.
   * @return true if there is overlap, false otherwise (including when the file is empty)
   */
  public boolean passesKeyRangeFilter(Scan scan) {
    Cell firstKeyKV = this.getFirstKey();
    if (firstKeyKV == null) {
      // the file is empty
      return false;
    }
    Cell lastKeyKV = this.getLastKey();
    if (lastKeyKV == null) {
      // the file is empty
      return false;
    }

    byte[] startRow = scan.getStartRow();
    byte[] stopRow = scan.getStopRow();
    if (Bytes.equals(startRow, HConstants.EMPTY_START_ROW)
        && Bytes.equals(stopRow, HConstants.EMPTY_END_ROW)) {
      // Unbounded scan: overlaps with every non-empty file.
      return true;
    }

    // For a reversed scan the start row is the upper bound and the stop row the lower bound.
    boolean reversed = scan.isReversed();
    byte[] smallestScanRow = reversed ? stopRow : startRow;
    byte[] largestScanRow = reversed ? startRow : stopRow;

    // An empty upper bound means "no upper bound", so the file can never start after it.
    boolean fileStartsAfterScan = !Bytes.equals(largestScanRow, HConstants.EMPTY_END_ROW)
        && getComparator().compareRows(firstKeyKV, largestScanRow, 0, largestScanRow.length) > 0;
    boolean fileEndsBeforeScan =
        getComparator().compareRows(lastKeyKV, smallestScanRow, 0, smallestScanRow.length) < 0;
    return !(fileStartsAfterScan || fileEndsBeforeScan);
  }

  /**
   * Loads the file info of the underlying HFile reader and picks up the Bloom filter type, the
   * last Bloom key and the delete-family count from it when those entries are present.
   * @return the file info map returned by the underlying reader
   * @throws IOException if the underlying reader fails to load the file info
   */
  public Map<byte[], byte[]> loadFileInfo() throws IOException {
    Map<byte [], byte []> fi = reader.loadFileInfo();

    byte[] b = fi.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
    if (b != null) {
      bloomFilterType = BloomType.valueOf(Bytes.toString(b));
    }

    lastBloomKey = fi.get(StoreFile.LAST_BLOOM_KEY);
    // The last Bloom key entry may be absent; only wrap it when there is something to wrap.
    if (bloomFilterType == BloomType.ROWCOL && lastBloomKey != null) {
      lastBloomKeyOnlyKV = new KeyValue.KeyOnlyKeyValue(lastBloomKey, 0, lastBloomKey.length);
    }
    byte[] cnt = fi.get(StoreFile.DELETE_FAMILY_COUNT);
    if (cnt != null) {
      deleteFamilyCnt = Bytes.toLong(cnt);
    }

    return fi;
  }

  /**
   * Loads the general and the delete-family Bloom filter metadata, unless already loaded.
   */
  public void loadBloomfilter() {
    this.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
    this.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
  }

  /**
   * Loads the Bloom filter metadata of the given kind, unless it is already loaded. Read errors
   * and bad metadata are logged and the corresponding Bloom filter is dropped.
   * @param blockType {@code BlockType.GENERAL_BLOOM_META} or
   *          {@code BlockType.DELETE_FAMILY_BLOOM_META}
   * @throws RuntimeException if {@code blockType} is any other block type
   */
  public void loadBloomfilter(BlockType blockType) {
    try {
      if (blockType == BlockType.GENERAL_BLOOM_META) {
        if (this.generalBloomFilter != null) {
          return; // Bloom has been loaded
        }

        DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
        if (bloomMeta != null) {
          // sanity check for NONE Bloom filter
          if (bloomFilterType == BloomType.NONE) {
            throw new IOException(
                "valid bloom filter type not found in FileInfo");
          } else {
            generalBloomFilter = BloomFilterFactory.createFromMeta(bloomMeta,
                reader);
            if (LOG.isTraceEnabled()) {
              LOG.trace("Loaded " + bloomFilterType.toString() + " "
                + generalBloomFilter.getClass().getSimpleName()
                + " metadata for " + reader.getName());
            }
          }
        }
      } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) {
        if (this.deleteFamilyBloomFilter != null) {
          return; // Bloom has been loaded
        }

        DataInput bloomMeta = reader.getDeleteBloomFilterMetadata();
        if (bloomMeta != null) {
          deleteFamilyBloomFilter = BloomFilterFactory.createFromMeta(
              bloomMeta, reader);
          LOG.info("Loaded Delete Family Bloom ("
              + deleteFamilyBloomFilter.getClass().getSimpleName()
              + ") metadata for " + reader.getName());
        }
      } else {
        throw new RuntimeException("Block Type: " + blockType.toString()
            + " is not supported for Bloom filter");
      }
    } catch (IOException e) {
      LOG.error("Error reading bloom filter meta for " + blockType
          + " -- proceeding without", e);
      setBloomFilterFaulty(blockType);
    } catch (IllegalArgumentException e) {
      LOG.error("Bad bloom filter meta " + blockType
          + " -- proceeding without", e);
      setBloomFilterFaulty(blockType);
    }
  }

  /**
   * Drops the Bloom filter that belongs to the given meta block type; other block types are
   * ignored.
   */
  private void setBloomFilterFaulty(BlockType blockType) {
    if (blockType == BlockType.GENERAL_BLOOM_META) {
      setGeneralBloomFilterFaulty();
    } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) {
      setDeleteFamilyBloomFilterFaulty();
    }
  }

  /**
   * The number of Bloom filter entries in this store file, or an estimate
   * thereof, if the Bloom filter is not loaded. This always returns an upper
   * bound of the number of Bloom filter entries.
   *
   * @return an estimate of the number of Bloom filter entries in this file
   */
  public long getFilterEntries() {
    return generalBloomFilter != null ? generalBloomFilter.getKeyCount()
        : reader.getEntries();
  }

  /**
   * Drops the general Bloom filter so that it is no longer consulted.
   */
  public void setGeneralBloomFilterFaulty() {
    generalBloomFilter = null;
  }

  /**
   * Drops the delete-family Bloom filter so that it is no longer consulted.
   */
  public void setDeleteFamilyBloomFilterFaulty() {
    this.deleteFamilyBloomFilter = null;
  }

  /**
   * @return the last key as reported by the underlying HFile reader
   */
  public Cell getLastKey() {
    return reader.getLastKey();
  }

  /**
   * @return the last row key as reported by the underlying HFile reader
   */
  public byte[] getLastRowKey() {
    return reader.getLastRowKey();
  }

  /**
   * @return the mid key as reported by the underlying HFile reader
   * @throws IOException if the underlying reader fails to determine it
   */
  public Cell midkey() throws IOException {
    return reader.midkey();
  }

  /**
   * @return the length as reported by the underlying HFile reader
   */
  public long length() {
    return reader.length();
  }

  /**
   * @return the total uncompressed byte count recorded in the HFile trailer
   */
  public long getTotalUncompressedBytes() {
    return reader.getTrailer().getTotalUncompressedBytes();
  }

  /**
   * @return the entry count as reported by the underlying HFile reader
   */
  public long getEntries() {
    return reader.getEntries();
  }

  /**
   * @return the delete-family count read from the file info, or -1 if it has not been loaded
   */
  public long getDeleteFamilyCnt() {
    return deleteFamilyCnt;
  }

  /**
   * @return the first key as reported by the underlying HFile reader
   */
  public Cell getFirstKey() {
    return reader.getFirstKey();
  }

  /**
   * @return the index size as reported by the underlying HFile reader
   */
  public long indexSize() {
    return reader.indexSize();
  }

  /**
   * @return the Bloom filter type of this reader
   */
  public BloomType getBloomFilterType() {
    return this.bloomFilterType;
  }

  /**
   * @return the sequence id set on this reader, -1 if none has been set
   */
  public long getSequenceID() {
    return sequenceID;
  }

  public void setSequenceID(long sequenceID) {
    this.sequenceID = sequenceID;
  }

  public void setBulkLoaded(boolean bulkLoadResult) {
    this.bulkLoadResult = bulkLoadResult;
  }

  /**
   * @return the bulk-load flag set through {@link #setBulkLoaded(boolean)}
   */
  public boolean isBulkLoaded() {
    return this.bulkLoadResult;
  }

  BloomFilter getGeneralBloomFilter() {
    return generalBloomFilter;
  }

  long getUncompressedDataIndexSize() {
    return reader.getTrailer().getUncompressedDataIndexSize();
  }

  /**
   * @return the byte size of the general Bloom filter, or 0 if none is loaded
   */
  public long getTotalBloomSize() {
    if (generalBloomFilter == null) {
      return 0;
    }
    return generalBloomFilter.getByteSize();
  }

  /**
   * @return the major version recorded in the HFile trailer
   */
  public int getHFileVersion() {
    return reader.getTrailer().getMajorVersion();
  }

  /**
   * @return the minor version recorded in the HFile trailer
   */
  public int getHFileMinorVersion() {
    return reader.getTrailer().getMinorVersion();
  }

  /**
   * @return the underlying HFile reader
   */
  public HFile.Reader getHFileReader() {
    return reader;
  }

  /**
   * Drops both Bloom filters. Intended for tests only.
   */
  void disableBloomFilterForTesting() {
    generalBloomFilter = null;
    this.deleteFamilyBloomFilter = null;
  }

  /**
   * @return the maximum of this reader's time range, or
   *         {@code TimeRange.INITIAL_MAX_TIMESTAMP} when no time range is set
   */
  public long getMaxTimestamp() {
    return timeRange == null ? TimeRange.INITIAL_MAX_TIMESTAMP: timeRange.getMax();
  }

  boolean isSkipResetSeqId() {
    return skipResetSeqId;
  }

  void setSkipResetSeqId(boolean skipResetSeqId) {
    this.skipResetSeqId = skipResetSeqId;
  }
}