/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.regionserver; import com.google.common.annotations.VisibleForTesting; import java.io.DataInput; import java.io.IOException; import java.util.Map; import java.util.SortedSet; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.TimeRange; import org.apache.hadoop.hbase.io.hfile.BlockType; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileBlock; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.util.BloomFilter; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.Bytes; /** * Reader for a StoreFile. */ @InterfaceAudience.Private public class StoreFileReader { private static final Log LOG = LogFactory.getLog(StoreFileReader.class.getName()); protected BloomFilter generalBloomFilter = null; protected BloomFilter deleteFamilyBloomFilter = null; protected BloomType bloomFilterType; private final HFile.Reader reader; protected long sequenceID = -1; protected TimeRange timeRange = null; private byte[] lastBloomKey; private long deleteFamilyCnt = -1; private boolean bulkLoadResult = false; private KeyValue.KeyOnlyKeyValue lastBloomKeyOnlyKV = null; private boolean skipResetSeqId = true; // Counter that is incremented every time a scanner is created on the // store file. It is decremented when the scan on the store file is // done. All StoreFileReader for the same StoreFile will share this counter. private final AtomicInteger refCount; // indicate that whether this StoreFileReader is shared, i.e., used for pread. If not, we will // close the internal reader when readCompleted is called. @VisibleForTesting final boolean shared; private StoreFileReader(HFile.Reader reader, AtomicInteger refCount, boolean shared) { this.reader = reader; bloomFilterType = BloomType.NONE; this.refCount = refCount; this.shared = shared; } public StoreFileReader(FileSystem fs, Path path, CacheConfig cacheConf, boolean primaryReplicaStoreFile, AtomicInteger refCount, boolean shared, Configuration conf) throws IOException { this(HFile.createReader(fs, path, cacheConf, primaryReplicaStoreFile, conf), refCount, shared); } public StoreFileReader(FileSystem fs, Path path, FSDataInputStreamWrapper in, long size, CacheConfig cacheConf, boolean primaryReplicaStoreFile, AtomicInteger refCount, boolean shared, Configuration conf) throws IOException { this(HFile.createReader(fs, path, in, size, cacheConf, primaryReplicaStoreFile, conf), refCount, shared); } void copyFields(StoreFileReader reader) { this.generalBloomFilter = reader.generalBloomFilter; this.deleteFamilyBloomFilter = reader.deleteFamilyBloomFilter; this.bloomFilterType = reader.bloomFilterType; this.sequenceID = reader.sequenceID; this.timeRange = reader.timeRange; this.lastBloomKey = reader.lastBloomKey; this.bulkLoadResult = reader.bulkLoadResult; this.lastBloomKeyOnlyKV = reader.lastBloomKeyOnlyKV; this.skipResetSeqId = reader.skipResetSeqId; } public boolean isPrimaryReplicaReader() { return reader.isPrimaryReplicaReader(); } /** * ONLY USE DEFAULT CONSTRUCTOR FOR UNIT TESTS */ @VisibleForTesting StoreFileReader() { this.refCount = new AtomicInteger(0); this.reader = null; this.shared = false; } public CellComparator getComparator() { return reader.getComparator(); } /** * Get a scanner to scan over this StoreFile. * @param cacheBlocks should this scanner cache blocks? * @param pread use pread (for highly concurrent small readers) * @param isCompaction is scanner being used for compaction? * @param scannerOrder Order of this scanner relative to other scanners. See * {@link KeyValueScanner#getScannerOrder()}. * @param canOptimizeForNonNullColumn {@code true} if we can make sure there is no null column, * otherwise {@code false}. This is a hint for optimization. * @return a scanner */ public StoreFileScanner getStoreFileScanner(boolean cacheBlocks, boolean pread, boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) { // Increment the ref count refCount.incrementAndGet(); return new StoreFileScanner(this, getScanner(cacheBlocks, pread, isCompaction), !isCompaction, reader.hasMVCCInfo(), readPt, scannerOrder, canOptimizeForNonNullColumn); } /** * Indicate that the scanner has finished reading with this reader. We need to decrement the ref * count, and also, if this is not the common pread reader, we should close it. */ void readCompleted() { refCount.decrementAndGet(); if (!shared) { try { reader.close(false); } catch (IOException e) { LOG.warn("failed to close stream reader", e); } } } /** * @deprecated Do not write further code which depends on this call. Instead * use getStoreFileScanner() which uses the StoreFileScanner class/interface * which is the preferred way to scan a store with higher level concepts. * * @param cacheBlocks should we cache the blocks? * @param pread use pread (for concurrent small readers) * @return the underlying HFileScanner */ @Deprecated public HFileScanner getScanner(boolean cacheBlocks, boolean pread) { return getScanner(cacheBlocks, pread, false); } /** * @deprecated Do not write further code which depends on this call. Instead * use getStoreFileScanner() which uses the StoreFileScanner class/interface * which is the preferred way to scan a store with higher level concepts. * * @param cacheBlocks * should we cache the blocks? * @param pread * use pread (for concurrent small readers) * @param isCompaction * is scanner being used for compaction? * @return the underlying HFileScanner */ @Deprecated public HFileScanner getScanner(boolean cacheBlocks, boolean pread, boolean isCompaction) { return reader.getScanner(cacheBlocks, pread, isCompaction); } public void close(boolean evictOnClose) throws IOException { reader.close(evictOnClose); } /** * Check if this storeFile may contain keys within the TimeRange that * have not expired (i.e. not older than oldestUnexpiredTS). * @param timeRange the timeRange to restrict * @param oldestUnexpiredTS the oldest timestamp that is not expired, as * determined by the column family's TTL * @return false if queried keys definitely don't exist in this StoreFile */ boolean passesTimerangeFilter(TimeRange tr, long oldestUnexpiredTS) { return this.timeRange == null? true: this.timeRange.includesTimeRange(tr) && this.timeRange.getMax() >= oldestUnexpiredTS; } /** * Checks whether the given scan passes the Bloom filter (if present). Only * checks Bloom filters for single-row or single-row-column scans. Bloom * filter checking for multi-gets is implemented as part of the store * scanner system (see {@link StoreFileScanner#seek(Cell)} and uses * the lower-level API {@link #passesGeneralRowBloomFilter(byte[], int, int)} * and {@link #passesGeneralRowColBloomFilter(Cell)}. * * @param scan the scan specification. Used to determine the row, and to * check whether this is a single-row ("get") scan. * @param columns the set of columns. Only used for row-column Bloom * filters. * @return true if the scan with the given column set passes the Bloom * filter, or if the Bloom filter is not applicable for the scan. * False if the Bloom filter is applicable and the scan fails it. */ boolean passesBloomFilter(Scan scan, final SortedSet<byte[]> columns) { // Multi-column non-get scans will use Bloom filters through the // lower-level API function that this function calls. if (!scan.isGetScan()) { return true; } byte[] row = scan.getStartRow(); switch (this.bloomFilterType) { case ROW: return passesGeneralRowBloomFilter(row, 0, row.length); case ROWCOL: if (columns != null && columns.size() == 1) { byte[] column = columns.first(); // create the required fake key Cell kvKey = CellUtil.createFirstOnRow(row, HConstants.EMPTY_BYTE_ARRAY, column); return passesGeneralRowColBloomFilter(kvKey); } // For multi-column queries the Bloom filter is checked from the // seekExact operation. return true; default: return true; } } public boolean passesDeleteFamilyBloomFilter(byte[] row, int rowOffset, int rowLen) { // Cache Bloom filter as a local variable in case it is set to null by // another thread on an IO error. BloomFilter bloomFilter = this.deleteFamilyBloomFilter; // Empty file or there is no delete family at all if (reader.getTrailer().getEntryCount() == 0 || deleteFamilyCnt == 0) { return false; } if (bloomFilter == null) { return true; } try { if (!bloomFilter.supportsAutoLoading()) { return true; } return bloomFilter.contains(row, rowOffset, rowLen, null); } catch (IllegalArgumentException e) { LOG.error("Bad Delete Family bloom filter data -- proceeding without", e); setDeleteFamilyBloomFilterFaulty(); } return true; } /** * A method for checking Bloom filters. Called directly from * StoreFileScanner in case of a multi-column query. * * @return True if passes */ public boolean passesGeneralRowBloomFilter(byte[] row, int rowOffset, int rowLen) { BloomFilter bloomFilter = this.generalBloomFilter; if (bloomFilter == null) { return true; } // Used in ROW bloom byte[] key = null; if (rowOffset != 0 || rowLen != row.length) { throw new AssertionError( "For row-only Bloom filters the row " + "must occupy the whole array"); } key = row; return checkGeneralBloomFilter(key, null, bloomFilter); } /** * A method for checking Bloom filters. Called directly from * StoreFileScanner in case of a multi-column query. * * @param cell * the cell to check if present in BloomFilter * @return True if passes */ public boolean passesGeneralRowColBloomFilter(Cell cell) { BloomFilter bloomFilter = this.generalBloomFilter; if (bloomFilter == null) { return true; } // Used in ROW_COL bloom Cell kvKey = null; // Already if the incoming key is a fake rowcol key then use it as it is if (cell.getTypeByte() == KeyValue.Type.Maximum.getCode() && cell.getFamilyLength() == 0) { kvKey = cell; } else { kvKey = CellUtil.createFirstOnRowCol(cell); } return checkGeneralBloomFilter(null, kvKey, bloomFilter); } private boolean checkGeneralBloomFilter(byte[] key, Cell kvKey, BloomFilter bloomFilter) { // Empty file if (reader.getTrailer().getEntryCount() == 0) { return false; } HFileBlock bloomBlock = null; try { boolean shouldCheckBloom; ByteBuff bloom; if (bloomFilter.supportsAutoLoading()) { bloom = null; shouldCheckBloom = true; } else { bloomBlock = reader.getMetaBlock(HFile.BLOOM_FILTER_DATA_KEY, true); bloom = bloomBlock.getBufferWithoutHeader(); shouldCheckBloom = bloom != null; } if (shouldCheckBloom) { boolean exists; // Whether the primary Bloom key is greater than the last Bloom key // from the file info. For row-column Bloom filters this is not yet // a sufficient condition to return false. boolean keyIsAfterLast = (lastBloomKey != null); // hbase:meta does not have blooms. So we need not have special interpretation // of the hbase:meta cells. We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom if (keyIsAfterLast) { if (bloomFilterType == BloomType.ROW) { keyIsAfterLast = (Bytes.BYTES_RAWCOMPARATOR.compare(key, lastBloomKey) > 0); } else { keyIsAfterLast = (CellComparator.COMPARATOR.compare(kvKey, lastBloomKeyOnlyKV)) > 0; } } if (bloomFilterType == BloomType.ROWCOL) { // Since a Row Delete is essentially a DeleteFamily applied to all // columns, a file might be skipped if using row+col Bloom filter. // In order to ensure this file is included an additional check is // required looking only for a row bloom. Cell rowBloomKey = CellUtil.createFirstOnRow(kvKey); // hbase:meta does not have blooms. So we need not have special interpretation // of the hbase:meta cells. We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom if (keyIsAfterLast && (CellComparator.COMPARATOR.compare(rowBloomKey, lastBloomKeyOnlyKV)) > 0) { exists = false; } else { exists = bloomFilter.contains(kvKey, bloom, BloomType.ROWCOL) || bloomFilter.contains(rowBloomKey, bloom, BloomType.ROWCOL); } } else { exists = !keyIsAfterLast && bloomFilter.contains(key, 0, key.length, bloom); } return exists; } } catch (IOException e) { LOG.error("Error reading bloom filter data -- proceeding without", e); setGeneralBloomFilterFaulty(); } catch (IllegalArgumentException e) { LOG.error("Bad bloom filter data -- proceeding without", e); setGeneralBloomFilterFaulty(); } finally { // Return the bloom block so that its ref count can be decremented. reader.returnBlock(bloomBlock); } return true; } /** * Checks whether the given scan rowkey range overlaps with the current storefile's * @param scan the scan specification. Used to determine the rowkey range. * @return true if there is overlap, false otherwise */ public boolean passesKeyRangeFilter(Scan scan) { if (this.getFirstKey() == null || this.getLastKey() == null) { // the file is empty return false; } if (Bytes.equals(scan.getStartRow(), HConstants.EMPTY_START_ROW) && Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) { return true; } byte[] smallestScanRow = scan.isReversed() ? scan.getStopRow() : scan.getStartRow(); byte[] largestScanRow = scan.isReversed() ? scan.getStartRow() : scan.getStopRow(); Cell firstKeyKV = this.getFirstKey(); Cell lastKeyKV = this.getLastKey(); boolean nonOverLapping = (getComparator().compareRows(firstKeyKV, largestScanRow, 0, largestScanRow.length) > 0 && !Bytes .equals(scan.isReversed() ? scan.getStartRow() : scan.getStopRow(), HConstants.EMPTY_END_ROW)) || getComparator().compareRows(lastKeyKV, smallestScanRow, 0, smallestScanRow.length) < 0; return !nonOverLapping; } public Map<byte[], byte[]> loadFileInfo() throws IOException { Map<byte [], byte []> fi = reader.loadFileInfo(); byte[] b = fi.get(StoreFile.BLOOM_FILTER_TYPE_KEY); if (b != null) { bloomFilterType = BloomType.valueOf(Bytes.toString(b)); } lastBloomKey = fi.get(StoreFile.LAST_BLOOM_KEY); if(bloomFilterType == BloomType.ROWCOL) { lastBloomKeyOnlyKV = new KeyValue.KeyOnlyKeyValue(lastBloomKey, 0, lastBloomKey.length); } byte[] cnt = fi.get(StoreFile.DELETE_FAMILY_COUNT); if (cnt != null) { deleteFamilyCnt = Bytes.toLong(cnt); } return fi; } public void loadBloomfilter() { this.loadBloomfilter(BlockType.GENERAL_BLOOM_META); this.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META); } public void loadBloomfilter(BlockType blockType) { try { if (blockType == BlockType.GENERAL_BLOOM_META) { if (this.generalBloomFilter != null) return; // Bloom has been loaded DataInput bloomMeta = reader.getGeneralBloomFilterMetadata(); if (bloomMeta != null) { // sanity check for NONE Bloom filter if (bloomFilterType == BloomType.NONE) { throw new IOException( "valid bloom filter type not found in FileInfo"); } else { generalBloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader); if (LOG.isTraceEnabled()) { LOG.trace("Loaded " + bloomFilterType.toString() + " " + generalBloomFilter.getClass().getSimpleName() + " metadata for " + reader.getName()); } } } } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) { if (this.deleteFamilyBloomFilter != null) return; // Bloom has been loaded DataInput bloomMeta = reader.getDeleteBloomFilterMetadata(); if (bloomMeta != null) { deleteFamilyBloomFilter = BloomFilterFactory.createFromMeta( bloomMeta, reader); LOG.info("Loaded Delete Family Bloom (" + deleteFamilyBloomFilter.getClass().getSimpleName() + ") metadata for " + reader.getName()); } } else { throw new RuntimeException("Block Type: " + blockType.toString() + "is not supported for Bloom filter"); } } catch (IOException e) { LOG.error("Error reading bloom filter meta for " + blockType + " -- proceeding without", e); setBloomFilterFaulty(blockType); } catch (IllegalArgumentException e) { LOG.error("Bad bloom filter meta " + blockType + " -- proceeding without", e); setBloomFilterFaulty(blockType); } } private void setBloomFilterFaulty(BlockType blockType) { if (blockType == BlockType.GENERAL_BLOOM_META) { setGeneralBloomFilterFaulty(); } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) { setDeleteFamilyBloomFilterFaulty(); } } /** * The number of Bloom filter entries in this store file, or an estimate * thereof, if the Bloom filter is not loaded. This always returns an upper * bound of the number of Bloom filter entries. * * @return an estimate of the number of Bloom filter entries in this file */ public long getFilterEntries() { return generalBloomFilter != null ? generalBloomFilter.getKeyCount() : reader.getEntries(); } public void setGeneralBloomFilterFaulty() { generalBloomFilter = null; } public void setDeleteFamilyBloomFilterFaulty() { this.deleteFamilyBloomFilter = null; } public Cell getLastKey() { return reader.getLastKey(); } public byte[] getLastRowKey() { return reader.getLastRowKey(); } public Cell midkey() throws IOException { return reader.midkey(); } public long length() { return reader.length(); } public long getTotalUncompressedBytes() { return reader.getTrailer().getTotalUncompressedBytes(); } public long getEntries() { return reader.getEntries(); } public long getDeleteFamilyCnt() { return deleteFamilyCnt; } public Cell getFirstKey() { return reader.getFirstKey(); } public long indexSize() { return reader.indexSize(); } public BloomType getBloomFilterType() { return this.bloomFilterType; } public long getSequenceID() { return sequenceID; } public void setSequenceID(long sequenceID) { this.sequenceID = sequenceID; } public void setBulkLoaded(boolean bulkLoadResult) { this.bulkLoadResult = bulkLoadResult; } public boolean isBulkLoaded() { return this.bulkLoadResult; } BloomFilter getGeneralBloomFilter() { return generalBloomFilter; } long getUncompressedDataIndexSize() { return reader.getTrailer().getUncompressedDataIndexSize(); } public long getTotalBloomSize() { if (generalBloomFilter == null) return 0; return generalBloomFilter.getByteSize(); } public int getHFileVersion() { return reader.getTrailer().getMajorVersion(); } public int getHFileMinorVersion() { return reader.getTrailer().getMinorVersion(); } public HFile.Reader getHFileReader() { return reader; } void disableBloomFilterForTesting() { generalBloomFilter = null; this.deleteFamilyBloomFilter = null; } public long getMaxTimestamp() { return timeRange == null ? TimeRange.INITIAL_MAX_TIMESTAMP: timeRange.getMax(); } boolean isSkipResetSeqId() { return skipResetSeqId; } void setSkipResetSeqId(boolean skipResetSeqId) { this.skipResetSeqId = skipResetSeqId; } }