LegacyScanQueryMatcher.java example

Explorer
hbase-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.querymatcher;

import com.google.common.base.Preconditions;

import java.io.IOException;
import java.util.Arrays;
import java.util.NavigableSet;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost;
import org.apache.hadoop.hbase.regionserver.ScanInfo;
import org.apache.hadoop.hbase.regionserver.ScanType;
import org.apache.hadoop.hbase.regionserver.querymatcher.DeleteTracker.DeleteResult;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

/**
 * The old query matcher implementation. Used to keep compatibility for coprocessor that could
 * overwrite the StoreScanner before compaction. Should be removed once we find a better way to do
 * filtering during compaction.
 */
@Deprecated
@InterfaceAudience.Private
public class LegacyScanQueryMatcher extends ScanQueryMatcher {

  private final TimeRange tr;

  private final Filter filter;

  /** Keeps track of deletes */
  private final DeleteTracker deletes;

  /**
   * The following three booleans define how we deal with deletes. There are three different
   * aspects:
   * <ol>
   * <li>Whether to keep delete markers. This is used in compactions. Minor compactions always keep
   * delete markers.</li>
   * <li>Whether to keep deleted rows. This is also used in compactions, if the store is set to keep
   * deleted rows. This implies keeping the delete markers as well.</li> In this case deleted rows
   * are subject to the normal max version and TTL/min version rules just like "normal" rows.
   * <li>Whether a scan can do time travel queries even before deleted marker to reach deleted
   * rows.</li>
   * </ol>
   */
  /** whether to retain delete markers */
  private boolean retainDeletesInOutput;

  /** whether to return deleted rows */
  private final KeepDeletedCells keepDeletedCells;

  // By default, when hbase.hstore.time.to.purge.deletes is 0ms, a delete
  // marker is always removed during a major compaction. If set to non-zero
  // value then major compaction will try to keep a delete marker around for
  // the given number of milliseconds. We want to keep the delete markers
  // around a bit longer because old puts might appear out-of-order. For
  // example, during log replication between two clusters.
  //
  // If the delete marker has lived longer than its column-family's TTL then
  // the delete marker will be removed even if time.to.purge.deletes has not
  // passed. This is because all the Puts that this delete marker can influence
  // would have also expired. (Removing of delete markers on col family TTL will
  // not happen if min-versions is set to non-zero)
  //
  // But, if time.to.purge.deletes has not expired then a delete
  // marker will not be removed just because there are no Puts that it is
  // currently influencing. This is because Puts, that this delete can
  // influence. may appear out of order.
  private final long timeToPurgeDeletes;

  /**
   * This variable shows whether there is an null column in the query. There always exists a null
   * column in the wildcard column query. There maybe exists a null column in the explicit column
   * query based on the first column.
   */
  private final boolean hasNullColumn;

  /** readPoint over which the KVs are unconditionally included */
  private final long maxReadPointToTrackVersions;

  /**
   * Oldest put in any of the involved store files Used to decide whether it is ok to delete family
   * delete marker of this store keeps deleted KVs.
   */
  protected final long earliestPutTs;

  private final byte[] stopRow;

  private byte[] dropDeletesFromRow = null, dropDeletesToRow = null;

  private LegacyScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns,
      boolean hasNullColumn, DeleteTracker deletes, ScanType scanType, long readPointToUse,
      long earliestPutTs, long oldestUnexpiredTS, long now) {
    super(createStartKeyFromRow(scan.getStartRow(), scanInfo), scanInfo, columns, oldestUnexpiredTS,
        now);
    TimeRange timeRange = scan.getColumnFamilyTimeRange().get(scanInfo.getFamily());
    if (timeRange == null) {
      this.tr = scan.getTimeRange();
    } else {
      this.tr = timeRange;
    }
    this.hasNullColumn = hasNullColumn;
    this.deletes = deletes;
    this.filter = scan.getFilter();
    this.maxReadPointToTrackVersions = readPointToUse;
    this.timeToPurgeDeletes = scanInfo.getTimeToPurgeDeletes();
    this.earliestPutTs = earliestPutTs;

    /* how to deal with deletes */
    this.keepDeletedCells = scanInfo.getKeepDeletedCells();
    this.retainDeletesInOutput = scanType == ScanType.COMPACT_RETAIN_DELETES;
    this.stopRow = scan.getStopRow();
  }

  private LegacyScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns,
      boolean hasNullColumn, DeleteTracker deletes, ScanType scanType, long readPointToUse,
      long earliestPutTs, long oldestUnexpiredTS, long now, byte[] dropDeletesFromRow,
      byte[] dropDeletesToRow) {
    this(scan, scanInfo, columns, hasNullColumn, deletes, scanType, readPointToUse, earliestPutTs,
        oldestUnexpiredTS, now);
    this.dropDeletesFromRow = Preconditions.checkNotNull(dropDeletesFromRow);
    this.dropDeletesToRow = Preconditions.checkNotNull(dropDeletesToRow);
  }

  @Override
  public void beforeShipped() throws IOException {
    super.beforeShipped();
    deletes.beforeShipped();
  }

  @Override
  public MatchCode match(Cell cell) throws IOException {
    if (filter != null && filter.filterAllRemaining()) {
      return MatchCode.DONE_SCAN;
    }
    MatchCode returnCode = preCheck(cell);
    if (returnCode != null) {
      return returnCode;
    }
    /*
     * The delete logic is pretty complicated now.
     * This is corroborated by the following:
     * 1. The store might be instructed to keep deleted rows around.
     * 2. A scan can optionally see past a delete marker now.
     * 3. If deleted rows are kept, we have to find out when we can
     *    remove the delete markers.
     * 4. Family delete markers are always first (regardless of their TS)
     * 5. Delete markers should not be counted as version
     * 6. Delete markers affect puts of the *same* TS
     * 7. Delete marker need to be version counted together with puts
     *    they affect
     */
    long timestamp = cell.getTimestamp();
    byte typeByte = cell.getTypeByte();
    long mvccVersion = cell.getSequenceId();
    if (CellUtil.isDelete(typeByte)) {
      if (keepDeletedCells == KeepDeletedCells.FALSE
          || (keepDeletedCells == KeepDeletedCells.TTL && timestamp < oldestUnexpiredTS)) {
        // first ignore delete markers if the scanner can do so, and the
        // range does not include the marker
        //
        // during flushes and compactions also ignore delete markers newer
        // than the readpoint of any open scanner, this prevents deleted
        // rows that could still be seen by a scanner from being collected
        boolean includeDeleteMarker = tr.withinOrAfterTimeRange(timestamp);
        if (includeDeleteMarker && mvccVersion <= maxReadPointToTrackVersions) {
          this.deletes.add(cell);
        }
        // Can't early out now, because DelFam come before any other keys
      }

      if (timeToPurgeDeletes > 0
          && (EnvironmentEdgeManager.currentTime() - timestamp) <= timeToPurgeDeletes) {
        return MatchCode.INCLUDE;
      } else if (retainDeletesInOutput || mvccVersion > maxReadPointToTrackVersions) {
        // always include or it is not time yet to check whether it is OK
        // to purge deltes or not
        // if this is not a user scan (compaction), we can filter this deletemarker right here
        // otherwise (i.e. a "raw" scan) we fall through to normal version and timerange checking
        return MatchCode.INCLUDE;
      } else if (keepDeletedCells == KeepDeletedCells.TRUE
          || (keepDeletedCells == KeepDeletedCells.TTL && timestamp >= oldestUnexpiredTS)) {
        if (timestamp < earliestPutTs) {
          // keeping delete rows, but there are no puts older than
          // this delete in the store files.
          return columns.getNextRowOrNextColumn(cell);
        }
        // else: fall through and do version counting on the
        // delete markers
      } else {
        return MatchCode.SKIP;
      }
      // note the following next else if...
      // delete marker are not subject to other delete markers
    } else if (!this.deletes.isEmpty()) {
      DeleteResult deleteResult = deletes.isDeleted(cell);
      switch (deleteResult) {
        case FAMILY_DELETED:
        case COLUMN_DELETED:
          return columns.getNextRowOrNextColumn(cell);
        case VERSION_DELETED:
        case FAMILY_VERSION_DELETED:
          return MatchCode.SKIP;
        case NOT_DELETED:
          break;
        default:
          throw new RuntimeException("UNEXPECTED");
        }
    }

    int timestampComparison = tr.compare(timestamp);
    if (timestampComparison >= 1) {
      return MatchCode.SKIP;
    } else if (timestampComparison <= -1) {
      return columns.getNextRowOrNextColumn(cell);
    }

    // STEP 1: Check if the column is part of the requested columns
    MatchCode colChecker = columns.checkColumn(cell, typeByte);
    if (colChecker == MatchCode.INCLUDE) {
      ReturnCode filterResponse = ReturnCode.SKIP;
      // STEP 2: Yes, the column is part of the requested columns. Check if filter is present
      if (filter != null) {
        // STEP 3: Filter the key value and return if it filters out
        filterResponse = filter.filterKeyValue(cell);
        switch (filterResponse) {
        case SKIP:
          return MatchCode.SKIP;
        case NEXT_COL:
          return columns.getNextRowOrNextColumn(cell);
        case NEXT_ROW:
          return MatchCode.SEEK_NEXT_ROW;
        case SEEK_NEXT_USING_HINT:
          return MatchCode.SEEK_NEXT_USING_HINT;
        default:
          //It means it is either include or include and seek next
          break;
        }
      }
      /*
       * STEP 4: Reaching this step means the column is part of the requested columns and either
       * the filter is null or the filter has returned INCLUDE or INCLUDE_AND_NEXT_COL response.
       * Now check the number of versions needed. This method call returns SKIP, INCLUDE,
       * INCLUDE_AND_SEEK_NEXT_ROW, INCLUDE_AND_SEEK_NEXT_COL.
       *
       * FilterResponse            ColumnChecker               Desired behavior
       * INCLUDE                   SKIP                        row has already been included, SKIP.
       * INCLUDE                   INCLUDE                     INCLUDE
       * INCLUDE                   INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
       * INCLUDE                   INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
       * INCLUDE_AND_SEEK_NEXT_COL SKIP                        row has already been included, SKIP.
       * INCLUDE_AND_SEEK_NEXT_COL INCLUDE                     INCLUDE_AND_SEEK_NEXT_COL
       * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
       * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
       *
       * In all the above scenarios, we return the column checker return value except for
       * FilterResponse (INCLUDE_AND_SEEK_NEXT_COL) and ColumnChecker(INCLUDE)
       */
      colChecker = columns.checkVersions(cell, timestamp, typeByte,
          mvccVersion > maxReadPointToTrackVersions);
      if (filterResponse == ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW) {
        if (colChecker != MatchCode.SKIP) {
          return MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
        }
        return MatchCode.SEEK_NEXT_ROW;
      }
      return (filterResponse == ReturnCode.INCLUDE_AND_NEXT_COL &&
          colChecker == MatchCode.INCLUDE) ? MatchCode.INCLUDE_AND_SEEK_NEXT_COL
          : colChecker;
    }
    return colChecker;
  }

  @Override
  public boolean hasNullColumnInQuery() {
    return hasNullColumn;
  }

  /**
   * Handle partial-drop-deletes. As we match keys in order, when we have a range from which we can
   * drop deletes, we can set retainDeletesInOutput to false for the duration of this range only,
   * and maintain consistency.
   */
  private void checkPartialDropDeleteRange(Cell curCell) {
    // If partial-drop-deletes are used, initially, dropDeletesFromRow and dropDeletesToRow
    // are both set, and the matcher is set to retain deletes. We assume ordered keys. When
    // dropDeletesFromRow is leq current kv, we start dropping deletes and reset
    // dropDeletesFromRow; thus the 2nd "if" starts to apply.
    if ((dropDeletesFromRow != null)
        && (Arrays.equals(dropDeletesFromRow, HConstants.EMPTY_START_ROW)
            || (CellComparator.COMPARATOR.compareRows(curCell, dropDeletesFromRow, 0,
              dropDeletesFromRow.length) >= 0))) {
      retainDeletesInOutput = false;
      dropDeletesFromRow = null;
    }
    // If dropDeletesFromRow is null and dropDeletesToRow is set, we are inside the partial-
    // drop-deletes range. When dropDeletesToRow is leq current kv, we stop dropping deletes,
    // and reset dropDeletesToRow so that we don't do any more compares.
    if ((dropDeletesFromRow == null) && (dropDeletesToRow != null)
        && !Arrays.equals(dropDeletesToRow, HConstants.EMPTY_END_ROW) && (CellComparator.COMPARATOR
            .compareRows(curCell, dropDeletesToRow, 0, dropDeletesToRow.length) >= 0)) {
      retainDeletesInOutput = true;
      dropDeletesToRow = null;
    }
  }

  @Override
  protected void reset() {
    checkPartialDropDeleteRange(currentRow);
  }

  @Override
  public boolean isUserScan() {
    return false;
  }

  @Override
  public boolean moreRowsMayExistAfter(Cell cell) {
    if (this.stopRow == null || this.stopRow.length == 0) {
      return true;
    }
    return rowComparator.compareRows(cell, stopRow, 0, stopRow.length) < 0;
  }

  @Override
  public Filter getFilter() {
    return filter;
  }

  @Override
  public Cell getNextKeyHint(Cell cell) throws IOException {
    if (filter == null) {
      return null;
    } else {
      return filter.getNextCellHint(cell);
    }
  }

  public static LegacyScanQueryMatcher create(Scan scan, ScanInfo scanInfo,
      NavigableSet<byte[]> columns, ScanType scanType, long readPointToUse, long earliestPutTs,
      long oldestUnexpiredTS, long now, byte[] dropDeletesFromRow, byte[] dropDeletesToRow,
      RegionCoprocessorHost regionCoprocessorHost) throws IOException {
    int maxVersions = Math.min(scan.getMaxVersions(), scanInfo.getMaxVersions());
    boolean hasNullColumn;
    ColumnTracker columnTracker;
    if (columns == null || columns.isEmpty()) {
      // there is always a null column in the wildcard column query.
      hasNullColumn = true;
      // use a specialized scan for wildcard column tracker.
      columnTracker = new ScanWildcardColumnTracker(scanInfo.getMinVersions(), maxVersions,
          oldestUnexpiredTS);
    } else {
      // We can share the ExplicitColumnTracker, diff is we reset
      // between rows, not between storefiles.
      // whether there is null column in the explicit column query
      hasNullColumn = columns.first().length == 0;
      columnTracker = new ExplicitColumnTracker(columns, scanInfo.getMinVersions(), maxVersions,
          oldestUnexpiredTS);
    }
    DeleteTracker deletes = instantiateDeleteTracker(regionCoprocessorHost);
    if (dropDeletesFromRow == null) {
      return new LegacyScanQueryMatcher(scan, scanInfo, columnTracker, hasNullColumn, deletes,
          scanType, readPointToUse, earliestPutTs, oldestUnexpiredTS, now);
    } else {
      return new LegacyScanQueryMatcher(scan, scanInfo, columnTracker, hasNullColumn, deletes,
          scanType, readPointToUse, earliestPutTs, oldestUnexpiredTS, now, dropDeletesFromRow,
          dropDeletesToRow);
    }
  }
}