/**
* Copyright 2009 The Apache Software Foundation Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language governing permissions and limitations under the
* License.
*/
package org.apache.hadoop.hbase.regionserver.transactional;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
/**
* Holds the state of a transaction. This includes a buffer of all writes, a
* record of all reads / scans, and information about which other transactions
* we need to check against.
*/
class TransactionState {

    private static final Log LOG = LogFactory.getLog(TransactionState.class);

    /** Current status. */
    public enum Status {
        /** Initial status, still performing operations. */
        PENDING,
        /**
         * Checked if we can commit, and said yes. Still need to determine the
         * global decision.
         */
        COMMIT_PENDING,
        /** Committed. (Historic misspelling retained for API compatibility.) */
        COMMITED,
        /** Aborted. */
        ABORTED
    }

    /**
     * Simple container of the range of the scanners we've opened. Used to check
     * for conflicting writes. A {@code null} startRow/endRow means "unbounded"
     * on that side.
     */
    private static class ScanRange {

        protected byte[] startRow;
        protected byte[] endRow;

        public ScanRange(final byte[] startRow, final byte[] endRow) {
            // Normalize "unbounded" markers to null. Compare by length, not by
            // reference: callers may hand us a zero-length array that is not
            // the HConstants sentinel instance, and a reference-equality check
            // would then treat an empty stop row as a real upper bound, making
            // contains() reject every key (empty sorts before everything).
            this.startRow = isUnbounded(startRow) ? null : startRow;
            this.endRow = isUnbounded(endRow) ? null : endRow;
        }

        /** @return true if the row marker denotes "no bound" (null or empty). */
        private static boolean isUnbounded(final byte[] row) {
            return row == null || row.length == 0;
        }

        /**
         * Check if this scan range contains the given key. Both bounds are
         * inclusive; a null bound matches everything on that side.
         *
         * @param rowKey the row to test
         * @return true if rowKey falls within [startRow, endRow]
         */
        public boolean contains(final byte[] rowKey) {
            if (startRow != null && Bytes.compareTo(rowKey, startRow) < 0) {
                return false;
            }
            if (endRow != null && Bytes.compareTo(endRow, rowKey) < 0) {
                return false;
            }
            return true;
        }

        @Override
        public String toString() {
            return "startRow: "
                    + (startRow == null ? "null" : Bytes.toString(startRow))
                    + ", endRow: "
                    + (endRow == null ? "null" : Bytes.toString(endRow));
        }
    }

    private final HRegionInfo regionInfo;
    private final long hLogStartSequenceId;
    private final long transactionId;
    private Status status;
    // Read set: row ranges this transaction has scanned (used for conflict
    // detection against other transactions' write sets).
    private final List<ScanRange> scans = new LinkedList<ScanRange>();
    // Deletes issued by this transaction, consulted when filtering reads.
    private final List<Delete> deletes = new LinkedList<Delete>();
    // All puts and deletes, in the order they were issued.
    private final List<WriteAction> writeOrdering = new LinkedList<WriteAction>();
    // Concurrent transactions whose writes must be checked against our reads.
    private final Set<TransactionState> transactionsToCheck = new HashSet<TransactionState>();
    private int startSequenceNumber;
    private Integer sequenceNumber;
    private int commitPendingWaits = 0;

    TransactionState(final long transactionId, final long rLogStartSequenceId,
            final HRegionInfo regionInfo) {
        this.transactionId = transactionId;
        this.hLogStartSequenceId = rLogStartSequenceId;
        this.regionInfo = regionInfo;
        this.status = Status.PENDING;
    }

    /** Record a single-row read: modeled as a degenerate [rowKey, rowKey] scan. */
    void addRead(final byte[] rowKey) {
        scans.add(new ScanRange(rowKey, rowKey));
    }

    /** Buffer a put, pinning any LATEST_TIMESTAMP cells to the current time. */
    void addWrite(final Put write) {
        updateLatestTimestamp(write.getFamilyMap().values(),
                EnvironmentEdgeManager.currentTimeMillis());
        writeOrdering.add(new WriteAction(write));
    }

    /**
     * Replace the LATEST_TIMESTAMP placeholder on each KeyValue with a concrete
     * time, so buffered edits sort/apply deterministically.
     *
     * @param kvsCollection per-family KeyValue lists to fix up in place
     * @param time the timestamp to stamp onto placeholder cells
     */
    static void updateLatestTimestamp(
            final Collection<List<KeyValue>> kvsCollection, final long time) {
        byte[] timeBytes = Bytes.toBytes(time);
        // HAVE to manually set the KV timestamps
        for (List<KeyValue> kvs : kvsCollection) {
            for (KeyValue kv : kvs) {
                if (kv.isLatestTimestamp()) {
                    kv.updateLatestStamp(timeBytes);
                }
            }
        }
    }

    /** @return true if this transaction has buffered any puts or deletes */
    boolean hasWrite() {
        return !writeOrdering.isEmpty();
    }

    /** Buffer a delete, pinning placeholder timestamps to the current time. */
    void addDelete(final Delete delete) {
        long now = EnvironmentEdgeManager.currentTimeMillis();
        updateLatestTimestamp(delete.getFamilyMap().values(), now);
        if (delete.getTimeStamp() == HConstants.LATEST_TIMESTAMP) {
            delete.setTimestamp(now);
        }
        deletes.add(delete);
        writeOrdering.add(new WriteAction(delete));
    }

    /**
     * Remove from {@code input} every KeyValue masked by one of this
     * transaction's buffered deletes whose timestamp lies in [minTime, maxTime].
     *
     * @param input list of KeyValues to filter in place
     * @param minTime lowest delete timestamp to consider
     * @param maxTime highest delete timestamp to consider
     */
    void applyDeletes(final List<KeyValue> input, final long minTime,
            final long maxTime) {
        if (deletes.isEmpty()) {
            return;
        }
        for (Iterator<KeyValue> itr = input.iterator(); itr.hasNext();) {
            KeyValue included = applyDeletes(itr.next(), minTime, maxTime);
            if (null == included) {
                itr.remove();
            }
        }
    }

    /**
     * Test a single KeyValue against this transaction's buffered deletes.
     *
     * @param kv the candidate KeyValue
     * @param minTime lowest delete timestamp to consider
     * @param maxTime highest delete timestamp to consider
     * @return {@code kv} if it survives all deletes, or null if it is masked
     */
    KeyValue applyDeletes(final KeyValue kv, final long minTime,
            final long maxTime) {
        if (deletes.isEmpty()) {
            return kv;
        }
        for (Delete delete : deletes) {
            // Skip if delete should not apply: different row, delete is older
            // than the cell, or delete falls outside the time window.
            if (!Bytes.equals(kv.getRow(), delete.getRow())
                    || kv.getTimestamp() > delete.getTimeStamp()
                    || delete.getTimeStamp() > maxTime
                    || delete.getTimeStamp() < minTime) {
                continue;
            }
            // Whole-row delete (no families specified) masks everything.
            if (delete.isEmpty()) {
                return null;
            }
            for (Entry<byte[], List<KeyValue>> deleteEntry : delete
                    .getFamilyMap().entrySet()) {
                byte[] family = deleteEntry.getKey();
                if (!Bytes.equals(kv.getFamily(), family)) {
                    continue;
                }
                List<KeyValue> familyDeletes = deleteEntry.getValue();
                // A family entry with no qualifiers masks the whole family.
                if (familyDeletes == null) {
                    return null;
                }
                for (KeyValue keyDeletes : familyDeletes) {
                    byte[] deleteQualifier = keyDeletes.getQualifier();
                    byte[] kvQualifier = kv.getQualifier();
                    if (keyDeletes.getTimestamp() > kv.getTimestamp()
                            && Bytes.equals(deleteQualifier, kvQualifier)) {
                        return null;
                    }
                }
            }
        }
        return kv;
    }

    /** Register a concurrent transaction to validate against at commit time. */
    void addTransactionToCheck(final TransactionState transaction) {
        transactionsToCheck.add(transaction);
    }

    /**
     * @return true if any registered transaction wrote a row inside one of our
     *         scan ranges (read-write conflict)
     */
    boolean hasConflict() {
        for (TransactionState transactionState : transactionsToCheck) {
            if (hasConflict(transactionState)) {
                return true;
            }
        }
        return false;
    }

    private boolean hasConflict(final TransactionState checkAgainst) {
        if (checkAgainst.getStatus().equals(TransactionState.Status.ABORTED)) {
            return false; // Cannot conflict with aborted transactions
        }
        for (WriteAction otherUpdate : checkAgainst.writeOrdering) {
            byte[] row = otherUpdate.getRow();
            for (ScanRange scanRange : this.scans) {
                if (scanRange.contains(row)) {
                    LOG.debug("Transaction [" + this.toString()
                            + "] has scan which conflicts with ["
                            + checkAgainst.toString() + "]: region ["
                            + regionInfo.getRegionNameAsString()
                            + "], scanRange[" + scanRange.toString()
                            + "] ,row[" + Bytes.toString(row) + "]");
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Get the status.
     *
     * @return Return the status.
     */
    Status getStatus() {
        return status;
    }

    /**
     * Set the status.
     *
     * @param status
     *            The status to set.
     */
    void setStatus(final Status status) {
        this.status = status;
    }

    /**
     * Get the startSequenceNumber.
     *
     * @return Return the startSequenceNumber.
     */
    int getStartSequenceNumber() {
        return startSequenceNumber;
    }

    /**
     * Set the startSequenceNumber.
     *
     * @param startSequenceNumber
     */
    void setStartSequenceNumber(final int startSequenceNumber) {
        this.startSequenceNumber = startSequenceNumber;
    }

    /**
     * Get the sequenceNumber.
     *
     * @return Return the sequenceNumber, or null if not yet assigned.
     */
    Integer getSequenceNumber() {
        return sequenceNumber;
    }

    /**
     * Set the sequenceNumber.
     *
     * @param sequenceNumber
     *            The sequenceNumber to set.
     */
    void setSequenceNumber(final Integer sequenceNumber) {
        this.sequenceNumber = sequenceNumber;
    }

    @Override
    public String toString() {
        StringBuilder result = new StringBuilder();
        result.append("[transactionId: ");
        result.append(transactionId);
        result.append(" status: ");
        result.append(status.name());
        result.append(" scan Size: ");
        result.append(scans.size());
        result.append(" write Size: ");
        result.append(getWriteOrdering().size());
        result.append(" startSQ: ");
        result.append(startSequenceNumber);
        if (sequenceNumber != null) {
            result.append(" commitedSQ:");
            result.append(sequenceNumber);
        }
        result.append("]");
        return result.toString();
    }

    /**
     * Get the transactionId.
     *
     * @return Return the transactionId.
     */
    long getTransactionId() {
        return transactionId;
    }

    /**
     * Get the startSequenceId.
     *
     * @return Return the startSequenceId.
     */
    long getHLogStartSequenceId() {
        return hLogStartSequenceId;
    }

    /** Record a scan's row range in the read set for conflict detection. */
    void addScan(final Scan scan) {
        ScanRange scanRange = new ScanRange(scan.getStartRow(),
                scan.getStopRow());
        LOG.trace(String.format(
                "Adding scan for transaction [%s], from [%s] to [%s]",
                transactionId,
                scanRange.startRow == null ? "null" : Bytes
                        .toString(scanRange.startRow),
                scanRange.endRow == null ? "null" : Bytes
                        .toString(scanRange.endRow)));
        scans.add(scanRange);
    }

    int getCommitPendingWaits() {
        return commitPendingWaits;
    }

    void incrementCommitPendingWaits() {
        this.commitPendingWaits++;
    }

    /**
     * Get deletes.
     *
     * @return deletes
     */
    List<Delete> getDeletes() {
        return deletes;
    }

    /**
     * Get a scanner to go through the puts and deletes from this transaction.
     * Used to weave together the local trx puts with the global state.
     *
     * @return scanner
     */
    KeyValueScanner getScanner(final Scan scan) {
        return new TransactionScanner(scan);
    }

    /**
     * Collect every buffered KeyValue whose row lies inside the scan's
     * [startRow, stopRow] bounds (empty/null bound = unbounded).
     */
    private KeyValue[] getAllKVs(final Scan scan) {
        List<KeyValue> kvList = new ArrayList<KeyValue>();
        for (WriteAction action : writeOrdering) {
            byte[] row = action.getRow();
            List<KeyValue> kvs = action.getKeyValues();
            if (scan.getStartRow() != null
                    && !Bytes.equals(scan.getStartRow(),
                            HConstants.EMPTY_START_ROW)
                    && Bytes.compareTo(row, scan.getStartRow()) < 0) {
                continue;
            }
            if (scan.getStopRow() != null
                    && !Bytes.equals(scan.getStopRow(),
                            HConstants.EMPTY_END_ROW)
                    && Bytes.compareTo(row, scan.getStopRow()) > 0) {
                continue;
            }
            kvList.addAll(kvs);
        }
        return kvList.toArray(new KeyValue[kvList.size()]);
    }

    /**
     * Find the position of the write action that produced this exact KeyValue
     * instance (identity comparison) within the transaction's write ordering.
     *
     * @throws IllegalStateException if the KeyValue is not from this transaction
     */
    private int getTransactionSequenceIndex(final KeyValue kv) {
        for (int i = 0; i < writeOrdering.size(); i++) {
            WriteAction action = writeOrdering.get(i);
            if (isKvInPut(kv, action.getPut())) {
                return i;
            }
            if (isKvInDelete(kv, action.getDelete())) {
                return i;
            }
        }
        throw new IllegalStateException("Can not find kv in transaction writes");
    }

    /** Identity (==) membership test of kv inside a put's family map. */
    private boolean isKvInPut(final KeyValue kv, final Put put) {
        if (null != put) {
            for (List<KeyValue> putKVs : put.getFamilyMap().values()) {
                for (KeyValue putKV : putKVs) {
                    if (putKV == kv) {
                        return true;
                    }
                }
            }
        }
        return false;
    }

    /** Identity (==) membership test of kv inside a delete's family map. */
    private boolean isKvInDelete(final KeyValue kv, final Delete delete) {
        if (null != delete) {
            for (List<KeyValue> deleteKVs : delete.getFamilyMap().values()) {
                for (KeyValue deleteKv : deleteKVs) {
                    if (deleteKv == kv) {
                        return true;
                    }
                }
            }
        }
        return false;
    }

    /**
     * Scanner of the puts and deletes that occur during this transaction.
     *
     * @author clint.morgan
     */
    private class TransactionScanner extends KeyValueListScanner implements
            InternalScanner {

        private ScanQueryMatcher matcher;

        TransactionScanner(final Scan scan) {
            super(new KeyValue.KVComparator() {

                // Break KV ties by write order: the KeyValue written later in
                // the transaction (higher sequence index) sorts first, so the
                // scanner sees the most recent in-transaction edit.
                @Override
                public int compare(final KeyValue left, final KeyValue right) {
                    int result = super.compare(left, right);
                    if (result != 0) {
                        return result;
                    }
                    if (left == right) {
                        return 0;
                    }
                    int put1Number = getTransactionSequenceIndex(left);
                    int put2Number = getTransactionSequenceIndex(right);
                    // Integer.compare avoids subtraction-overflow pitfalls.
                    return Integer.compare(put2Number, put1Number);
                }
            }, getAllKVs(scan));
            // We want transaction scanner to always take priority over store
            // scanners.
            setSequenceID(Long.MAX_VALUE);
            matcher = new ScanQueryMatcher(scan, null, null,
                    null, Long.MAX_VALUE, HConstants.LATEST_TIMESTAMP,
                    EnvironmentEdgeManager.currentTimeMillis());
        }

        /**
         * Get the next row of values from this transaction.
         *
         * @param outResult
         * @param limit max KeyValues to return; &lt;= 0 means no limit
         * @return true if there are more rows, false if scanner is done
         */
        @Override
        public synchronized boolean next(final List<KeyValue> outResult,
                final int limit) throws IOException {
            KeyValue peeked = this.peek();
            if (peeked == null) {
                close();
                return false;
            }
            matcher.setRow(peeked.getRow());
            KeyValue kv;
            List<KeyValue> results = new ArrayList<KeyValue>();
            LOOP: while ((kv = this.peek()) != null) {
                ScanQueryMatcher.MatchCode qcode = matcher.match(kv);
                switch (qcode) {
                case INCLUDE:
                    KeyValue next = this.next();
                    results.add(next);
                    if (limit > 0 && results.size() == limit) {
                        break LOOP;
                    }
                    continue;
                case DONE:
                    // Row finished; copy out and report more rows remain.
                    outResult.addAll(results);
                    return true;
                case DONE_SCAN:
                    close();
                    outResult.addAll(results);
                    return false;
                case SEEK_NEXT_ROW:
                    this.next();
                    break;
                case SEEK_NEXT_COL:
                    this.next();
                    break;
                case SKIP:
                    this.next();
                    break;
                default:
                    throw new RuntimeException("UNEXPECTED");
                }
            }
            if (!results.isEmpty()) {
                outResult.addAll(results);
                return true;
            }
            // No more keys
            close();
            return false;
        }

        @Override
        public boolean next(final List<KeyValue> results) throws IOException {
            return next(results, -1);
        }
    }

    /**
     * Simple wrapper for Put and Delete since they don't have a common enough
     * interface. Exactly one of {@link #getPut()} / {@link #getDelete()} is
     * non-null.
     */
    class WriteAction {

        private Put put;
        private Delete delete;

        public WriteAction(final Put put) {
            if (null == put) {
                throw new IllegalArgumentException(
                        "WriteAction requires a Put or a Delete.");
            }
            this.put = put;
        }

        public WriteAction(final Delete delete) {
            if (null == delete) {
                throw new IllegalArgumentException(
                        "WriteAction requires a Put or a Delete.");
            }
            this.delete = delete;
        }

        public Put getPut() {
            return put;
        }

        public Delete getDelete() {
            return delete;
        }

        public byte[] getRow() {
            if (put != null) {
                return put.getRow();
            } else if (delete != null) {
                return delete.getRow();
            }
            throw new IllegalStateException("WriteAction is invalid");
        }

        /**
         * Flatten this action into its constituent KeyValues. A whole-row
         * delete (empty family map) is expanded into one DeleteFamily marker
         * per family in the table schema.
         */
        @SuppressWarnings("deprecation")
        List<KeyValue> getKeyValues() {
            List<KeyValue> edits = new ArrayList<KeyValue>();
            Collection<List<KeyValue>> kvsList;
            if (put != null) {
                kvsList = put.getFamilyMap().values();
            } else if (delete != null) {
                if (delete.getFamilyMap().isEmpty()) {
                    // If whole-row delete then we need to expand for each
                    // family
                    kvsList = new ArrayList<List<KeyValue>>(1);
                    for (byte[] family : regionInfo.getTableDesc()
                            .getFamiliesKeys()) {
                        KeyValue familyDelete = new KeyValue(delete.getRow(),
                                family, null, delete.getTimeStamp(),
                                KeyValue.Type.DeleteFamily);
                        kvsList.add(Collections.singletonList(familyDelete));
                    }
                } else {
                    kvsList = delete.getFamilyMap().values();
                }
            } else {
                throw new IllegalStateException("WriteAction is invalid");
            }
            for (List<KeyValue> kvs : kvsList) {
                edits.addAll(kvs);
            }
            return edits;
        }
    }

    /**
     * Get the puts and deletes in transaction order.
     *
     * @return Return the writeOrdering.
     */
    List<WriteAction> getWriteOrdering() {
        return writeOrdering;
    }
}