/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.Writable;

/**
 * WALEdit: Used in HBase's transaction log (WAL) to represent
 * the collection of edits (KeyValue objects) corresponding to a
 * single transaction. The class implements the "Writable" interface
 * for serializing/deserializing a set of KeyValue items.
 *
 * <p>Previously, if a transaction contained 3 edits to c1, c2, c3 for a row R,
 * the HLog would have three log entries as follows:
 *
 * <pre>
 *    &lt;logseq1-for-edit1&gt;:&lt;KeyValue-for-edit-c1&gt;
 *    &lt;logseq2-for-edit2&gt;:&lt;KeyValue-for-edit-c2&gt;
 *    &lt;logseq3-for-edit3&gt;:&lt;KeyValue-for-edit-c3&gt;
 * </pre>
 *
 * <p>This presents problems because row level atomicity of transactions
 * was not guaranteed. If we crash after only a few of the above appends make
 * it, then recovery will restore a partial transaction.
 *
 * <p>In the new world, all the edits for a given transaction are written
 * out as a single record, for example:
 *
 * <pre>
 *   &lt;logseq#-for-entire-txn&gt;:&lt;WALEdit-for-entire-txn&gt;
 * </pre>
 *
 * <p>where the WALEdit is serialized as:
 *
 * <pre>
 *   &lt;-1, # of edits, &lt;KeyValue&gt;, &lt;KeyValue&gt;, ... ,
 *        # of families, &lt;family, scope&gt;, &lt;family, scope&gt;, ... &gt;
 * </pre>
 *
 * <p>For example (with no scopes set, so the family count is 0):
 *
 * <pre>
 *   &lt;-1, 3, &lt;KeyValue-for-edit-c1&gt;, &lt;KeyValue-for-edit-c2&gt;, &lt;KeyValue-for-edit-c3&gt;, 0&gt;
 * </pre>
 *
 * <p>The -1 marker is just a special way of being backward compatible with
 * an old HLog which would have contained a single &lt;KeyValue&gt;.
 *
 * <p>The deserializer for WALEdit backward compatibly detects if the record
 * is an old style KeyValue or the new style WALEdit.
 */
public class WALEdit implements Writable, HeapSize {

  /**
   * Marker written as the first int of a new-style record. An old-style record
   * starts with the length of its single KeyValue instead, which is how
   * {@link #readFields(DataInput)} tells the two formats apart.
   */
  private static final int VERSION_2 = -1;

  /** The edits of this transaction, in the order they were added or read. */
  private final List<KeyValue> kvs = new ArrayList<KeyValue>();

  /** Per-family scope values; {@code null} until set or read from a record. */
  private NavigableMap<byte[], Integer> scopes;

  /** Creates an empty edit with no KeyValues and no scopes. */
  public WALEdit() {
  }

  /**
   * Appends a KeyValue to this transaction.
   *
   * @param kv the edit to append
   */
  public void add(KeyValue kv) {
    this.kvs.add(kv);
  }

  /**
   * @return true if this edit holds no KeyValues
   */
  public boolean isEmpty() {
    return kvs.isEmpty();
  }

  /**
   * @return the number of KeyValues in this edit
   */
  public int size() {
    return kvs.size();
  }

  /**
   * Returns the live backing list of edits (not a copy); changes made to the
   * returned list are reflected in this WALEdit.
   *
   * @return the KeyValues of this transaction
   */
  public List<KeyValue> getKeyValues() {
    return kvs;
  }

  /**
   * @return the family-to-scope map, or {@code null} if none has been set or read
   */
  public NavigableMap<byte[], Integer> getScopes() {
    return scopes;
  }

  /**
   * Sets the family-to-scope map. The map is stored by reference, not copied;
   * note that {@link #readFields(DataInput)} clears whatever map is currently
   * stored here.
   *
   * @param scopes the scopes to attach to this edit, may be {@code null}
   */
  public void setScopes(NavigableMap<byte[], Integer> scopes) {
    // We currently process the map outside of WALEdit,
    // TODO revisit when replication is part of core
    this.scopes = scopes;
  }

  /**
   * Replaces the content of this edit with a record read from {@code in}.
   * Both the new multi-KeyValue format and the old single-KeyValue format are
   * accepted. Any previously held KeyValues are dropped and an existing scope
   * map is cleared in place before reading.
   *
   * @param in the input to deserialize from
   * @throws IOException if reading from {@code in} fails
   */
  public void readFields(DataInput in) throws IOException {
    kvs.clear();
    if (scopes != null) {
      scopes.clear();
    }

    int versionOrLength = in.readInt();
    if (versionOrLength != VERSION_2) {
      // This is an old style HLog entry. The int that we just
      // read is actually the length of a single KeyValue.
      KeyValue kv = new KeyValue();
      kv.readFields(versionOrLength, in);
      this.add(kv);
      return;
    }

    // This is a new style HLog entry containing multiple KeyValues.
    int numEdits = in.readInt();
    for (int idx = 0; idx < numEdits; idx++) {
      KeyValue kv = new KeyValue();
      kv.readFields(in);
      this.add(kv);
    }

    // The scope section follows the edits: a count, then (family, scope) pairs.
    int numFamilies = in.readInt();
    if (numFamilies > 0) {
      if (scopes == null) {
        scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
      }
      for (int i = 0; i < numFamilies; i++) {
        byte[] fam = Bytes.readByteArray(in);
        int scope = in.readInt();
        scopes.put(fam, scope);
      }
    }
  }

  /**
   * Serializes this edit in the new-style format: the version marker, the
   * number of KeyValues, every KeyValue, the number of scoped families (0 when
   * no scope map is set) and finally every (family, scope) pair.
   *
   * @param out the output to serialize to
   * @throws IOException if writing to {@code out} fails
   */
  public void write(DataOutput out) throws IOException {
    out.writeInt(VERSION_2);

    // All KeyValues are written first, then the scope section.
    out.writeInt(kvs.size());
    for (KeyValue kv : kvs) {
      kv.write(out);
    }

    if (scopes == null) {
      out.writeInt(0);
      return;
    }
    out.writeInt(scopes.size());
    // Walk the entries directly rather than looking each key up again.
    for (Map.Entry<byte[], Integer> entry : scopes.entrySet()) {
      Bytes.writeByteArray(out, entry.getKey());
      out.writeInt(entry.getValue());
    }
  }

  /**
   * Estimates the heap used by this edit as the sum of the heap sizes of its
   * KeyValues plus, when a scope map is present, a fixed TreeMap overhead and
   * an aligned per-entry cost.
   *
   * @return the estimated heap size
   */
  public long heapSize() {
    long ret = 0;
    for (KeyValue kv : kvs) {
      ret += kv.heapSize();
    }
    if (scopes != null) {
      ret += ClassSize.TREEMAP;
      ret += ClassSize.align(scopes.size() * ClassSize.MAP_ENTRY);
      // TODO this isn't quite right, need help here
      // (nothing is added for the list holding the KeyValues or for the
      // family byte arrays used as keys)
    }
    return ret;
  }

  /**
   * @return a human readable rendering listing the number of edits, each
   *         KeyValue and, if present, the scopes
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();

    sb.append("[#edits: ").append(kvs.size()).append(" = <");
    for (KeyValue kv : kvs) {
      sb.append(kv.toString());
      sb.append("; ");
    }
    if (scopes != null) {
      sb.append(" scopes: ").append(scopes.toString());
    }
    sb.append(">]");
    return sb.toString();
  }
}