/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode.bookkeeper.metadata;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.bookkeeper.client.LedgerHandle;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.server.namenode.bookkeeper.BookKeeperJournalManager;
import org.apache.hadoop.hdfs.server.namenode.bookkeeper.metadata.proto.EditLogLedgerMetadataWritable;
import org.apache.hadoop.hdfs.server.namenode.bookkeeper.metadata.proto.WritableUtil;
import org.apache.hadoop.hdfs.server.namenode.bookkeeper.zk.RecoveringZooKeeper;
import org.apache.hadoop.hdfs.server.namenode.bookkeeper.zk.ZooKeeperIface;
import org.apache.hadoop.io.Writable;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.data.Stat;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.TreeSet;
import static org.apache.hadoop.hdfs.server.namenode.bookkeeper.zk.ZkUtil.deleteRecursively;
import static org.apache.hadoop.hdfs.server.namenode.bookkeeper.zk.ZkUtil.interruptedException;
import static org.apache.hadoop.hdfs.server.namenode.bookkeeper.zk.ZkUtil.joinPath;
import static org.apache.hadoop.hdfs.server.namenode.bookkeeper.zk.ZkUtil.keeperException;
/**
* Since BookKeeper only keeps track of numeric ledger ids, we need to
* maintain our own mapping of edit log segment information (first and
* last transaction ids, or whether or not a segment is in progress)
* in a separate metadata store (currently this store is ZooKeeper).
* <p>
* The purpose of this class is to provide a path-based API for managing edit
* log segment metadata associated with BookKeeper ledgers, to encapsulate
* ZooKeeper handling (listing ZNode children, reading, writing, and
* serializing to ZNodes), and to prevent "leaks" of ZooKeeper exceptions
* (e.g., returning null if a ZNode we are trying to read does not exist,
* and returning false if a ZNode we are trying to create already exists instead
* of propagating ZooKeeper exceptions upwards). It can also
* (if {@link RecoveringZooKeeper} is passed in as the {@link ZooKeeperIface}
* implementation in the constructor) handle recovery from ZooKeeper
* errors such as connection loss.
*/
public class BookKeeperJournalMetadataManager {
/** Prefix for ZNodes holding metadata for in-progress log segments */
public static final String BKJM_EDIT_INPROGRESS = "inprogress_";
/** Suffix for ZNodes holding metadata for corrupt regions */
@VisibleForTesting
public static final String BKJM_EDIT_CORRUPT = ".corrupt";
private static final Log LOG =
LogFactory.getLog(BookKeeperJournalMetadataManager.class);
private static final ThreadLocal<EditLogLedgerMetadataWritable>
localWritable = new ThreadLocal<EditLogLedgerMetadataWritable>(){
@Override
protected EditLogLedgerMetadataWritable initialValue() {
return new EditLogLedgerMetadataWritable();
}
};
// It is up to the implementation of ZooKeeperIface to handle retries
// and re-connecting to ZooKeeper, e.g., RecoveringZooKeeper
private final ZooKeeperIface zooKeeper;
// A common prefix for all BK-related ZNodes. This can usually be set to
// the name of the HDFS namespace
private final String zooKeeperParentPath;
// Information for a ledger would be stored as a child under this ZNode
private final String ledgerParentPath;
/**
* Create a new instance and the required ZooKeeper znodes (if they do not
* already exist).
* @param zooKeeper The {@link ZooKeeperIface} implementation to use. This
* instance is responsible for any handling of ZooKeeper
* connection loss
* <b>it is recommended to use {@link RecoveringZooKeeper}</b>
* @param zooKeeperParentPath The ZooKeeper namespace for all
* {@link BookKeeperJournalManager} related ZNodes.
* This be can be the same as the HDFS namespace.
* @throws IOException If unrecoverable error when initializing the
* ZooKeeper namespace.
*/
public BookKeeperJournalMetadataManager(ZooKeeperIface zooKeeper,
String zooKeeperParentPath) throws IOException {
this.zooKeeper = zooKeeper;
this.zooKeeperParentPath = zooKeeperParentPath;
this.ledgerParentPath =
joinPath(zooKeeperParentPath, "ledgers");
}
public String getLedgerParentPath() {
return ledgerParentPath;
}
/**
* Create znodes for storing ledger metadata if they have not been
* created before
* @throws IOException If there an unrecoverable error talking to ZooKeeper
*/
public void init() throws IOException {
try {
if (zooKeeper.exists(zooKeeperParentPath, false) == null) {
zooKeeper.create(zooKeeperParentPath, new byte[] { '0' },
ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
LOG.info("Created ZNode " + zooKeeperParentPath);
}
if (zooKeeper.exists(ledgerParentPath, false) == null) {
zooKeeper.create(ledgerParentPath, new byte[] { '0' },
ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
LOG.info("Created ZNode" + ledgerParentPath);
}
} catch (InterruptedException e) {
interruptedException("Interrupted ensuring that ZNodes " +
zooKeeperParentPath + " and " + ledgerParentPath + " exist!", e);
} catch (KeeperException e) {
keeperException(
"Unrecoverable ZooKeeper error ensuring that ZNodes "
+ zooKeeperParentPath + " and " + ledgerParentPath + " exist!",
e);
}
}
/**
* Create a name to use for a ZNode holding a ledger's metadata
* @param e Metadata object that we want to create a name for
* @return A string containing the name of a ledger's child ZNode
*/
public static String nameForLedger(EditLogLedgerMetadata e) {
return nameForLedger(e.getFirstTxId(), e.getLastTxId());
}
private static String nameForLedger(long firstTxId, long lastTxId) {
boolean isInProgress = lastTxId == -1;
StringBuilder nameBuilder = new StringBuilder();
nameBuilder.append("ledger-");
if (isInProgress) {
nameBuilder.append(BKJM_EDIT_INPROGRESS)
.append(firstTxId);
} else {
nameBuilder.append(firstTxId)
.append("_")
.append(lastTxId);
}
return nameBuilder.toString();
}
public Versioned<EditLogLedgerMetadata> findInProgressLedger(long firstTxId)
throws IOException {
String fullyQualifiedInProgressPath =
fullyQualifiedPathForLedger(nameForLedger(firstTxId, -1));
// TODO: try using a ThreadLocal here as well. Will test this once unit
// test is written for this method
Stat stat = new Stat();
EditLogLedgerMetadataWritable metadataWritable = readWritableFromZk(
fullyQualifiedInProgressPath, localWritable.get(), stat);
return metadataWritable == null ?
null : Versioned.of(stat.getVersion(), metadataWritable.get());
}
/**
* Return the full ZNode path for a ZNode corresponding to the specified
* ledger.
* @param ledgerName The name of the ledger
* @return Fully qualified ZNode path that may be read from or written to.
*/
public String fullyQualifiedPathForLedger(String ledgerName) {
return joinPath(ledgerParentPath, ledgerName);
}
/**
* Return the full ZNode path for a ZNode corresponding to a specific
* to a specific ledger's metadata.
* @param e Metadata object that we want to get the full path for
* @return Full path to the ledger's child ZNode
*/
public String fullyQualifiedPathForLedger(EditLogLedgerMetadata e) {
String nameForLedger = nameForLedger(e);
return fullyQualifiedPathForLedger(nameForLedger);
}
/**
* Removes ledger-related Metadata from BookKeeper. Does not delete
* the ledger itself.
* @param ledger The object for the ledger metadata that we want to delete
* from BookKeeper
* @param version The version of the ledger metadata (or -1 to delete any
* version). Used as a way to guard against deleting a ledger
* metadata that is being updated by another process.
* @return True if the process successfully deletes the metadata objection,
* false if it has already been deleted by another process.
* @throws IOException If there is an error communicating to ZooKeeper or if
* the metadata object has been modified within ZooKeeper
* by another process (version mis-match).
*/
public boolean deleteLedgerMetadata(EditLogLedgerMetadata ledger, int version)
throws IOException {
String ledgerPath = fullyQualifiedPathForLedger(ledger);
try {
zooKeeper.delete(ledgerPath, version);
return true;
} catch (KeeperException.NoNodeException e) {
LOG.warn(ledgerPath + " does not exist. Returning false, ignoring " +
e);
} catch (KeeperException.BadVersionException e) {
keeperException("Unable to delete " + ledgerPath + ", version does not match." +
" Updated by another process?", e);
} catch (KeeperException e) {
keeperException("Unrecoverable ZooKeeper error deleting " + ledgerPath,
e);
} catch (InterruptedException e) {
interruptedException("Interrupted deleting " + ledgerPath, e);
}
return false;
}
public boolean ledgerExists(String fullyQualifiedPath) throws IOException {
try {
return zooKeeper.exists(fullyQualifiedPath, false) != null;
} catch (KeeperException e) {
keeperException("Unrecoverable ZooKeeper error checking if " +
fullyQualifiedPath + " exists!", e);
return false; // Never reached
} catch (InterruptedException e) {
interruptedException("Interrupted checking if " + fullyQualifiedPath +
" exists!", e);
return false; // Never reached
}
}
/**
* Read a {@link Writable} from a specified ZNode path ZooKeeper and
* (optionally) update its {@link Stat} information (if supplied).
* The ZNode must either not exist or contain a valid writable: an empty
* ZNode will result in an IllegalArgumentException being thrown.
* @param fullyQualifiedPath Full path to the ZNode containing the writable
* @param writable A newly instantiated instance of the writable. Must not
* be null.
* @param stat The stat object for ZNode stats or null if none desired.
* @param <T> Type of the writable (must implement Writable)
* @return The updated writable instance or null if the specified path does
* not exist
* @throws IOException If there is an unrecoverable error communicating to
* ZooKeeper
* @throws IllegalArgumentException If the data contained in the specified
* ZNode is null
*/
public <T extends Writable> T readWritableFromZk(
String fullyQualifiedPath, T writable, Stat stat) throws IOException {
try {
byte[] data = zooKeeper.getData(fullyQualifiedPath, false, stat);
if (data == null) {
LOG.fatal("ZNode " + fullyQualifiedPath + " has no ledger metadata!");
throw new IOException("ZNode " + fullyQualifiedPath +
" has no ledger metadata!");
}
return WritableUtil.readWritableFromByteArray(data, writable);
} catch (InterruptedException e) {
interruptedException("Interrupted reading from " +
fullyQualifiedPath, e);
return null; // Should not be reached
} catch (KeeperException.NoNodeException e) {
LOG.warn("ZNode " + fullyQualifiedPath +
" does not exist, returning null! Not re-throwing " + e);
return null;
} catch (KeeperException e) {
keeperException("Unrecoverable ZooKeeper error reading from " +
fullyQualifiedPath, e);
return null;
}
}
/**
* Instantiate an {@link EditLogLedgerMetadata} from a specified ZNode path.
* @param fullyQualifiedPath Full path to the ZNode containing the ledger
* metadata.
* @return The edit log ledger metadata or null if the ZNode does not exist
* @throws IOException If there as an unrecoverable error communicating with
* ZooKeeper
*/
public EditLogLedgerMetadata readEditLogLedgerMetadata(
String fullyQualifiedPath) throws IOException {
EditLogLedgerMetadataWritable writable = localWritable.get();
writable = readWritableFromZk(fullyQualifiedPath, writable, null);
return writable == null ? null : writable.get();
}
/**
* Write an {@link EditLogLedgerMetadata} object to a specified ZNode.
* @param fullyQualifiedPath Full path to the ZNode containing the ledger
* metadata.
* @param editLogLedgerMetadata The edit log ledger metadata to write.
* @return True if we wrote successfully to the specified ZNode, or false if
* the ZNode already exists.
* @throws IOException If there is an unrecoverable error communicating with
* ZooKeeper.
*/
public boolean writeEditLogLedgerMetadata(String fullyQualifiedPath,
EditLogLedgerMetadata editLogLedgerMetadata) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Attempting to write " + editLogLedgerMetadata + " to " +
fullyQualifiedPath);
}
EditLogLedgerMetadataWritable writable = localWritable.get();
writable.set(editLogLedgerMetadata);
byte[] data = WritableUtil.writableToByteArray(writable);
try {
if (zooKeeper.exists(fullyQualifiedPath, false) != null) {
LOG.warn(fullyQualifiedPath + " already exists. Will not override!");
return false;
}
zooKeeper.create(fullyQualifiedPath, data, ZooDefs.Ids.OPEN_ACL_UNSAFE,
CreateMode.PERSISTENT);
return true;
} catch (InterruptedException e) {
interruptedException("Interrupted creating " + fullyQualifiedPath,
e);
return false; // never reached
} catch (KeeperException.NodeExistsException e) {
LOG.warn(fullyQualifiedPath +
" already exists, returning false! Ignoring " + e);
return false;
} catch (KeeperException e) {
keeperException("Unrecoverable ZooKeeper error creating " +
fullyQualifiedPath, e);
return false;
}
}
/**
* Verify that the specified EditLogLedgerMetadata instance is the same
* as the EditLogLedgerMetadata object stored in the specified ZNode path.
* @param metadata The instance to compare against
* @param fullPathToVerify The path to verify
* @return True if we are able to successfully verify the metadata
*/
public boolean verifyEditLogLedgerMetadata(EditLogLedgerMetadata metadata,
String fullPathToVerify) {
Preconditions.checkNotNull(metadata);
try {
EditLogLedgerMetadata otherMetadata =
readEditLogLedgerMetadata(fullPathToVerify);
if (otherMetadata == null) {
LOG.warn("No metadata found " + fullPathToVerify + "!");
}
if (LOG.isTraceEnabled()) {
LOG.trace("Verifying " + otherMetadata + " read from " +
fullPathToVerify + " against " + metadata);
}
return metadata.equals(otherMetadata);
} catch (IOException e) {
LOG.error("Unrecoverable error when verifying " + fullPathToVerify, e);
}
return false;
}
public void moveAsideCorruptLedger(EditLogLedgerMetadata ledger)
throws IOException {
deleteLedgerMetadata(ledger, -1);
writeEditLogLedgerMetadata(fullyQualifiedPathForLedger(ledger) +
BKJM_EDIT_CORRUPT, ledger);
}
/**
* List all ledgers in this instance's ZooKeeper namespace.
* @param includeInProgressLedgers If true, will include in-progress
* (non-finalized) ledgers
* @return A list of ledgers ordered (in increasing order) by the first
* transaction id, using
* {@link EditLogLedgerMetadata#compareTo(EditLogLedgerMetadata)}
* @throws IOException
*/
public Collection<EditLogLedgerMetadata> listLedgers(
boolean includeInProgressLedgers) throws IOException {
// Use TreeSet to sort ledgers by firstTxId
TreeSet<EditLogLedgerMetadata> ledgers =
new TreeSet<EditLogLedgerMetadata>();
try {
List<String> ledgerNames = zooKeeper.getChildren(ledgerParentPath,
false);
for (String ledgerName : ledgerNames) {
if (ledgerName.endsWith(BKJM_EDIT_CORRUPT)) {
continue;
}
if (!includeInProgressLedgers && ledgerName.contains(BKJM_EDIT_INPROGRESS)) {
continue;
}
String fullLedgerMetadataPath = fullyQualifiedPathForLedger(ledgerName);
EditLogLedgerMetadata metadata =
readEditLogLedgerMetadata(fullLedgerMetadataPath);
if (metadata != null) {
if (LOG.isTraceEnabled()) {
LOG.trace("Read " + metadata + " from " + fullLedgerMetadataPath);
}
ledgers.add(metadata);
} else { // metadata would be returns null iff path doesn't exist
LOG.warn("ZNode " + fullLedgerMetadataPath +
" might have been finalized and deleted.");
}
}
} catch (InterruptedException e) {
interruptedException(
"Interrupted listing ledgers under " + ledgerParentPath, e);
} catch (KeeperException e) {
keeperException("Unrecoverable ZooKeeper error listing ledgers " +
"under " + ledgerParentPath, e);
}
return ledgers;
}
}