/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.qjournal.client; import java.io.IOException; import java.util.List; import java.util.Map; import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.HdfsConstants.Transition; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetStorageStateProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.hdfs.server.protocol.RemoteImageManifest; import org.apache.hadoop.io.MD5Hash; import org.apache.jasper.compiler.JspUtil; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import com.google.common.util.concurrent.ListenableFuture; /** * Wrapper around a set of Loggers, taking care of fanning out * calls to the underlying loggers and constructing corresponding * {@link QuorumCall} instances. */ class AsyncLoggerSet { static final Log LOG = LogFactory.getLog(AsyncLoggerSet.class); private final List<AsyncLogger> loggers; private static final long INVALID_EPOCH = -1; private long myEpoch = INVALID_EPOCH; public AsyncLoggerSet(List<AsyncLogger> loggers) { this.loggers = ImmutableList.copyOf(loggers); } /** * Set the epoch number used for all future calls. */ void setEpoch(long e) { Preconditions.checkState(!isEpochEstablished(), "Epoch already established: epoch=%s", myEpoch); myEpoch = e; for (AsyncLogger l : loggers) { l.setEpoch(e); } } /** * Set the highest successfully committed txid seen by the writer. * This should be called after a successful write to a quorum, and is used * for extra sanity checks against the protocol. See HDFS-3863. */ public void setCommittedTxId(long txid, boolean force) { for (AsyncLogger logger : loggers) { logger.setCommittedTxId(txid, force); } } /** * @return true if an epoch has been established. */ boolean isEpochEstablished() { return myEpoch != INVALID_EPOCH; } /** * @return the epoch number for this writer. This may only be called after * a successful call to {@link #createNewUniqueEpoch(NamespaceInfo)}. */ long getEpoch() { Preconditions.checkState(isEpochEstablished(), "No epoch created yet"); return myEpoch; } /** * Close all of the underlying loggers. */ void close() { for (AsyncLogger logger : loggers) { logger.close(); } } void purgeLogsOlderThan(long minTxIdToKeep) { for (AsyncLogger logger : loggers) { logger.purgeLogsOlderThan(minTxIdToKeep); } } /** * Wait for a quorum of loggers to respond to the given call. If a quorum * can't be achieved, throws a QuorumException. * @param q the quorum call * @param timeoutMs the number of millis to wait * @param operationName textual description of the operation, for logging * @return a map of successful results * @throws QuorumException if a quorum doesn't respond with success * @throws IOException if the thread is interrupted or times out */ <V> Map<AsyncLogger, V> waitForWriteQuorum(QuorumCall<AsyncLogger, V> q, int timeoutMs, String operationName) throws IOException { int majority = getMajoritySize(); int numLoggers = loggers.size(); checkMajoritySize(majority, numLoggers); return waitForQuorumInternal(q, loggers.size(), majority, numLoggers - majority + 1, majority, timeoutMs, operationName); } /** * Wait for a all loggers to respond to the given call. * * This is useful for operations like obtaining manifests, etc. * Fail if there is a majority of exceptions, or majority of successes cannot * be achieved. Even if majority of successes is achieved, the call * waits for all responses. * * @param q the quorum call * @param timeoutMs the number of millis to wait * @param operationName textual description of the operation, for logging * @return a map of successful results * @throws QuorumException if a quorum doesn't respond with success * @throws IOException if the thread is interrupted or times out */ <V> Map<AsyncLogger, V> waitForReadQuorumWithAllResponses( QuorumCall<AsyncLogger, V> q, int timeoutMs, String operationName) throws IOException { int majority = getMajoritySize(); int numLoggers = loggers.size(); checkMajoritySize(majority, numLoggers); // we do not stop waiting even if we get majority return waitForQuorumInternal(q, loggers.size(), -1, majority, numLoggers - majority + 1, timeoutMs, operationName); } private void checkMajoritySize(int majority, int numLoggers) throws IOException { if (majority > numLoggers) { throw new IOException("Waiting for majority " + majority + " of " + numLoggers + " loggers, which is impossible."); } } private <V> Map<AsyncLogger, V> waitForQuorumInternal( QuorumCall<AsyncLogger, V> q, int minResponses, int minSuccesses, int maxExceptions, int majority, int timeoutMs, String operationName) throws IOException { try { q.waitFor( minResponses, // either all respond minSuccesses, // or we get a majority successes maxExceptions, // or we get a majority failures, timeoutMs, operationName); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new IOException("Interrupted waiting " + timeoutMs + "ms for a " + "quorum of nodes to respond."); } catch (TimeoutException e) { if (q.countSuccesses() < majority) { // for waitForReadQuorumWithAllResponses, we do not want to fail here throw new IOException("Timed out waiting " + timeoutMs + "ms for a quorum of nodes to respond. countResponses: " + q.countResponses() + " countSuccess: " + q.countSuccesses() + " countException: " + q.countExceptions()); } } // for calls where we want all responses, we need to re-check here if // the number of successes is at least the majority size, since // minSuccesses=-1 if (q.countSuccesses() < majority) { q.throwQuorumException("Got too many exceptions to achieve quorum size " + getMajorityString()); } return q.getResults(); } /** * @return the number of nodes which are required to obtain a quorum. */ int getMajoritySize() { return loggers.size() / 2 + 1; } /** * @return a textual description of the majority size (eg "2/3" or "3/5") */ String getMajorityString() { return getMajoritySize() + "/" + loggers.size(); } /** * @return the number of loggers behind this set */ int size() { return loggers.size(); } @Override public String toString() { return "[" + Joiner.on(", ").join(loggers) + "]"; } public String toHTMLString() { String html = "["; for (AsyncLogger logger : loggers) { html += logger.toHTMLString() + ","; } html = html.substring(0, html.length() - 1) + "]"; return html; } /** * Append an HTML-formatted status readout on the current * state of the underlying loggers. * @param sb the StringBuilder to append to */ void appendHtmlReport(StringBuilder sb) { sb.append("<table class=\"storage\">"); sb.append("<thead><tr><td>JN</td><td>Status</td></tr></thead>\n"); for (AsyncLogger l : loggers) { sb.append("<tr>"); sb.append("<td>" + JspUtil.escapeXml(l.toString()) + "</td>"); sb.append("<td>"); l.appendHtmlReport(sb); sb.append("</td></tr>\n"); } sb.append("</table>"); } /** * @return the (mutable) list of loggers, for use in tests to * set up spies */ @VisibleForTesting List<AsyncLogger> getLoggersForTests() { return loggers; } /////////////////////////////////////////////////////////////////////////// // The rest of this file is simply boilerplate wrappers which fan-out the // various IPC calls to the underlying AsyncLoggers and wrap the result // in a QuorumCall. /////////////////////////////////////////////////////////////////////////// public QuorumCall<AsyncLogger, GetJournalStateResponseProto> getJournalState() { Map<AsyncLogger, ListenableFuture<GetJournalStateResponseProto>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { calls.put(logger, logger.getJournalState()); } return QuorumCall.create(calls); } public QuorumCall<AsyncLogger, Boolean> isJournalFormatted() { Map<AsyncLogger, ListenableFuture<Boolean>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { calls.put(logger, logger.isJournalFormatted()); } return QuorumCall.create(calls); } public QuorumCall<AsyncLogger,NewEpochResponseProto> newEpoch( NamespaceInfo nsInfo, long epoch) { Map<AsyncLogger, ListenableFuture<NewEpochResponseProto>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { calls.put(logger, logger.newEpoch(epoch)); } return QuorumCall.create(calls); } public QuorumCall<AsyncLogger, Void> startLogSegment( long txid) { Map<AsyncLogger, ListenableFuture<Void>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { calls.put(logger, logger.startLogSegment(txid)); } return QuorumCall.create(calls); } public QuorumCall<AsyncLogger, Void> finalizeLogSegment(long firstTxId, long lastTxId) { Map<AsyncLogger, ListenableFuture<Void>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { calls.put(logger, logger.finalizeLogSegment(firstTxId, lastTxId)); } return QuorumCall.create(calls); } public QuorumCall<AsyncLogger, Void> sendEdits( long segmentTxId, long firstTxnId, int numTxns, byte[] data) { Map<AsyncLogger, ListenableFuture<Void>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture<Void> future = logger.sendEdits(segmentTxId, firstTxnId, numTxns, data); calls.put(logger, future); } return QuorumCall.create(calls); } public QuorumCall<AsyncLogger, RemoteEditLogManifest> getEditLogManifest(long fromTxnId) { Map<AsyncLogger, ListenableFuture<RemoteEditLogManifest>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture<RemoteEditLogManifest> future = logger.getEditLogManifest(fromTxnId); calls.put(logger, future); } return QuorumCall.create(calls); } QuorumCall<AsyncLogger, PrepareRecoveryResponseProto> prepareRecovery(long segmentTxId) { Map<AsyncLogger, ListenableFuture<PrepareRecoveryResponseProto>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture<PrepareRecoveryResponseProto> future = logger.prepareRecovery(segmentTxId); calls.put(logger, future); } return QuorumCall.create(calls); } QuorumCall<AsyncLogger,Void> acceptRecovery(SegmentStateProto log, String fromURL) { Map<AsyncLogger, ListenableFuture<Void>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture<Void> future = logger.acceptRecovery(log, fromURL); calls.put(logger, future); } return QuorumCall.create(calls); } QuorumCall<AsyncLogger, GetStorageStateProto> analyzeJournalStorage() { Map<AsyncLogger, ListenableFuture<GetStorageStateProto>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture<GetStorageStateProto> future = logger.analyzeJournalStorage(); calls.put(logger, future); } return QuorumCall.create(calls); } QuorumCall<AsyncLogger, Void> transitionImage(NamespaceInfo nsInfo, Transition transition, StartupOption startOpt) { Map<AsyncLogger, ListenableFuture<Void>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture<Void> future = logger.transitionImage(nsInfo, transition, startOpt); calls.put(logger, future); } return QuorumCall.create(calls); } QuorumCall<AsyncLogger, Void> transitionJournal(NamespaceInfo nsInfo, Transition transition, StartupOption startOpt) { Map<AsyncLogger, ListenableFuture<Void>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture<Void> future = logger.transitionJournal(nsInfo, transition, startOpt); calls.put(logger, future); } return QuorumCall.create(calls); } public QuorumCall<AsyncLogger, RemoteImageManifest> getImageManifest( long fromTxnId) { Map<AsyncLogger, ListenableFuture<RemoteImageManifest>> calls = Maps .newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture<RemoteImageManifest> future = logger .getImageManifest(fromTxnId); calls.put(logger, future); } return QuorumCall.create(calls); } URLImageInputStream getImageInputStream(long txid, int httpTimeout) { URLImageInputStream stream = null; for (AsyncLogger logger : loggers) { try { stream = new URLImageInputStream(logger, txid, httpTimeout); break; } catch (IOException e) { LOG.warn("Could not obtain image stream for logger: " + logger + " for txid : " + txid, e); } } return stream; } public QuorumCall<AsyncLogger, Void> saveDigestAndRenameCheckpointImage( long txid, MD5Hash digest) { Map<AsyncLogger, ListenableFuture<Void>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { calls.put(logger, logger.saveDigestAndRenameCheckpointImage(txid, digest)); } return QuorumCall.create(calls); } QuorumCall<AsyncLogger, GetStorageStateProto> analyzeImageStorage() { Map<AsyncLogger, ListenableFuture<GetStorageStateProto>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture<GetStorageStateProto> future = logger.analyzeImageStorage(); calls.put(logger, future); } return QuorumCall.create(calls); } public QuorumCall<AsyncLogger, Boolean> isImageFormatted() { Map<AsyncLogger, ListenableFuture<Boolean>> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { calls.put(logger, logger.isImageFormatted()); } return QuorumCall.create(calls); } }