/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.qjournal.client;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;
import org.apache.hadoop.hdfs.server.namenode.EditsDoubleBuffer;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.metrics.util.MetricsBase;
import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate;
/**
* EditLogOutputStream implementation that writes to a quorum of
* remote journals.
*/
class QuorumOutputStream extends EditLogOutputStream {
  static final Log LOG = LogFactory.getLog(QuorumOutputStream.class);

  /** The set of asynchronous loggers that every flushed batch is sent to. */
  private final AsyncLoggerSet loggers;

  /**
   * Buffer that accumulates edits between flushes. Set to {@code null} by
   * {@link #close()} and {@link #abort()}; the write/flush methods do not
   * guard against that and will fail with a NullPointerException afterwards.
   */
  private EditsDoubleBuffer buf;

  /** Transaction id passed to the loggers as the segment id for every batch. */
  private final long segmentTxId;

  /** Timeout, in milliseconds, used when waiting for a write quorum. */
  private final int writeTimeoutMs;

  /** Journal identifier; used in the metric name and in {@link #toString()}. */
  private final String journalId;

  /**
   * Creates a stream that buffers edits locally and ships them to
   * {@code loggers} on flush.
   *
   * @param loggers              loggers that receive the flushed edits
   * @param txId                 transaction id identifying the segment
   * @param outputBufferCapacity capacity handed to the {@link EditsDoubleBuffer}
   * @param writeTimeoutMs       quorum wait timeout in milliseconds
   * @param metrics              metrics holder, or {@code null} to skip metric
   *                             registration
   * @param journalId            journal identifier
   * @throws IOException declared for callers; nothing in this constructor body
   *                     visibly throws it
   */
  public QuorumOutputStream(AsyncLoggerSet loggers, long txId,
      int outputBufferCapacity, int writeTimeoutMs, NameNodeMetrics metrics,
      String journalId) throws IOException {
    super();
    this.buf = new EditsDoubleBuffer(outputBufferCapacity);
    this.loggers = loggers;
    this.segmentTxId = txId;
    this.writeTimeoutMs = writeTimeoutMs;
    this.journalId = journalId;

    if (metrics != null) { // Metrics is non-null only when used inside name node
      // Reuse a metric already present in the registry under this name;
      // otherwise create a new one against the same registry.
      // NOTE(review): `sync` is not declared in this class - presumably a
      // field inherited from EditLogOutputStream; confirm.
      String metricsName = "sync_qjm_" + journalId + "_edit";
      MetricsBase retrMetrics = metrics.registry.get(metricsName);
      if (retrMetrics != null) {
        sync = (MetricsTimeVaryingRate) retrMetrics;
      } else {
        sync = new MetricsTimeVaryingRate(metricsName, metrics.registry,
            "Journal Sync for " + journalId);
      }
    }
  }

  /** Appends a single edit log operation to the local buffer. */
  @Override
  public void write(FSEditLogOp op) throws IOException {
    buf.writeOp(op);
  }

  /** Appends raw bytes to the local buffer. */
  @Override
  public void writeRaw(byte[] bytes, int offset, int length) throws IOException {
    buf.writeRaw(bytes, offset, length);
  }

  /** Appends the raw bytes of one operation, tagged with its transaction id. */
  @Override
  public void writeRawOp(byte[] bytes, int offset, int length, long txid)
      throws IOException {
    buf.writeRawOp(bytes, offset, length, txid);
  }

  /**
   * Not supported by this stream.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void create() throws IOException {
    throw new UnsupportedOperationException();
  }

  /**
   * Closes the local buffer and drops the reference to it. Calling this again
   * (or after {@link #abort()}) is a no-op.
   */
  @Override
  public void close() throws IOException {
    if (buf != null) {
      buf.close();
      buf = null;
    }
  }

  /**
   * Abandons the stream. The buffer reference is cleared <em>before</em>
   * {@link #close()} is invoked, so {@code buf.close()} is never called on
   * this path.
   * NOTE(review): presumably intentional, so that pending (unflushed) edits
   * are discarded without the buffer's close-time checks - confirm against
   * EditsDoubleBuffer.close().
   */
  @Override
  public void abort() throws IOException {
    QuorumJournalManager.LOG.warn("Aborting " + this);
    buf = null;
    close();
  }

  /** Delegates to the buffer to mark the accumulated edits as ready to flush. */
  @Override
  public void setReadyToFlush() throws IOException {
    buf.setReadyToFlush();
  }

  /**
   * Sends the ready portion of the buffer to the loggers and waits for a
   * write quorum. Does nothing when no bytes are ready.
   *
   * @param durable not used by this implementation
   * @throws IOException           if waiting for the write quorum fails; the
   *                               failure is logged with batch details and
   *                               rethrown unchanged
   * @throws IllegalStateException if bytes are ready but no transactions are,
   *                               or if the copied batch does not have the
   *                               expected size
   */
  @Override
  protected void flushAndSync(boolean durable) throws IOException {
    int numReadyBytes = buf.countReadyBytes();
    if (numReadyBytes <= 0) {
      return;
    }

    int numReadyTxns = buf.countReadyTxns();
    long firstTxToFlush = buf.getFirstReadyTxId();
    // A non-empty batch must carry at least one transaction. With zero
    // transactions the committed txid computed below would be
    // firstTxToFlush - 1, i.e. before the start of this batch.
    if (numReadyTxns <= 0) {
      LOG.warn("There are no ready transaction");
      throw new IllegalStateException("There are no ready transaction");
    }

    // Copy from our double-buffer into a new byte array. This is for
    // two reasons:
    // 1) The IPC code has no way of specifying to send only a slice of
    //    a larger array.
    // 2) because the calls to the underlying nodes are asynchronous, we
    //    need a defensive copy to avoid accidentally mutating the buffer
    //    before it is sent.
    DataOutputBuffer bufToSend = new DataOutputBuffer(numReadyBytes);
    buf.flushTo(bufToSend);
    if (bufToSend.getLength() != numReadyBytes) {
      LOG.warn("Buffer size mismatch");
      throw new IllegalStateException("Buffer size mismatch");
    }

    // The whole backing array is sent, so it must hold exactly the bytes
    // written and nothing more.
    byte[] data = bufToSend.getData();
    if (data.length != bufToSend.getLength()) {
      LOG.warn("Data size mismatch");
      throw new IllegalStateException("Data size mismatch");
    }

    QuorumCall<AsyncLogger, Void> qcall = loggers.sendEdits(
        segmentTxId, firstTxToFlush, numReadyTxns, data);
    try {
      loggers.waitForWriteQuorum(qcall, writeTimeoutMs, "sendEdits");
    } catch (IOException e) {
      String msg = "Got IOException when waiting for sendEdits. SegmentTxId: " + segmentTxId +
          ", firstTxToFlush: " + firstTxToFlush + ", numReadyTxns: " + numReadyTxns + ", lengthOfData: " +
          data.length;
      LOG.error(msg, e);
      // re-throw
      throw e;
    }

    // Since we successfully wrote this batch, let the loggers know. Any future
    // RPCs will thus let the loggers know of the most recent transaction, even
    // if a logger has fallen behind.
    loggers.setCommittedTxId(firstTxToFlush + numReadyTxns - 1, false);
  }

  /** Describes this stream by journal id and segment transaction id. */
  @Override
  public String toString() {
    return "QuorumOutputStream " + journalId + " starting at txid " + segmentTxId;
  }

  /** Delegates the force-sync decision to the buffer. */
  @Override
  public boolean shouldForceSync() {
    return buf.shouldForceSync();
  }

  /**
   * Not implemented for this stream.
   *
   * @return always {@code null}
   */
  @Override
  public String getName() {
    // NOTE(review): unimplemented stub; callers must tolerate null.
    return null;
  }

  /**
   * Not implemented for this stream.
   *
   * @return always {@code 0}
   */
  @Override
  public long length() throws IOException {
    // NOTE(review): unimplemented stub; the value does not reflect bytes written.
    return 0;
  }

  /**
   * Builds an HTML fragment naming the segment's transaction id, followed by
   * whatever the loggers append for their own report.
   */
  @Override
  public String generateHtmlReport() {
    StringBuilder sb = new StringBuilder();
    sb.append("Writing segment beginning at txid ").append(segmentTxId).append("<br/>");
    loggers.appendHtmlReport(sb);
    return sb.toString();
  }
}