/**
* Copyright 2016 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package com.github.ambry.messageformat;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.github.ambry.store.MessageInfo;
import com.github.ambry.store.StoreKey;
import com.github.ambry.store.StoreKeyFactory;
import com.github.ambry.utils.SystemTime;
import com.github.ambry.utils.Utils;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * InputStream that skips invalid blobs based on some validation criteria.
 * For now, the check only supports detection of message corruption.
 * <p>
 * All messages described by the supplied {@link MessageInfo} list are read from the source stream into a single
 * in-memory buffer while the object is being constructed. Messages that fail validation are dropped from that
 * buffer, so subsequent {@code read} calls only return the bytes of valid messages, packed back to back.
 */
public class MessageSievingInputStream extends InputStream {
  // total number of bytes belonging to valid messages; also the limit of byteBuffer
  private int validSize;
  private final Logger logger;
  // view over the bytes of all valid messages; serves the read() calls
  private ByteBuffer byteBuffer;
  private boolean hasInvalidMessages;
  private final List<MessageInfo> validMessageInfoList;

  //metrics
  public Histogram messageFormatValidationTime;
  public Histogram messageFormatBatchValidationTime;

  /**
   * Reads every message listed in {@code messageInfoList} from {@code stream}, validates each one and retains only
   * the valid ones.
   * @param stream The stream from which bytes need to be read. If the underlying stream is SocketInputStream, it needs
   *               to be blocking
   * @param messageInfoList List of MessageInfo which contains details about the messages in the stream
   * @param storeKeyFactory factory which is used to read the key from the stream
   * @param metricRegistry Metric registry used to register metrics
   * @throws java.io.IOException if reading from the stream or parsing a message fails with an I/O error
   * @throws IllegalArgumentException if the combined size of all messages is negative or larger than
   *                                  {@link Integer#MAX_VALUE} and therefore cannot be buffered in one byte array
   */
  public MessageSievingInputStream(InputStream stream, List<MessageInfo> messageInfoList,
      StoreKeyFactory storeKeyFactory, MetricRegistry metricRegistry) throws IOException {
    this.logger = LoggerFactory.getLogger(getClass());
    messageFormatValidationTime =
        metricRegistry.histogram(MetricRegistry.name(MessageSievingInputStream.class, "MessageFormatValidationTime"));
    messageFormatBatchValidationTime = metricRegistry.histogram(
        MetricRegistry.name(MessageSievingInputStream.class, "MessageFormatBatchValidationTime"));
    validSize = 0;
    hasInvalidMessages = false;
    validMessageInfoList = new ArrayList<MessageInfo>();

    // check for empty list
    if (messageInfoList.isEmpty()) {
      byteBuffer = ByteBuffer.allocate(0);
      return;
    }

    // Accumulate in a long: "int += long" narrows silently and would hide an overflow.
    long totalSize = 0;
    for (MessageInfo info : messageInfoList) {
      totalSize += info.getSize();
    }
    if (totalSize < 0 || totalSize > Integer.MAX_VALUE) {
      throw new IllegalArgumentException(
          "Total size of messages " + totalSize + " cannot be buffered in a single byte array");
    }
    int totalMessageListSize = (int) totalSize;

    int bytesRead = 0;
    byte[] data = new byte[totalMessageListSize];
    long startTime = SystemTime.getInstance().milliseconds();
    logger.trace("Starting to validate message stream ");
    // Write position for the next message. It only advances past valid messages, so an invalid message is
    // overwritten by the message that follows it.
    int offset = 0;
    for (MessageInfo msgInfo : messageInfoList) {
      int msgSize = (int) msgInfo.getSize();
      Utils.readBytesFromStream(stream, data, offset, msgSize);
      logger.trace("Read stream for message info {} into memory", msgInfo);
      ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(data, offset, msgSize);
      if (checkForMessageValidity(byteArrayInputStream, offset, msgSize, storeKeyFactory, msgInfo)) {
        offset += msgSize;
        validMessageInfoList.add(msgInfo);
      } else {
        logger.error("Error reading the message at {} with messageInfo {} and hence skipping the message", bytesRead,
            msgInfo);
        hasInvalidMessages = true;
      }
      bytesRead += msgSize;
    }
    if (bytesRead != totalMessageListSize) {
      logger.error("Failed to read intended size from stream. Expected {}, actual {}", totalMessageListSize,
          bytesRead);
    }
    if (validMessageInfoList.isEmpty()) {
      logger.error("All messages are invalidated in this message stream ");
    }
    messageFormatBatchValidationTime.update(SystemTime.getInstance().milliseconds() - startTime);
    this.validSize = offset;
    byteBuffer = ByteBuffer.wrap(data, 0, validSize);
    logger.trace("Completed validation of message stream ");
  }

  /**
   * Returns the total size of all valid messages that could be read from the stream
   * @return validSize
   */
  public int getSize() {
    return validSize;
  }

  /**
   * Reads the next byte of the valid messages.
   * @return the next byte as a value in the range 0-255, or -1 if no bytes remain
   * @throws IOException never thrown by this implementation; declared by {@link InputStream#read()}
   */
  @Override
  public int read() throws IOException {
    if (!byteBuffer.hasRemaining()) {
      return -1;
    }
    return byteBuffer.get() & 0xFF;
  }

  /**
   * Copies up to {@code length} bytes of the valid messages into {@code bytes} starting at {@code offset}.
   * @param bytes destination array
   * @param offset index in {@code bytes} at which the first byte is written
   * @param length maximum number of bytes to copy
   * @return the number of bytes copied, 0 if {@code length} is 0, or -1 if no bytes remain
   * @throws IllegalArgumentException if {@code bytes} is null
   * @throws IndexOutOfBoundsException if {@code offset} or {@code length} is negative, or if {@code length} exceeds
   *                                   the space available in {@code bytes} after {@code offset}
   * @throws IOException never thrown by this implementation; declared by {@link InputStream#read(byte[], int, int)}
   */
  @Override
  public int read(byte[] bytes, int offset, int length) throws IOException {
    if (bytes == null) {
      throw new IllegalArgumentException("Byte array cannot be null");
    } else if (offset < 0 || length < 0 || length > bytes.length - offset) {
      throw new IndexOutOfBoundsException();
    } else if (length == 0) {
      return 0;
    }
    int count = Math.min(byteBuffer.remaining(), length);
    if (count == 0) {
      return -1;
    }
    byteBuffer.get(bytes, offset, count);
    return count;
  }

  /**
   * Whether the stream has invalid messages or not
   * @return true if at least one message failed validation and was skipped, false otherwise
   */
  public boolean hasInvalidMessages() {
    return hasInvalidMessages;
  }

  /**
   * Returns the {@link MessageInfo} of every message that passed validation, in the order they were supplied.
   * @return the internal list of valid message infos (not a copy)
   */
  public List<MessageInfo> getValidMessageInfoList() {
    return validMessageInfoList;
  }

  /**
   * Ensures blob validity in the given input stream. For now, blobs are checked for message corruption.
   * A message is considered valid only if it has header version 1, all of its records deserialize, the stream is
   * fully consumed by the parse, and the store key parsed from the stream equals the one in {@code msgInfo}.
   * @param byteArrayInputStream stream against which validation has to be done
   * @param currentOffset Current offset at which the data has to be read from the given byte array (used for
   *                      logging only)
   * @param size total size of the message expected (currently unused)
   * @param storeKeyFactory StoreKeyFactory used to get store key
   * @param msgInfo the MessageInfo the parsed message is expected to match
   * @return true if message is valid and false otherwise
   * @throws IOException if deserialization fails with an I/O error
   * @throws IllegalStateException if the header indicates that the message is a delete record
   */
  private boolean checkForMessageValidity(ByteArrayInputStream byteArrayInputStream, int currentOffset, long size,
      StoreKeyFactory storeKeyFactory, MessageInfo msgInfo) throws IOException {
    boolean isValid = false;
    BlobProperties props = null;
    ByteBuffer metadata = null;
    BlobData blobData = null;
    long startTime = SystemTime.getInstance().milliseconds();
    try {
      int availableBeforeParsing = byteArrayInputStream.available();
      byte[] headerVersionInBytes = new byte[MessageFormatRecord.Version_Field_Size_In_Bytes];
      readHeaderBytes(byteArrayInputStream, headerVersionInBytes, 0, headerVersionInBytes.length);
      ByteBuffer headerVersion = ByteBuffer.wrap(headerVersionInBytes);
      short version = headerVersion.getShort();
      if (version == 1) {
        ByteBuffer headerBuffer = ByteBuffer.allocate(MessageFormatRecord.MessageHeader_Format_V1.getHeaderSize());
        headerBuffer.putShort(version);
        // The version (2 bytes) is already in place; read the remainder of the header behind it.
        readHeaderBytes(byteArrayInputStream, headerBuffer.array(), 2, headerBuffer.capacity() - 2);
        // Expose the whole header (position 0, limit == capacity) to the header parser.
        headerBuffer.rewind();
        MessageFormatRecord.MessageHeader_Format_V1 header =
            new MessageFormatRecord.MessageHeader_Format_V1(headerBuffer);
        StoreKey storeKey = storeKeyFactory.getStoreKey(new DataInputStream(byteArrayInputStream));
        if (header.getBlobPropertiesRecordRelativeOffset()
            != MessageFormatRecord.Message_Header_Invalid_Relative_Offset) {
          props = MessageFormatRecord.deserializeBlobProperties(byteArrayInputStream);
          metadata = MessageFormatRecord.deserializeUserMetadata(byteArrayInputStream);
          blobData = MessageFormatRecord.deserializeBlob(byteArrayInputStream);
        } else {
          throw new IllegalStateException("Message cannot be a deleted record ");
        }
        int leftover = byteArrayInputStream.available();
        if (leftover != 0) {
          // Bytes consumed by the parse = bytes available before parsing minus bytes still left over.
          logger.error("Parsed message size {} is not equivalent to the size in message info {}",
              availableBeforeParsing - leftover, availableBeforeParsing);
        } else {
          if (logger.isTraceEnabled()) {
            logger.trace("Message Successfully read");
            logger.trace(
                "Header - version {} Message Size {} Starting offset of the blob {} BlobPropertiesRelativeOffset {}"
                    + " UserMetadataRelativeOffset {} DataRelativeOffset {} DeleteRecordRelativeOffset {} Crc {}",
                header.getVersion(), header.getMessageSize(), currentOffset,
                header.getBlobPropertiesRecordRelativeOffset(), header.getUserMetadataRecordRelativeOffset(),
                header.getBlobRecordRelativeOffset(), header.getDeleteRecordRelativeOffset(), header.getCrc());
            logger.trace("Id {} Blob Properties - blobSize {} Metadata - size {} Blob - size {} ", storeKey.getID(),
                props.getBlobSize(), metadata.capacity(), blobData.getSize());
          }
          if (msgInfo.getStoreKey().equals(storeKey)) {
            isValid = true;
          } else {
            logger.error("StoreKey in log {} failed to match store key from Index {}", storeKey,
                msgInfo.getStoreKey());
          }
        }
      } else {
        throw new MessageFormatException("Header version not supported " + version,
            MessageFormatErrorCodes.Data_Corrupt);
      }
    } catch (MessageFormatException e) {
      logger.error(
          "MessageFormat exception thrown for a blob starting at offset " + currentOffset + " with exception: ", e);
    } finally {
      messageFormatValidationTime.update(SystemTime.getInstance().milliseconds() - startTime);
    }
    return isValid;
  }

  /**
   * Reads exactly {@code length} header bytes from {@code in} into {@code dest}, treating a short read as
   * corruption instead of silently parsing a partially filled buffer.
   * @param in stream positioned at the bytes to read
   * @param dest destination array
   * @param offset index in {@code dest} at which the first byte is written
   * @param length exact number of bytes required
   * @throws MessageFormatException with {@link MessageFormatErrorCodes#Data_Corrupt} if fewer than {@code length}
   *                                bytes are available
   */
  private static void readHeaderBytes(ByteArrayInputStream in, byte[] dest, int offset, int length)
      throws MessageFormatException {
    int read = in.read(dest, offset, length);
    if (read != length) {
      throw new MessageFormatException(
          "Message is truncated. Expected " + length + " header bytes but only " + Math.max(read, 0)
              + " were available", MessageFormatErrorCodes.Data_Corrupt);
    }
  }
}