/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.beats.frame;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.InflaterInputStream;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.stream.io.ByteArrayInputStream;
import org.apache.nifi.stream.io.ByteArrayOutputStream;
/**
* Decodes a Beats frame by maintaining a state based on each byte that has been processed. This class
* should not be shared by multiple threads.
*/
public class BeatsDecoder {

    final ComponentLog logger;

    private BeatsFrame.Builder frameBuilder;
    private BeatsState currState = BeatsState.VERSION;
    private byte decodedFrameType;
    // Inflated content of the most recent compressed frame; consumed (and released) by getFrames().
    private byte[] unprocessedData;
    private final Charset charset;
    // Raw bytes of the frame currently being assembled, starting at the version byte.
    private final ByteArrayOutputStream currBytes;
    // Value carried by the last window size frame that was decoded.
    private long windowSize;

    static final int MIN_FRAME_HEADER_LENGTH = 2; // Version + Type
    static final int WINDOWSIZE_LENGTH = MIN_FRAME_HEADER_LENGTH + 4; // 32bit unsigned window size
    static final int COMPRESSED_MIN_LENGTH = MIN_FRAME_HEADER_LENGTH + 4; // 32 bit unsigned payload length (payload follows)
    static final int JSON_MIN_LENGTH = MIN_FRAME_HEADER_LENGTH + 8; // 32 bit unsigned sequence number + 32 bit unsigned payload length

    public static final byte FRAME_WINDOWSIZE = 0x57; // 'W'
    public static final byte FRAME_DATA = 0x44; // 'D'
    public static final byte FRAME_COMPRESSED = 0x43; // 'C'
    public static final byte FRAME_ACK = 0x41; // 'A'
    public static final byte FRAME_JSON = 0x4a; // 'J'

    /**
     * Creates a decoder backed by a new 4096 byte buffer.
     *
     * @param charset the charset to decode bytes from the frame
     * @param logger the logger used to report decoding progress
     */
    public BeatsDecoder(final Charset charset, final ComponentLog logger) {
        this(charset, new ByteArrayOutputStream(4096), logger);
    }

    /**
     * @param charset the charset to decode bytes from the frame
     * @param buffer a buffer to use while processing the bytes
     * @param logger the logger used to report decoding progress
     */
    public BeatsDecoder(final Charset charset, final ByteArrayOutputStream buffer, final ComponentLog logger) {
        this.logger = logger;
        this.charset = charset;
        this.currBytes = buffer;
        this.frameBuilder = new BeatsFrame.Builder();
        this.decodedFrameType = 0x00;
    }

    /**
     * Resets this decoder back to its initial state, discarding any partially decoded frame.
     */
    public void reset() {
        frameBuilder = new BeatsFrame.Builder();
        currState = BeatsState.VERSION;
        decodedFrameType = 0x00;
        currBytes.reset();
        // Do not keep a previously inflated batch alive across a reset.
        unprocessedData = null;
    }

    /**
     * Process the next byte from the channel, updating the builder and state accordingly.
     *
     * <p>When the decoder is already in the COMPLETE state the byte is not consumed; the caller is
     * expected to call {@link #getFrames()} before feeding further bytes.</p>
     *
     * @param currByte the next byte to process
     * @return true if a frame is ready to be retrieved, false otherwise
     * @throws BeatsFrameException if any error occurs while decoding the byte
     */
    public boolean process(final byte currByte) throws BeatsFrameException {
        try {
            switch (currState) {
                case VERSION: // Just enough data to process the version
                    processVERSION(currByte);
                    return false;
                case FRAMETYPE: // Also able to process the frametype
                    processFRAMETYPE(currByte);
                    return false;
                case PAYLOAD: // Version and frame type were already received, keep consuming the payload
                    processPAYLOAD(currByte);
                    // processPAYLOAD only moves to COMPLETE for window size, compressed and JSON frames,
                    // so checking the state is sufficient to know whether a frame is ready.
                    return currState == BeatsState.COMPLETE;
                case COMPLETE:
                    return true;
                default:
                    return false;
            }
        } catch (Exception e) {
            throw new BeatsFrameException("Error decoding Beats frame: " + e.getMessage(), e);
        }
    }

    /**
     * Returns the decoded frame(s) and prepares the decoder for the next frame.
     * This method should only be called after {@link #process(byte)} has returned true.
     *
     * <p>A compressed frame is expanded into the frames embedded in it; any other frame is returned
     * as a single element list.</p>
     *
     * @return the frames that were decoded
     * @throws BeatsFrameException if no complete frame is available or the frame cannot be built
     */
    public List<BeatsFrame> getFrames() throws BeatsFrameException {
        if (currState != BeatsState.COMPLETE) {
            throw new BeatsFrameException("Must be at the trailer of a frame");
        }

        try {
            final List<BeatsFrame> frames;
            if (frameBuilder.frameType == FRAME_COMPRESSED) {
                // Once compressed frames are expanded, they must be divided into individual frames
                logger.debug("Frame is compressed, will iterate to decode", new Object[]{});
                // Hand the inflated bytes over and drop our reference so the batch can be collected
                // as soon as the caller is done with the resulting frames.
                final byte[] decompressed = unprocessedData;
                unprocessedData = null;
                prepareForNextFrame();
                frames = splitCompressedFrames(decompressed);
            } else {
                // Window size frames and JSON frames sent outside of a compressed frame are shipped as they are
                final BeatsFrame frame = frameBuilder.build();
                prepareForNextFrame();
                frames = new LinkedList<>();
                frames.add(frame);
            }
            return frames;
        } catch (Exception e) {
            throw new BeatsFrameException("Error decoding Beats frame: " + e.getMessage(), e);
        }
    }

    /** Clears the per-frame buffers and moves the state machine back to the start of a frame. */
    private void prepareForNextFrame() {
        currBytes.reset();
        frameBuilder.reset();
        currState = BeatsState.VERSION;
    }

    /**
     * Splits the inflated content of a compressed frame into the frames embedded in it.
     *
     * <p>Both Lumberjack v1 and Beats (LJ v2) embed D(ata) or J(SON) frames inside a compressed
     * frame. Or as stated in the documentation:</p>
     *
     * <pre>
     * "As an example, you could have 3 data frames compressed into a single
     * 'compressed' frame type: 1D{k,v}{k,v}1D{k,v}{k,v}1D{k,v}{k,v}"
     * </pre>
     *
     * <p>Instead of running the byte-by-byte state machine again, the embedded frames are read
     * directly so they can be processed by BeatsFrameHandler.</p>
     *
     * @param decompressedData the inflated payload of a compressed frame
     * @return the embedded frames, in the order they appear
     * @throws BeatsFrameException if an embedded JSON frame declares more payload than is available
     */
    private List<BeatsFrame> splitCompressedFrames(byte[] decompressedData) {
        final List<BeatsFrame> frames = new LinkedList<>();
        final BeatsFrame.Builder internalFrameBuilder = new BeatsFrame.Builder();
        final ByteBuffer currentData = ByteBuffer.wrap(decompressedData);

        while (currentData.hasRemaining()) {
            internalFrameBuilder.version = currentData.get();
            internalFrameBuilder.frameType = currentData.get();

            switch (internalFrameBuilder.frameType) {
                case FRAME_JSON:
                    internalFrameBuilder.seqNumber = (int) (currentData.getInt() & 0x00000000ffffffffL);
                    final long declaredLength = currentData.getInt() & 0x00000000ffffffffL;
                    internalFrameBuilder.dataSize = declaredLength;

                    // The length comes from the wire: validate it before allocating so that a bogus
                    // value fails as a decoding error instead of attempting a huge allocation.
                    if (declaredLength > currentData.remaining()) {
                        throw new BeatsFrameException("Embedded JSON frame declares " + declaredLength
                                + " payload bytes but only " + currentData.remaining() + " remain in the decompressed data");
                    }

                    final byte[] jsonBytes = new byte[(int) declaredLength];
                    currentData.get(jsonBytes);
                    // Add payload to frame
                    internalFrameBuilder.payload(jsonBytes);
                    break;
                default:
                    // NOTE(review): only JSON frames are understood here. Any other type yields a frame
                    // made of just version and type, and decoding continues with the following bytes.
                    break;
            }

            frames.add(internalFrameBuilder.build());
            internalFrameBuilder.reset();
        }

        return frames;
    }

    /** Records the protocol version byte and moves on to the frame type. */
    private void processVERSION(final byte b) {
        frameBuilder.version(b);
        logger.debug("Version number is {}", new Object[]{b});
        currBytes.write(b);
        currState = BeatsState.FRAMETYPE;
    }

    /** Records the frame type byte and moves on to the payload. */
    private void processFRAMETYPE(final byte b) {
        decodedFrameType = b;
        frameBuilder.frameType(decodedFrameType);
        logger.debug("Frame type is {}", new Object[]{decodedFrameType});
        currBytes.write(b);
        currState = BeatsState.PAYLOAD;
    }

    /**
     * Process the outer PAYLOAD byte by byte. Once the whole frame has been read the state is set to
     * COMPLETE so that the frame can be retrieved through {@link #getFrames()}.
     */
    private void processPAYLOAD(final byte b) {
        currBytes.write(b);

        switch (decodedFrameType) {
            case FRAME_WINDOWSIZE: // 'W'
                processWindowSizePayload();
                break;
            case FRAME_COMPRESSED: // 'C'
                processCompressedPayload();
                break;
            case FRAME_JSON: // 'J'
                processJsonPayload();
                break;
            default:
                // NOTE(review): bytes of any other frame type are only buffered; the decoder never
                // reaches COMPLETE for them.
                break;
        }
    }

    /** Completes a window size frame once its 32 bit value has been received. */
    private void processWindowSizePayload() {
        final int received = currBytes.size();

        if (received < WINDOWSIZE_LENGTH) {
            if (logger.isTraceEnabled()) {
                logger.trace("Beats currBytes contents are {}", new Object[] {currBytes.toString()});
            }
            return;
        }

        if (received == WINDOWSIZE_LENGTH) {
            frameBuilder.dataSize = readUnsignedInt(MIN_FRAME_HEADER_LENGTH);
            logger.debug("Data size is {}", new Object[]{frameBuilder.dataSize});
            // Sets payload to empty as frame contains no data
            frameBuilder.payload(new byte[]{});
            currBytes.reset();
            currState = BeatsState.COMPLETE;
            windowSize = frameBuilder.dataSize;
            return;
        }

        // Should never be here to be honest...
        logger.debug("Saw a packet I should not have seen. Packet contents were {}", new Object[] {currBytes.toString()});
    }

    /** Buffers a compressed frame and inflates it once the declared number of bytes has arrived. */
    private void processCompressedPayload() {
        final int received = currBytes.size();

        if (received < COMPRESSED_MIN_LENGTH) {
            if (logger.isTraceEnabled()) {
                logger.trace("Beats currBytes contents are {}", new Object[] {currBytes.toString()});
            }
            return;
        }

        if (received == COMPRESSED_MIN_LENGTH) {
            // Read the declared payload length once, as soon as the header is complete, rather than
            // copying the whole buffer again for every payload byte that arrives.
            frameBuilder.dataSize = readUnsignedInt(MIN_FRAME_HEADER_LENGTH);
        }

        if (received - COMPRESSED_MIN_LENGTH != frameBuilder.dataSize) {
            // Still waiting for the rest of the compressed payload
            return;
        }

        final byte[] compressed = java.util.Arrays.copyOfRange(currBytes.toByteArray(), COMPRESSED_MIN_LENGTH, received);
        try {
            unprocessedData = inflate(compressed);
        } catch (IOException e) {
            throw new BeatsFrameException("Error decompressing frame: " + e.getMessage(), e);
        }
        logger.debug("Finished decompressing data");

        // Decompression is complete; getFrames() will split the inflated bytes into individual frames
        frameBuilder.dataSize(unprocessedData.length);
        currState = BeatsState.COMPLETE;
    }

    /**
     * Buffers a JSON frame received outside of a compressed frame (Beats can disable compression)
     * and completes it once the declared payload has fully arrived.
     */
    private void processJsonPayload() {
        final int received = currBytes.size();

        if (received < JSON_MIN_LENGTH) {
            if (logger.isTraceEnabled()) {
                logger.trace("Beats currBytes contents are {}", new Object[] {currBytes.toString()});
            }
            return;
        }

        if (received == JSON_MIN_LENGTH) {
            // Read the sequence number from bytes
            frameBuilder.seqNumber = (int) readUnsignedInt(MIN_FRAME_HEADER_LENGTH);
            // Read the JSON payload length
            frameBuilder.dataSize = readUnsignedInt(MIN_FRAME_HEADER_LENGTH + 4);
        }

        // Checked right after the header as well, so that a frame declaring an empty payload
        // completes immediately instead of waiting for bytes that will never match.
        if (received - JSON_MIN_LENGTH == frameBuilder.dataSize) {
            frameBuilder.payload = java.util.Arrays.copyOfRange(currBytes.toByteArray(), JSON_MIN_LENGTH, received);
            currState = BeatsState.COMPLETE;
        }
    }

    /** Reads the 4 bytes at {@code offset} of the current frame buffer as an unsigned 32 bit integer. */
    private long readUnsignedInt(final int offset) {
        return ByteBuffer.wrap(currBytes.toByteArray(), offset, 4).getInt() & 0x00000000ffffffffL;
    }

    /**
     * Inflates the given bytes, always closing the inflater stream, even when reading fails.
     *
     * NOTE(review): the inflated size is not capped here.
     */
    private static byte[] inflate(final byte[] compressed) throws IOException {
        final ByteArrayOutputStream inflated = new ByteArrayOutputStream();
        try (InputStream in = new InflaterInputStream(new ByteArrayInputStream(compressed))) {
            final byte[] chunk = new byte[1024];
            int len;
            while ((len = in.read(chunk)) > 0) {
                inflated.write(chunk, 0, len);
            }
        }
        return inflated.toByteArray();
    }
}