/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.processors.lumberjack.frame; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.util.LinkedList; import java.util.List; import java.util.zip.InflaterInputStream; import org.apache.nifi.stream.io.ByteArrayInputStream; import org.apache.nifi.stream.io.ByteArrayOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Decodes a Lumberjack frame by maintaining a state based on each byte that has been processed. This class * should not be shared by multiple threads. */ @Deprecated public class LumberjackDecoder { static final Logger logger = LoggerFactory.getLogger(LumberjackDecoder.class); private LumberjackFrame.Builder frameBuilder; private LumberjackState currState = LumberjackState.VERSION; private byte decodedFrameType; private byte[] decompressedData; private final Charset charset; private final ByteArrayOutputStream currBytes; private long windowSize; public static final byte FRAME_WINDOWSIZE = 0x57, FRAME_DATA = 0x44, FRAME_COMPRESSED = 0x43, FRAME_ACK = 0x41, FRAME_JSON = 0x4a; /** * @param charset the charset to decode bytes from the Lumberjack frame */ public LumberjackDecoder(final Charset charset) { this(charset, new ByteArrayOutputStream(4096)); } /** * @param charset the charset to decode bytes from the Lumberjack frame * @param buffer a buffer to use while processing the bytes */ public LumberjackDecoder(final Charset charset, final ByteArrayOutputStream buffer) { this.charset = charset; this.currBytes = buffer; this.frameBuilder = new LumberjackFrame.Builder(); this.decodedFrameType = 0x00; } /** * Resets this decoder back to its initial state. */ public void reset() { frameBuilder = new LumberjackFrame.Builder(); currState = LumberjackState.VERSION; decodedFrameType = 0x00; currBytes.reset(); } /** * Process the next byte from the channel, updating the builder and state accordingly. * * @param currByte the next byte to process * @preturn true if a frame is ready to be retrieved, false otherwise */ public boolean process(final byte currByte) throws LumberjackFrameException { try { switch (currState) { case VERSION: processVERSION(currByte); break; case FRAMETYPE: processFRAMETYPE(currByte); break; case PAYLOAD: processPAYLOAD(currByte); if (frameBuilder.frameType == FRAME_WINDOWSIZE && currState == LumberjackState.COMPLETE) { return true; } else if (frameBuilder.frameType == FRAME_COMPRESSED && currState == LumberjackState.COMPLETE) { return true; } else { break; } case COMPLETE: return true; default: break; } return false; } catch (Exception e) { throw new LumberjackFrameException("Error decoding Lumberjack frame: " + e.getMessage(), e); } } /** * Returns the decoded frame and resets the decoder for the next frame. * This method should be called after checking isComplete(). * * @return the LumberjackFrame that was decoded */ public List<LumberjackFrame> getFrames() throws LumberjackFrameException { List<LumberjackFrame> frames = new LinkedList<>(); if (currState != LumberjackState.COMPLETE) { throw new LumberjackFrameException("Must be at the trailer of a frame"); } try { if (currState == LumberjackState.COMPLETE && frameBuilder.frameType == FRAME_COMPRESSED) { logger.debug("Frame is compressed, will iterate to decode", new Object[]{}); // LumberjackDecoder decompressedDecoder = new LumberjackDecoder(); // Zero currBytes, currState and frameBuilder prior to iteration over // decompressed bytes currBytes.reset(); frameBuilder.reset(); currState = LumberjackState.VERSION; // Run over decompressed data. frames = processDECOMPRESSED(decompressedData); } else { final LumberjackFrame frame = frameBuilder.build(); currBytes.reset(); frameBuilder.reset(); currState = LumberjackState.VERSION; frames.add(frame); } return frames; } catch (Exception e) { throw new LumberjackFrameException("Error decoding Lumberjack frame: " + e.getMessage(), e); } } private List<LumberjackFrame> processDECOMPRESSED(byte[] decompressedData) { List<LumberjackFrame> frames = new LinkedList<>(); LumberjackFrame.Builder internalFrameBuilder = new LumberjackFrame.Builder(); ByteBuffer currentData = ByteBuffer.wrap(decompressedData); // Lumberjack has a weird approach to frames, where compressed frames embed D(ata) or J(SON) frames. // inside a compressed input. // Or as stated in the documentation: // // "As an example, you could have 3 data frames compressed into a single // 'compressed' frame type: 1D{k,v}{k,v}1D{k,v}{k,v}1D{k,v}{k,v}" // // Therefore, instead of calling process method again, just iterate over each of // the frames and split them so they can be processed by LumberjackFrameHandler while (currentData.hasRemaining()) { int payloadLength = 0; internalFrameBuilder.version = currentData.get(); internalFrameBuilder.frameType = currentData.get(); internalFrameBuilder.seqNumber = currentData.getInt() & 0x00000000ffffffffL; currentData.mark(); // Set the payloadLength to negative to avoid doing math // around valueLength and valueLength payloadLength = payloadLength - currentData.position(); long pairCount = currentData.getInt() & 0x00000000ffffffffL; for (int i = 0; i < pairCount; i++) { long keyLength = currentData.getInt() & 0x00000000ffffffffL; currentData.position(currentData.position() + (int) keyLength); long valueLength = currentData.getInt() & 0x00000000ffffffffL; currentData.position(currentData.position() + (int) valueLength); } // Infer the length of the payload from position... payloadLength = payloadLength + currentData.position(); // Reset to mark (i.e. skip frame headers) prior to getting the data currentData.reset(); // get the data, shift mark and compact so next iteration can // read rest of buffer. byte[] bytes = new byte[payloadLength]; currentData.get(bytes, 0, payloadLength); currentData.mark(); // Add payload to frame internalFrameBuilder.payload(bytes); // data frame is created LumberjackFrame frame = internalFrameBuilder.build(); frames.add(frame); internalFrameBuilder.reset(); } return frames; } private void processVERSION(final byte b) { byte version = b; frameBuilder.version(version); logger.debug("Version number is {}", new Object[]{version}); currBytes.write(b); currState = LumberjackState.FRAMETYPE; } private void processFRAMETYPE(final byte b) { decodedFrameType = b; frameBuilder.frameType(decodedFrameType); logger.debug("Frame type is {}", new Object[]{decodedFrameType}); currBytes.write(b); currState = LumberjackState.PAYLOAD; } private void processPAYLOAD(final byte b) { currBytes.write(b); switch (decodedFrameType) { case FRAME_WINDOWSIZE: //'W' if (currBytes.size() < 6) { logger.trace("Lumberjack currBytes contents are {}", currBytes.toString()); break; } else if (currBytes.size() == 6) { frameBuilder.dataSize = ByteBuffer.wrap(java.util.Arrays.copyOfRange(currBytes.toByteArray(), 2, 6)).getInt() & 0x00000000ffffffffL; logger.debug("Data size is {}", new Object[]{frameBuilder.dataSize}); // Sets payload to empty as frame contains no data frameBuilder.payload(new byte[]{}); currBytes.reset(); currState = LumberjackState.COMPLETE; windowSize = frameBuilder.dataSize; break; } else { break; } case FRAME_COMPRESSED: //'C' if (currBytes.size() < 6) { logger.trace("Lumberjack currBytes contents are {}", currBytes.toString()); break; } else if (currBytes.size() >= 6) { frameBuilder.dataSize = ByteBuffer.wrap(java.util.Arrays.copyOfRange(currBytes.toByteArray(), 2, 6)).getInt() & 0x00000000ffffffffL; if (currBytes.size() - 6 == frameBuilder.dataSize) { try { byte[] buf = java.util.Arrays.copyOfRange(currBytes.toByteArray(), 6, currBytes.size()); InputStream in = new InflaterInputStream(new ByteArrayInputStream(buf)); ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len; while ((len = in.read(buffer)) > 0) { out.write(buffer, 0, len); } in.close(); out.close(); decompressedData = out.toByteArray(); // buf is no longer needed buf = null; logger.debug("Finished decompressing data"); // Decompression is complete, we should be able to proceed with resetting currBytes and curSrtate and iterating them // as type 'D' frames frameBuilder.dataSize(decompressedData.length); currState = LumberjackState.COMPLETE; } catch (IOException e) { throw new LumberjackFrameException("Error decompressing frame: " + e.getMessage(), e); } } break; // If currentByte.size is not lower than six and also not equal or great than 6... } else { break; } } } private void processCOMPLETE() { currBytes.reset(); frameBuilder.reset(); currState = LumberjackState.VERSION; } }