/* * Copyright 2013 The Netty Project * * The Netty Project licenses this file to you under the Apache License, * version 2.0 (the "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package io.netty.handler.codec.xml; import io.netty.buffer.ByteBuf; import io.netty.channel.ChannelHandlerContext; import io.netty.handler.codec.ByteToMessageDecoder; import io.netty.handler.codec.CorruptedFrameException; import io.netty.handler.codec.TooLongFrameException; import java.util.List; /** * A frame decoder for single separate XML based message streams. * <p/> * A couple examples will better help illustrate * what this decoder actually does. 
* <p>
* Given an input array of bytes split over 3 frames like this:
* <pre>{@code
* +-----+-----+-----------+
* | <an | Xml | Element/> |
* +-----+-----+-----------+
* }</pre>
* <p>
* this decoder would output a single frame:
* <pre>{@code
* +-----------------+
* | <anXmlElement/> |
* +-----------------+
* }</pre>
* <p>
* Given an input array of bytes split over 5 frames like this:
* <pre>{@code
* +-----+-----+-----------+-----+----------------------------------+
* | <an | Xml | Element/> | <ro | ot><child>content</child></root> |
* +-----+-----+-----------+-----+----------------------------------+
* }</pre>
* <p>
* this decoder would output two frames:
* <pre>{@code
* +-----------------+-------------------------------------+
* | <anXmlElement/> | <root><child>content</child></root> |
* +-----------------+-------------------------------------+
* }</pre>
* <p>
* Please note that this decoder is not suitable for
* XML streaming protocols such as
* <a href="http://xmpp.org/rfcs/rfc6120.html">XMPP</a>,
* where an initial XML element opens the stream and is only
* closed at the end of the session, although this class
* could probably support that kind of message flow with
* minor modifications.
*/ public class XmlFrameDecoder extends ByteToMessageDecoder { private final int maxFrameLength; public XmlFrameDecoder(int maxFrameLength) { if (maxFrameLength < 1) { throw new IllegalArgumentException("maxFrameLength must be a positive int"); } this.maxFrameLength = maxFrameLength; } @Override protected void decode(ChannelHandlerContext ctx, ByteBuf in, List<Object> out) throws Exception { boolean openingBracketFound = false; boolean atLeastOneXmlElementFound = false; boolean inCDATASection = false; long openBracketsCount = 0; int length = 0; int leadingWhiteSpaceCount = 0; final int bufferLength = in.writerIndex(); if (bufferLength > maxFrameLength) { // bufferLength exceeded maxFrameLength; dropping frame in.skipBytes(in.readableBytes()); fail(bufferLength); return; } for (int i = in.readerIndex(); i < bufferLength; i++) { final byte readByte = in.getByte(i); if (!openingBracketFound && Character.isWhitespace(readByte)) { // xml has not started and whitespace char found leadingWhiteSpaceCount++; } else if (!openingBracketFound && readByte != '<') { // garbage found before xml start fail(ctx); in.skipBytes(in.readableBytes()); return; } else if (!inCDATASection && readByte == '<') { openingBracketFound = true; if (i < bufferLength - 1) { final byte peekAheadByte = in.getByte(i + 1); if (peekAheadByte == '/') { // found </, we must check if it is enclosed int peekFurtherAheadIndex = i + 2; while (peekFurtherAheadIndex <= bufferLength - 1) { //if we have </ and enclosing > we can decrement openBracketsCount if (in.getByte(peekFurtherAheadIndex) == '>') { openBracketsCount--; break; } peekFurtherAheadIndex++; } } else if (isValidStartCharForXmlElement(peekAheadByte)) { atLeastOneXmlElementFound = true; // char after < is a valid xml element start char, // incrementing openBracketsCount openBracketsCount++; } else if (peekAheadByte == '!') { if (isCommentBlockStart(in, i)) { // <!-- comment --> start found openBracketsCount++; } else if (isCDATABlockStart(in, i)) { 
// <![CDATA[ start found openBracketsCount++; inCDATASection = true; } } else if (peekAheadByte == '?') { // <?xml ?> start found openBracketsCount++; } } } else if (!inCDATASection && readByte == '/') { if (i < bufferLength - 1 && in.getByte(i + 1) == '>') { // found />, decrementing openBracketsCount openBracketsCount--; } } else if (readByte == '>') { length = i + 1; if (i - 1 > -1) { final byte peekBehindByte = in.getByte(i - 1); if (!inCDATASection) { if (peekBehindByte == '?') { // an <?xml ?> tag was closed openBracketsCount--; } else if (peekBehindByte == '-' && i - 2 > -1 && in.getByte(i - 2) == '-') { // a <!-- comment --> was closed openBracketsCount--; } } else if (peekBehindByte == ']' && i - 2 > -1 && in.getByte(i - 2) == ']') { // a <![CDATA[...]]> block was closed openBracketsCount--; inCDATASection = false; } } if (atLeastOneXmlElementFound && openBracketsCount == 0) { // xml is balanced, bailing out break; } } } final int readerIndex = in.readerIndex(); int xmlElementLength = length - readerIndex; if (openBracketsCount == 0 && xmlElementLength > 0) { if (readerIndex + xmlElementLength >= bufferLength) { xmlElementLength = in.readableBytes(); } final ByteBuf frame = extractFrame(in, readerIndex + leadingWhiteSpaceCount, xmlElementLength - leadingWhiteSpaceCount); in.skipBytes(xmlElementLength); out.add(frame); } } private void fail(long frameLength) { if (frameLength > 0) { throw new TooLongFrameException( "frame length exceeds " + maxFrameLength + ": " + frameLength + " - discarded"); } else { throw new TooLongFrameException( "frame length exceeds " + maxFrameLength + " - discarding"); } } private static void fail(ChannelHandlerContext ctx) { ctx.fireExceptionCaught(new CorruptedFrameException("frame contains content before the xml starts")); } private static ByteBuf extractFrame(ByteBuf buffer, int index, int length) { return buffer.copy(index, length); } /** * Asks whether the given byte is a valid * start char for an xml element name. 
* <p/> * Please refer to the * <a href="http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-NameStartChar">NameStartChar</a> * formal definition in the W3C XML spec for further info. * * @param b the input char * @return true if the char is a valid start char */ private static boolean isValidStartCharForXmlElement(final byte b) { return b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z' || b == ':' || b == '_'; } private static boolean isCommentBlockStart(final ByteBuf in, final int i) { return i < in.writerIndex() - 3 && in.getByte(i + 2) == '-' && in.getByte(i + 3) == '-'; } private static boolean isCDATABlockStart(final ByteBuf in, final int i) { return i < in.writerIndex() - 8 && in.getByte(i + 2) == '[' && in.getByte(i + 3) == 'C' && in.getByte(i + 4) == 'D' && in.getByte(i + 5) == 'A' && in.getByte(i + 6) == 'T' && in.getByte(i + 7) == 'A' && in.getByte(i + 8) == '['; } }