/* * JBoss, Home of Professional Open Source. * Copyright 2014 Red Hat, Inc., and individual contributors * as indicated by the @author tags. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package io.undertow.websockets.core.protocol.version07; import io.undertow.server.protocol.framed.FrameHeaderData; import io.undertow.websockets.core.WebSocketMessages; import io.undertow.websockets.core.function.ChannelFunction; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; /** * An utility class which can be used to check if a sequence of bytes or ByteBuffers contain non UTF-8 data. * <p> * Please use a new instance per stream. * * @author <a href="mailto:nmaurer@redhat.com">Norman Maurer</a> */ public class UTF8Checker implements ChannelFunction { private static final int UTF8_ACCEPT = 0; private static final int UTF8_REJECT = 12; private static final byte[] TYPES = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; private static final byte[] STATES = {0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}; private int state = UTF8_ACCEPT; private void checkUTF8(int b) throws UnsupportedEncodingException { byte type = TYPES[b & 0xFF]; state = STATES[state + type]; if (state == UTF8_REJECT) { throw WebSocketMessages.MESSAGES.invalidTextFrameEncoding(); } } /** * Check if the given ByteBuffer contains non UTF-8 data. * * @param buf the ByteBuffer to check * @param position the index in the {@link ByteBuffer} to start from * @param length the number of bytes to operate on * @throws UnsupportedEncodingException is thrown if non UTF-8 data is found */ private void checkUTF8(ByteBuffer buf, int position, int length) throws UnsupportedEncodingException { int limit = position + length; for (int i = position; i < limit; i++) { checkUTF8(buf.get(i)); } } @Override public void newFrame(FrameHeaderData headerData) { } @Override public void afterRead(ByteBuffer buf, int position, int length) throws IOException{ checkUTF8(buf, position, length); } @Override public void beforeWrite(ByteBuffer buf, int position, int length) throws UnsupportedEncodingException{ checkUTF8(buf, position, length); } @Override public void complete() throws UnsupportedEncodingException { if (state != UTF8_ACCEPT) { throw WebSocketMessages.MESSAGES.invalidTextFrameEncoding(); } } }