/** * VMware Continuent Tungsten Replicator * Copyright (C) 2015 VMware, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Initial developer(s): Robert Hodges * Contributor(s): */ package com.continuent.tungsten.common.parsing.bytes; /** * Implements a finite state machine for byte string translation. The state * machine uses a trie structure to track strings that have special meanings. * The {@link #add(byte)} method inserts a new byte "event" into the state * machine for processing. * * @author <a href="mailto:robert.hodges@continuent.com">Robert Hodges</a> * @version 1.0 */ public class ByteTranslationStateMachine { // Link in a trie of byte strings. class ByteTrie { byte value; ByteState state = ByteState.BUFFERING; int token = -1; byte[] substitute; boolean escape; ByteTrie[] links = new ByteTrie[256]; ByteTrie(byte value, ByteState state, int token, byte[] substitute, boolean escape) { this.value = value; this.state = state; this.token = token; this.escape = escape; this.substitute = substitute; } ByteTrie(byte value) { this.value = value; } } // Head of state machine. This is a dummy entry. ByteTrie head = new ByteTrie((byte) 0x00, ByteState.NONE, -1, null, false); // Current location in state machine as we walk the trie. ByteTrie current; // Last value accepted. ByteTrie lastAccepted = head; // True if last byte processed was an escape character. boolean escape; public ByteTranslationStateMachine() { } /** * Initializes the state machine. Must be the first call to the state * machine. */ public void init() { current = head; escape = false; } /** * Loads a string sequence into the state machine. * * @param value Byte array containing sequence * @param token If the string is a token value, a constant equal to or * greater than zero to identify the token * @param substitute An alternative value that is a substitute that should * be accepted instead of the parsed string * @param escape True if this represents an escape sequence. The next byte * after the escape sequence is accepted. */ public void load(byte[] value, int token, byte[] substitute, boolean escape) { ByteTrie bt = head; for (int i = 0; i < value.length; i++) { int index = value[i] & 0xFF; ByteTrie existing = bt.links[index]; if (i == value.length - 1) { // We are at the end. if (existing == null) { if (escape) { bt.links[index] = new ByteTrie(value[i], ByteState.BUFFERING, token, substitute, escape); } else { bt.links[index] = new ByteTrie(value[i], ByteState.ACCEPTED, token, substitute, escape); } } else { // We don't have unique leaf node, which means the structure // is ambiguous. throw new UnsupportedOperationException( "Byte string would result in an ambiguous byte state machine: " + new String(value)); } } else { if (existing == null) { bt.links[index] = new ByteTrie(value[i]); } } // We either found or have created the next entry. Get it now. bt = bt.links[index]; } } /** * Add a byte to the state machine and return the corresponding state. * Clients should fetch substitute strings and tokens after this call * occurs. */ public ByteState add(byte b) { lastAccepted = head; ByteState state = ByteState.NONE; if (escape) { // Previous value was an escape character. state = ByteState.ESCAPE; escape = false; current = head; } else { int index = b & 0xFF; ByteTrie next = current.links[index]; if (next == null) { // This is an ordinary character. state = ByteState.ACCEPTED; current = head; } else { // We are processing a string of 1 or more characters. state = next.state; lastAccepted = next; if (next.escape) { current = next; escape = true; } else if (state == ByteState.ACCEPTED) { current = head; } else { current = next; } } } return state; } /** * Return true if a substitute string is offered. */ public boolean isSubstitute() { return (lastAccepted.substitute != null); } /** Returns the last string substition. */ public byte[] getSubstitute() { return lastAccepted.substitute; } /** Returns true if the last accepted string is a token. */ public boolean isToken() { return lastAccepted.token >= 0; } /** Returns the last token value. */ public int getToken() { return lastAccepted.token; } }