/* * $Id: ZCharEncoder.java 536 2008-02-19 06:03:27Z weiju $ * * Created on 2006/01/10 * Copyright 2005-2008 by Wei-ju Wu * This file is part of The Z-machine Preservation Project (ZMPP). * * ZMPP is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * ZMPP is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ZMPP. If not, see <http://www.gnu.org/licenses/>. */ package org.zmpp.encoding; import org.zmpp.base.Memory; import org.zmpp.encoding.AlphabetTable.Alphabet; /** * This class encodes ZSCII strings into dictionary encoded strings. Since * encoding is only needed from version 5, we can always assume a target entry * size of 6 bytes containing a maximum of nine characters. Encoding is pretty * difficult since there are several variables to remember during the encoding * process which would result in ugly code if stored in member variables. We use * the strategy of having an encoding state for a target word which is changed * and passed around until the word can written out. * * The encoding has some restrictions defined in the specification: The target * string is restricted to 6 bytes and 9 characters, which is the length of * dictionary entries and no abbreviations need to be taken into consideration. * * @author Wei-ju Wu * @version 1.0 */ public class ZCharEncoder { /** * The alphabet table. */ private ZCharTranslator translator; /** * The maximum entry length. */ private static final int MAX_ENTRY_LENGTH = 9; private static final int NUM_TARGET_BYTES = 6; private static final int TARGET_LAST_WORD = 4; static class EncodingState { public Memory memory; public int source; public int target; public int targetStart; public int currentWord; public int wordPosition; } public ZCharEncoder(final ZCharTranslator translator) { super(); this.translator = translator; } public void encode(final Memory memory, final int sourceAddress, final int length, final int targetAddress) { final int maxlen = Math.min(length, MAX_ENTRY_LENGTH); final EncodingState state = new EncodingState(); state.source = sourceAddress; state.target = targetAddress; state.targetStart = targetAddress; state.memory = memory; while (state.source < (sourceAddress + maxlen)) { processChar(state); } // Padding // This pads the incomplete last encoded word if (state.wordPosition <= 2 && state.target <= (state.targetStart + 4)) { int resultword = state.currentWord; for (int i = state.wordPosition; i < 3; i++) { resultword = writeByteToWord(resultword, (short) 5, i); } state.memory.writeUnsignedShort(state.target, resultword); state.target += 2; } // If we did not encode 3 shorts, fill the rest with 0x14a5's final int targetOffset = state.target - targetAddress; for (int i = targetOffset; i < NUM_TARGET_BYTES; i += 2) { //System.out.println("write padword: " + i); state.memory.writeUnsignedShort(targetAddress + i, 0x14a5); } // Always mark the last word as such, the last word is always // starting at the fifth byte final int lastword = memory.readUnsignedShort(targetAddress + TARGET_LAST_WORD); memory.writeUnsignedShort(targetAddress + TARGET_LAST_WORD, lastword | 0x8000); } private void processChar(final EncodingState state) { final char zsciiChar = (char) state.memory.readUnsignedByte(state.source++); final AlphabetElement element = translator.getAlphabetElementFor(zsciiChar); if (element.getAlphabet() == null) { final short zcharCode = element.getZCharCode(); // This is a ZMPP speciality, we do not want to end the string // in the middle of encoding, so we only encode if there is // enough space // how many slots left ? final int slotsleft = getSlotsLeft(state); if (slotsleft >= 4) { // Escape A2 processWord(state, (short) 5); processWord(state, (short) 6); processWord(state, getUpper5Bit(zcharCode)); processWord(state, getLower5Bit(zcharCode)); } else { for (int i = 0; i < slotsleft; i++) { processWord(state, (short) 5); } } } else { final Alphabet alphabet = element.getAlphabet(); final short zcharCode = element.getZCharCode(); if (alphabet == Alphabet.A1) { processWord(state, (short) 4); } else if (alphabet == Alphabet.A2) { processWord(state, (short) 5); } processWord(state, zcharCode); } } private int getSlotsLeft(final EncodingState state) { final int currentWord = (state.target - state.targetStart) / 2; return ((2 - currentWord) * 3) + (3 - state.wordPosition); } private void processWord(final EncodingState state, final short value) { state.currentWord = writeByteToWord(state.currentWord, value, state.wordPosition++); writeWordIfNeeded(state); } private void writeWordIfNeeded(final EncodingState state) { if (state.wordPosition > 2 && state.target <= (state.targetStart + 4)) { // Write the result and increment the target position state.memory.writeUnsignedShort(state.target, state.currentWord); state.target += 2; state.currentWord = 0; state.wordPosition = 0; } } private short getUpper5Bit(final short zsciiChar) { return (short) ((zsciiChar >>> 5) & 0x1f); } private short getLower5Bit(final short zsciiChar) { return (short) (zsciiChar & 0x1f); } /** * This function sets a byte value to the specified position within a word. * There are three positions within a 16 bit word and the bytes are * truncated such that only the lower 5 bit are taken as values. * * @param dataword the word to set * @param databyte the byte to set * @param pos a value between 0 and 2 * @return the new word with the databyte set in the position */ private static short writeByteToWord(final int dataword, final short databyte, final int pos) { final int shiftwidth = (2 - pos) * 5; return (short) (dataword | ((databyte & 0x1f) << shiftwidth)); } }