/*
* $Id: DefaultZCharDecoder.java 536 2008-02-19 06:03:27Z weiju $
*
* Created on 2006/01/09
* Copyright 2005-2008 by Wei-ju Wu
* This file is part of The Z-machine Preservation Project (ZMPP).
*
* ZMPP is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ZMPP is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ZMPP. If not, see <http://www.gnu.org/licenses/>.
*/
package org.zmpp.encoding;
import java.util.ArrayList;
import java.util.List;
import org.zmpp.base.Memory;
/**
 * This is the default implementation of the ZCharDecoder interface. The
 * central method is decode2Zscii which handles abbreviations, 10 bit escape
 * characters and alphabet table characters. Alphabet table characters and
 * shift states are handled by the ZCharTranslator object.
 *
 * @author Wei-ju Wu
 * @version 1.0
 */
public final class DefaultZCharDecoder implements ZCharDecoder {

  private final ZCharTranslator translator;
  private final ZsciiEncoding encoding;
  private final AbbreviationsTable abbreviations;

  /**
   * Decoder used for abbreviation strings. It is created lazily on the first
   * abbreviation that is encountered and then reused.
   */
  private ZCharDecoder abbreviationDecoder;

  /**
   * Constructor.
   *
   * @param encoding the ZsciiEncoding object
   * @param translator the ZCharTranslator object
   * @param abbreviations the abbreviations table used for decoding, may be
   *        null, in which case abbreviations are skipped without output
   */
  public DefaultZCharDecoder(final ZsciiEncoding encoding,
      final ZCharTranslator translator,
      final AbbreviationsTable abbreviations) {
    this.abbreviations = abbreviations;
    this.translator = translator;
    this.encoding = encoding;
  }

  /**
   * {@inheritDoc}
   */
  public ZsciiString decode2Zscii(final Memory memory,
      final int address, final int length) {
    final ZsciiStringBuilder builder = new ZsciiStringBuilder();
    translator.reset();
    final char[] zbytes = extractZbytes(memory, address, length);
    int i = 0;
    while (i < zbytes.length) {
      final char zchar = zbytes[i];
      // Each handler returns a position greater than i if it consumed
      // input, otherwise i unchanged. The order of the checks matters,
      // because the translator is queried in the same sequence.
      int next = handleAbbreviation(builder, memory, zbytes, i);
      if (next <= i) {
        next = handleEscapeA2(builder, zbytes, i);
      }
      if (next <= i) {
        // neither abbreviation nor escape: regular character
        decodeZchar(builder, zchar);
        next = i + 1;
      }
      i = next;
    }
    return builder.toZsciiString();
  }

  /**
   * Checks whether the character at the given position introduces an
   * abbreviation and, if so, appends the decoded abbreviation to the builder.
   *
   * @param builder the builder receiving the decoded abbreviation
   * @param memory the memory access object
   * @param data the z characters of the string being decoded
   * @param pos the current position within data
   * @return pos if the character is not an abbreviation marker, otherwise the
   *         position behind the characters that were consumed
   * @throws IllegalStateException if the translator can not be cloned
   */
  private int handleAbbreviation(final ZsciiStringBuilder builder,
      final Memory memory, final char[] data, final int pos) {
    final char zchar = data[pos];
    if (!translator.isAbbreviation(zchar)) {
      return pos;
    }
    // we need to check if we are at the end of the buffer, even if an
    // abbreviation is suggested. This happens e.g. in Zork I
    if (pos >= data.length - 1) {
      // only the marker itself is consumed
      return pos + 1;
    }
    // the abbreviations table could be null, simply skip that part in this
    // case
    if (abbreviations != null) {
      // the next character determines the abbreviation
      final int x = data[pos + 1];
      final int entryNum = 32 * (zchar - 1) + x;
      final int entryAddress = abbreviations.getWordAddress(entryNum);
      // length 0 means "unspecified", the string is read up to its end word
      final ZsciiString abbrev = getAbbreviationDecoder().decode2Zscii(
          memory, entryAddress, 0);
      builder.append(abbrev);
    }
    // marker and abbreviation index are consumed
    return pos + 2;
  }

  /**
   * Returns the decoder for abbreviation strings, creating it on first use.
   * We only use one abbreviation decoder instance, it works on a clone of the
   * translator, so the abbreviation decoding will not influence the
   * continuation of the decoding process. It is created without an
   * abbreviations table.
   *
   * @return the abbreviation decoder
   * @throws IllegalStateException if the translator can not be cloned
   */
  private ZCharDecoder getAbbreviationDecoder() {
    if (abbreviationDecoder == null) {
      try {
        abbreviationDecoder = new DefaultZCharDecoder(encoding,
            (ZCharTranslator) translator.clone(), null);
      } catch (CloneNotSupportedException ex) {
        // should never happen; fail with the real cause instead of running
        // into a NullPointerException on the missing decoder
        throw new IllegalStateException(
            "could not clone translator for abbreviation decoding", ex);
      }
    }
    return abbreviationDecoder;
  }

  /**
   * Checks whether the character at the given position starts a 10 bit
   * escape sequence and, if so, appends the joined character to the builder.
   *
   * @param builder the builder receiving the escaped character
   * @param data the z characters of the string being decoded
   * @param pos the current position within data
   * @return pos if the character does not start an escape sequence,
   *         otherwise the position behind the characters that were consumed
   */
  private int handleEscapeA2(final ZsciiStringBuilder builder,
      final char[] data, final int pos) {
    if (!translator.willEscapeA2(data[pos])) {
      return pos;
    }
    int position = pos + 1; // the escape character itself
    // If the data is truncated, do not continue. The two characters at
    // pos + 1 and pos + 2 are read, so pos + 2 has to be a valid index.
    if (pos < data.length - 2) {
      joinToZsciiChar(builder, data[pos + 1], data[pos + 2]);
      position += 2;
    }
    translator.resetToLastAlphabet();
    return position;
  }

  /**
   * {@inheritDoc}
   */
  public char decodeZChar(final char zchar) {
    if (ZsciiEncoding.isAscii(zchar) || ZsciiEncoding.isAccent(zchar)) {
      return zchar;
    }
    return translator.translate(zchar);
  }

  /**
   * Decodes an encoded character and adds it to the specified builder object.
   * Characters that decode to 0 are not added.
   *
   * @param builder a ZsciiStringBuilder object
   * @param zchar the encoded character to decode and add
   */
  private void decodeZchar(final ZsciiStringBuilder builder,
      final char zchar) {
    final char c = decodeZChar(zchar);
    if (c != 0) {
      builder.append(c);
    }
  }

  /**
   * {@inheritDoc}
   */
  public ZCharTranslator getTranslator() {
    return translator;
  }

  // ***********************************************************************
  // ******* Static helpers
  // *****************************

  /**
   * Determines the last word in a z sequence. The last word has the MSB set.
   *
   * @param zword the zword
   * @return true if zword is the last word, false, otherwise
   */
  public static boolean isEndWord(final short zword) {
    return (zword & 0x8000) != 0;
  }

  /**
   * This function unfortunately generates a List object on each invocation,
   * the advantage is that it will return all the characters of the Z string.
   * At least one word is always read. The result always contains three
   * characters for each word that was read.
   *
   * @param memory the memory access object
   * @param address the address of the z string
   * @param length the maximum number of bytes to read from memory or 0 for
   *        unspecified
   * @return the z characters of the string
   */
  public static char[] extractZbytes(final Memory memory,
      final int address, final int length) {
    final List<short[]> byteList = new ArrayList<short[]>();
    int currentAddr = address;
    short zword;
    do {
      zword = memory.readShort(currentAddr);
      byteList.add(extractBytes(zword));
      currentAddr += 2; // increment pointer
      // if this is a dictionary entry, we need to provide the
      // length and cancel the loop earlier
      if (length > 0 && (currentAddr - address) >= length) {
        break;
      }
    } while (!isEndWord(zword));

    final char[] result = new char[byteList.size() * 3];
    int i = 0;
    for (short[] triplet : byteList) {
      for (short b : triplet) {
        result[i++] = (char) b;
      }
    }
    return result;
  }

  /**
   * Extracts three 5 bit fields from the given 16 bit word and returns an
   * array of three values containing these characters. The field in the
   * highest bits comes first.
   *
   * @param zword a 16 bit word
   * @return an array of three values containing the three 5 bit characters
   *         encoded in the word
   */
  private static short[] extractBytes(final short zword) {
    return new short[] {
      (short) ((zword >> 10) & 0x1f),
      (short) ((zword >> 5) & 0x1f),
      (short) (zword & 0x1f)
    };
  }

  /**
   * Joins the specified two 5 bit values into a 10 bit ZSCII character and
   * appends it to the builder.
   *
   * @param builder the ZsciiStringBuilder to write to
   * @param top the value holding the top 5 bit of the character
   * @param bottom the value holding the bottom 5 bit of the character
   */
  private void joinToZsciiChar(final ZsciiStringBuilder builder,
      final char top, final char bottom) {
    builder.append((char) (top << 5 | bottom));
  }
}