/*
* Copyright (C) 2007 Steve Ratcliffe
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* Author: Steve Ratcliffe
* Create date: Jan 1, 2008
*/
package uk.me.parabola.imgfmt.app.labelenc;
import uk.me.parabola.imgfmt.ExitException;
/**
* @author Steve Ratcliffe
*/
public class CodeFunctions {
// Label encoding length
public static final int ENCODING_FORMAT6 = 6;
private static final int ENCODING_FORMAT9 = 9;
private static final int ENCODING_FORMAT10 = 10;
private int codepage;
private int encodingType;
private CharacterEncoder encoder;
private CharacterDecoder decoder;
protected void setEncoder(CharacterEncoder encoder) {
this.encoder = encoder;
}
public CharacterEncoder getEncoder() {
return encoder;
}
private void setDecoder(CharacterDecoder decoder) {
this.decoder = decoder;
}
public CharacterDecoder getDecoder() {
return decoder;
}
public int getEncodingType() {
return encodingType;
}
private void setEncodingType(int encodingType) {
this.encodingType = encodingType;
}
public int getCodepage() {
return codepage;
}
protected void setCodepage(int codepage) {
this.codepage = codepage;
}
/**
* Create a CharacterEncoder for the given charset option. Note that this
* routine also writes to the lblHeader parameter to set the encoding type.
* @param charset The mkgmap command line option to be interpreted.
* @return The various character set parameters that will be needed.
*/
public static CodeFunctions createEncoderForLBL(String charset) {
CodeFunctions funcs = new CodeFunctions();
switch (charset) {
case "ascii":
funcs.setEncodingType(ENCODING_FORMAT6);
funcs.setEncoder(new Format6Encoder());
funcs.setDecoder(new Format6Decoder());
break;
case "cp0": // This is used for ascii but with the single byte format
funcs.setEncodingType(ENCODING_FORMAT9);
funcs.setEncoder(new AnyCharsetEncoder("ascii", new TableTransliterator("ascii")));
funcs.setDecoder(new AnyCharsetDecoder("ascii"));
funcs.setCodepage(0);
break;
case "cp1252":
case "latin1":
funcs.setEncodingType(ENCODING_FORMAT9);
funcs.setEncoder(new AnyCharsetEncoder("cp1252", new TableTransliterator("latin1")));
funcs.setDecoder(new AnyCharsetDecoder("cp1252"));
funcs.setCodepage(1252);
break;
case "cp65001":
case "unicode":
funcs.setEncodingType(ENCODING_FORMAT10);
funcs.setEncoder(new Utf8Encoder());
funcs.setDecoder(new Utf8Decoder());
funcs.setCodepage(65001);
break;
case "cp932":
case "ms932":
funcs.setEncodingType(ENCODING_FORMAT10);
funcs.setEncoder(new AnyCharsetEncoder("ms932", new SparseTransliterator("nomacron")));
funcs.setDecoder(new AnyCharsetDecoder("ms932"));
funcs.setCodepage(932);
break;
default:
funcs.setEncodingType(ENCODING_FORMAT9);
funcs.setDecoder(new AnyCharsetDecoder(charset));
funcs.setEncoder(new AnyCharsetEncoder(charset, new TableTransliterator("ascii")));
funcs.setCodepage(guessCodepage(charset));
break;
}
return funcs;
}
/**
* Sets encoding functions for a given format and code page. This is used
* when reading from an existing file.
*
* @param format The format from the lbl header.
* @param codePage The codepage found in the header.
* @return The various character set parameters that will be needed.
*/
public static CodeFunctions createEncoderForLBL(int format, int codePage) {
CodeFunctions funcs;
if (format == ENCODING_FORMAT6) {
funcs = createEncoderForLBL("ascii");
} else {
funcs = createEncoderForLBL("cp" + codePage);
}
return funcs;
}
/**
* Guess the code page from the given charset. Only works with things
* like cp1252, windows-1252 and some well known ones.
* @param charset The charset that was given.
*/
private static int guessCodepage(String charset) {
String cs = charset.toLowerCase();
if (cs.startsWith("cp")) {
try {
return Integer.parseInt(charset.substring(2));
} catch (NumberFormatException e) {
// wasn't in the right form
throw new ExitException("Invalid character set: " + cs);
}
} else if (cs.startsWith("windows-")) {
try {
return Integer.parseInt(charset.substring(8));
} catch (NumberFormatException e) {
// wasn't in the right form to guess
throw new ExitException("Invalid character set: " + cs);
}
} else if (cs.equals("latin1")) {
return 1252;
}
return 0;
}
public static CharacterEncoder getDefaultEncoder() {
return new Format6Encoder();
}
public static CharacterDecoder getDefaultDecoder() {
return new Format6Decoder();
}
}