/*
* Copyright (C) 2014 Civilian Framework.
*
* Licensed under the Civilian License (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.civilian-framework.org/license.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.civilian.util;
/**
 * Charset is a small helper class to determine if a character
 * is printable in a certain charset. (E.g. ISO-8859-1 can only
 * print characters <= 0xff). We use this class
 * in HTML output to determine if we need to print a character reference
 * instead of the character itself.
 * This is just used as a conservative heuristic, so we do not intend
 * comprehensive coverage of charsets (we wish we could get
 * the information from java.nio.charset.Charset).
 */
public abstract class Charset
{
	/**
	 * A Charset which reports every character as printable.
	 */
	public static final Charset UNRESTRICTED = new UnrestrictedCharset();

	/**
	 * A Charset which can print characters up to 0x7f.
	 */
	public static final Charset SEVEN_BIT = new SimpleCharset(0x007f);

	/**
	 * A Charset which can print characters up to 0xff.
	 */
	public static final Charset EIGHT_BIT = new SimpleCharset(0x00ff);


	/**
	 * Returns a Charset for the charset name. The name is matched by prefix,
	 * ignoring case. If the name is null or not known an eight-bit charset
	 * is returned.
	 * @param charsetName a charset name like UTF-8, ISO-8859-1, US-ASCII. Can be null.
	 * @return one of {@link #UNRESTRICTED}, {@link #SEVEN_BIT} or {@link #EIGHT_BIT}
	 */
	public static Charset getCharset(String charsetName)
	{
		if (charsetName == null)
			return EIGHT_BIT; // don't bother

		// all prefix tests go through the case-insensitive helper, so the name
		// does not need to be upper-cased (which would depend on the default locale)
		if (startsWith(charsetName, "UTF-"))
			return UNRESTRICTED;
		else if (startsWith(charsetName, "ISO-8859-"))
			return EIGHT_BIT; // actually ISO-8859-2 could print more
		else if (startsWith(charsetName, "EBCDIC-CP-"))
			return EIGHT_BIT;
		else if (startsWith(charsetName, "EUC-"))
			return UNRESTRICTED;
		else if (startsWith(charsetName, "ASCII") || startsWith(charsetName, "US-ASCII"))
			return SEVEN_BIT; // US-ASCII is the canonical name of the 7-bit ASCII charset
		else
			return EIGHT_BIT;
	}


	/**
	 * Tests if the name starts with the prefix, ignoring case.
	 */
	private static boolean startsWith(String name, String prefix)
	{
		return name.regionMatches(true /*ignore-case*/, 0, prefix, 0, prefix.length());
	}


	/**
	 * Tests if the charset can print the character.
	 * @param c a character
	 * @return true if the character can be printed as is
	 */
	public abstract boolean isPrintable(char c);
}
/**
 * SimpleCharset is a Charset which can print all characters
 * up to and including a fixed upper bound.
 */
class SimpleCharset extends Charset
{
	/**
	 * Creates a new SimpleCharset.
	 * @param lastPrintable the highest character value which is still printable
	 */
	public SimpleCharset(int lastPrintable)
	{
		lastPrintable_ = lastPrintable;
	}


	/**
	 * Tests if the character is less than or equal to the upper bound.
	 */
	@Override
	public boolean isPrintable(char c)
	{
		return c <= lastPrintable_;
	}


	// only assigned in the constructor, therefore final
	private final int lastPrintable_;
}
/**
 * UnrestrictedCharset is a Charset which reports
 * every character as printable.
 */
class UnrestrictedCharset extends Charset
{
	/**
	 * Always returns true.
	 * In Unicode this is not entirely correct for one half of surrogate pairs:
	 * the exact test would be (c<0xD800) || (c>0xDBFF).
	 */
	@Override
	public boolean isPrintable(char c)
	{
		return true;
	}
}