/*
* Copyright (c) 2008-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package org.whattf.datatype;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.whattf.datatype.data.CharsetData;
import org.relaxng.datatype.DatatypeException;
/**
 * Datatype for the legacy character encoding declaration, i.e. a value of
 * the form <code>text/html; charset=<i>name</i></code>.
 *
 * <p>Validation is performed on an ASCII-lowercased copy of the literal and
 * accepts, in order:
 * <ol>
 * <li>the prefix <code>text/html;</code>,</li>
 * <li>zero or more of the characters space, tab, LF, FF and CR,</li>
 * <li>the string <code>charset=</code>,</li>
 * <li>a non-empty encoding name made only of the characters accepted by
 * {@link #isEncodingNameChar(char)}, which {@link CharsetData} must report
 * as a preferred name and which must not be <code>replacement</code>.</li>
 * </ol>
 */
public class MetaCharset extends AbstractDatatype {

    /**
     * The singleton instance.
     */
    public static final MetaCharset THE_INSTANCE = new MetaCharset();

    /** Required start of the (lowercased) declaration. */
    private static final String PREFIX = "text/html;";

    /** Parameter name and separator that must precede the encoding name. */
    private static final String CHARSET_PARAM = "charset=";

    /** Encoding name that is rejected even though it is a known name. */
    private static final String REPLACEMENT = "replacement";

    public MetaCharset() {
        super();
    }

    /**
     * Checks that <code>literal</code> is a legacy encoding declaration
     * naming a preferred character encoding.
     *
     * @param literal the value to validate
     * @throws DatatypeException if the prefix or <code>charset=</code> is
     *         missing, if the encoding name is empty or contains a
     *         disallowed character, or if the name is unknown, is
     *         <code>replacement</code>, or is not the preferred label
     */
    @Override public void checkValid(CharSequence literal)
            throws DatatypeException {
        String lower = toAsciiLowerCase(literal);
        if (!lower.startsWith(PREFIX)) {
            throw newDatatypeException(
                    "The legacy encoding declaration did not start with ",
                    PREFIX, ".");
        }
        if (lower.length() == PREFIX.length()) {
            throw newDatatypeException("The legacy encoding declaration ended prematurely.");
        }

        // Skip whitespace after the semicolon; stop at the first 'c', which
        // is where "charset=" would have to begin.
        // NOTE(review): characters quoted in error messages come from the
        // lowercased copy, so they may differ in case from the input.
        int offset = PREFIX.length();
        paramloop: while (offset < lower.length()) {
            char c = lower.charAt(offset);
            switch (c) {
                case ' ':
                case '\t':
                case '\n':
                case '\u000C':
                case '\r':
                    offset++;
                    continue paramloop;
                case 'c':
                    break paramloop;
                default:
                    throw newDatatypeException(
                            "The legacy encoding declaration"
                                    + " did not start with space characters or ",
                            CHARSET_PARAM, " after the semicolon."
                                    + " Found \u201c" + c + "\u201d instead.");
            }
        }

        // Also reached when only whitespace followed the semicolon.
        if (!lower.startsWith(CHARSET_PARAM, offset)) {
            throw newDatatypeException("The legacy encoding declaration"
                    + " did not contain ", CHARSET_PARAM, " after the semicolon.");
        }
        offset += CHARSET_PARAM.length();
        if (lower.length() == offset) {
            throw newDatatypeException("The empty string is not a valid character encoding name.");
        }

        for (int i = offset; i < lower.length(); i++) {
            char c = lower.charAt(i);
            if (!isEncodingNameChar(c)) {
                throw newDatatypeException("The legacy encoding contained ", c,
                        ", which is not a valid character in an encoding name.");
            }
        }

        String encodingName = lower.substring(offset);
        if (REPLACEMENT.equals(encodingName) || !CharsetData.isPreferred(encodingName)) {
            String preferred = CharsetData.preferredForLabel(encodingName);
            // No mapping, or a mapping to "replacement": not a usable name.
            if (preferred == null || REPLACEMENT.equals(preferred)) {
                throw newDatatypeException("\u201c" + encodingName
                        + "\u201d is not a valid character encoding name.");
            }
            // A known label, but not the preferred one: point at the preferred label.
            throw newDatatypeException("\u201c" + encodingName
                    + "\u201d is not a preferred encoding name." + " The preferred label for this encoding is \u201C"
                    + preferred + "\u201D.");
        }
    }

    /**
     * Tells whether <code>c</code> may appear in an encoding name as far as
     * this datatype is concerned. The caller passes already-lowercased
     * characters, so only lowercase ASCII letters are accepted.
     *
     * @param c the character to test
     * @return <code>true</code> if <code>c</code> is an ASCII digit, a
     *         lowercase ASCII letter, or one of
     *         <code>- ! # $ % &amp; ' + _ ` { } ~ ^</code>
     */
    private static boolean isEncodingNameChar(char c) {
        if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z')) {
            return true;
        }
        switch (c) {
            case '-':
            case '!':
            case '#':
            case '$':
            case '%':
            case '&':
            case '\'':
            case '+':
            case '_':
            case '`':
            case '{':
            case '}':
            case '~':
            case '^':
                return true;
            default:
                return false;
        }
    }

    /**
     * Returns the human-readable name of this datatype.
     *
     * @return the string <code>legacy character encoding declaration</code>
     */
    @Override public String getName() {
        return "legacy character encoding declaration";
    }
}