/* * Copyright (c) 2008-2010 Mozilla Foundation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ package org.whattf.datatype; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import org.whattf.datatype.data.CharsetData; import org.relaxng.datatype.DatatypeException; public class MetaCharset extends AbstractDatatype { /** * The singleton instance. */ public static final MetaCharset THE_INSTANCE = new MetaCharset(); // private static final Pattern THE_PATTERN = // Pattern.compile("^[tT][eE][xX][tT]/[hH][tT][mM][lL]; ?[cC][hH][aA][rR][sS][eE][tT]=[0-9a-zA-Z!#$%&'+_`{}~^-]+$"); public MetaCharset() { super(); } @Override public void checkValid(CharSequence literal) throws DatatypeException { String lower = toAsciiLowerCase(literal); if (!lower.startsWith("text/html;")) { throw newDatatypeException( "The legacy encoding declaration did not start with ", "text/html;", "."); } if (lower.length() == 10) { throw newDatatypeException("The legacy encoding declaration ended prematurely."); } int offset = 10; paramloop: for (int i = 10; i < lower.length(); i++) { char c = lower.charAt(i); switch (c) { case ' ': case '\t': case '\n': case '\u000C': case '\r': offset++; continue; case 'c': break paramloop; default: throw newDatatypeException( "The legacy encoding declaration" + " did not start with space characters or ", "charset=", " after the semicolon. " + " Found \u201c" + c + "\u201d instead."); } } if (!lower.startsWith("charset=", offset)) { throw newDatatypeException("The legacy encoding declaration" + "did not contain ", "charset=", " after the semicolon."); } offset += 8; if (lower.length() == offset) { throw newDatatypeException("The empty string is not a valid character encoding name."); } for (int i = offset; i < lower.length(); i++) { char c = lower.charAt(i); if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || c == '-' || c == '!' || c == '#' || c == '$' || c == '%' || c == '&' || c == '\'' || c == '+' || c == '_' || c == '`' || c == '{' || c == '}' || c == '~' || c == '^')) { throw newDatatypeException("The legacy encoding contained ", c, ", which is not a valid character in an encoding name."); } } String encodingName = lower.substring(offset); if ("replacement".equals(encodingName) || !CharsetData.isPreferred(encodingName)) { String preferred = CharsetData.preferredForLabel(encodingName); if (preferred == null || "replacement".equals(preferred)) { throw newDatatypeException("\u201c" + encodingName + "\u201d is not a valid character encoding name."); } throw newDatatypeException("\u201c" + encodingName + "\u201d is not a preferred encoding name." + " The preferred label for this encoding is \u201C" + preferred + "\u201D."); } } @Override public String getName() { return "legacy character encoding declaration"; } }