/* * Copyright (c) 2007-2008 Mozilla Foundation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ package org.whattf.datatype; import org.relaxng.datatype.DatatypeException; import org.relaxng.datatype.DatatypeStreamingValidator; import org.relaxng.datatype.ValidationContext; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.UnicodeSet; public abstract class AbstractUnicodeClassCharacter extends AbstractDatatype { private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; protected abstract UnicodeSet getUnicodeSet(); @Override public void checkValid(CharSequence literal) throws DatatypeException { switch (literal.length()) { case 0: throw newDatatypeException("The empty string is not a " + getName() + "."); case 1: char c = literal.charAt(0); if (!getUnicodeSet().contains(c)) { throw newDatatypeException(0, "The character ", c, " is not a " + getName() + "."); } return; case 2: char hi = literal.charAt(0); char lo = literal.charAt(1); if ((lo & 0xFC00) == 0xDC00 && (hi & 0xFC00) == 0xD800) { int codepoint = (hi << 10) + lo + SURROGATE_OFFSET; if (!getUnicodeSet().contains(codepoint)) { throw newDatatypeException(0, "The character ", "" + hi + lo, " is not a " + getName() + "."); } return; } // else fall through. default: throw newDatatypeException("A " + getName() + " must be a single character."); } } /** * @see org.whattf.datatype.AbstractDatatype#createStreamingValidator(org.relaxng.datatype.ValidationContext) */ @Override public DatatypeStreamingValidator createStreamingValidator( ValidationContext context) { return new DatatypeStreamingValidator () { int codepoint = -2; private void addCharacter(char c) { if (codepoint == -1) { return; } else if ((codepoint & 0xFC00) == 0xD800) { if ((c & 0xFC00) == 0xDC00) { codepoint = (codepoint << 10) + c + SURROGATE_OFFSET; } else { codepoint = -1; } } else if (codepoint == -2) { codepoint = c; } else { codepoint = -1; } } public void addCharacters(char[] buf, int start, int len) { if (codepoint == -1) { return; } else { for (int i = start; i < start + len; i++) { addCharacter(buf[i]); } } } public void checkValid() throws DatatypeException { if (codepoint == -2) { throw newDatatypeException("The empty string is not a " + getName() + "."); } else if (codepoint == -1) { throw newDatatypeException("A " + getName() + " must be a single character."); } else if (!getUnicodeSet().contains(codepoint)) { throw newDatatypeException(0, "The character ", UCharacter.toString(codepoint), " is not a " + getName() + "."); } } public boolean isValid() { try { checkValid(); return true; } catch (DatatypeException e) { return false; } } }; } }